X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fvp8.c;h=bcf623790c710352d83953bbae4bcd9d1671c27c;hb=dd18c9a050ac1f1437151ceb2d2afbc96c5602d8;hp=1136f38a860900a7f3647b63306779d89607f4b5;hpb=a8ab0cccf7a3f74daf6258b2ead63671f575cf3c;p=ffmpeg diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c index 1136f38a860..bcf623790c7 100644 --- a/libavcodec/vp8.c +++ b/libavcodec/vp8.c @@ -22,6 +22,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavcore/imgutils.h" #include "avcodec.h" #include "vp56.h" #include "vp8data.h" @@ -86,12 +87,10 @@ typedef struct { VP8Macroblock *macroblocks; VP8Macroblock *macroblocks_base; VP8FilterStrength *filter_strength; - int mb_stride; - uint8_t *intra4x4_pred_mode; - uint8_t *intra4x4_pred_mode_base; + uint8_t *intra4x4_pred_mode_top; + uint8_t intra4x4_pred_mode_left[4]; uint8_t *segmentation_map; - int b4_stride; /** * Cache of the top row needed for intra prediction @@ -117,6 +116,7 @@ typedef struct { */ DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4]; DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16]; + DECLARE_ALIGNED(16, DCTELEM, block_dc)[16]; uint8_t intra4x4_pred_mode_mb[16]; int chroma_pred_mode; ///< 8x8c pred mode of the current macroblock @@ -162,13 +162,14 @@ typedef struct { /** * filter strength adjustment for the following macroblock modes: - * [0] - i4x4 - * [1] - zero mv - * [2] - inter modes except for zero or split mv - * [3] - split mv + * [0-3] - i16x16 (always zero) + * [4] - i4x4 + * [5] - zero mv + * [6] - inter modes except for zero or split mv + * [7] - split mv * i16x16 modes never have any adjustment */ - int8_t mode[4]; + int8_t mode[VP8_MVMODE_SPLIT+1]; /** * filter strength adjustment for macroblocks that reference: @@ -194,7 +195,8 @@ typedef struct { uint8_t golden; uint8_t pred16x16[4]; uint8_t pred8x8c[3]; - uint8_t token[4][8][3][NUM_DCT_TOKENS-1]; + /* Padded to allow overreads */ + uint8_t token[4][17][3][NUM_DCT_TOKENS-1]; uint8_t mvc[2][19]; } prob[2]; } VP8Context; @@ -211,21 +213,18 @@ static void vp8_decode_flush(AVCodecContext *avctx) av_freep(&s->macroblocks_base); av_freep(&s->filter_strength); - av_freep(&s->intra4x4_pred_mode_base); + av_freep(&s->intra4x4_pred_mode_top); av_freep(&s->top_nnz); av_freep(&s->edge_emu_buffer); av_freep(&s->top_border); av_freep(&s->segmentation_map); s->macroblocks = NULL; - s->intra4x4_pred_mode = NULL; } static int update_dimensions(VP8Context *s, int width, int height) { - int i; - - if (avcodec_check_dimensions(s->avctx, width, height)) + if (av_image_check_size(width, height, 0, s->avctx)) return AVERROR_INVALIDDATA; vp8_decode_flush(s->avctx); @@ -235,28 +234,18 @@ static int update_dimensions(VP8Context *s, int width, int height) s->mb_width = (s->avctx->coded_width +15) / 16; s->mb_height = (s->avctx->coded_height+15) / 16; - // we allocate a border around the top/left of intra4x4 modes - // this is 4 blocks for intra4x4 to keep 4-byte alignment for fill_rectangle - s->mb_stride = s->mb_width+1; - s->b4_stride = 4*s->mb_stride; - - s->macroblocks_base = av_mallocz((s->mb_stride+s->mb_height*2+2)*sizeof(*s->macroblocks)); - s->filter_strength = av_mallocz(s->mb_stride*sizeof(*s->filter_strength)); - s->intra4x4_pred_mode_base = av_mallocz(s->b4_stride*(4*s->mb_height+1)); + s->macroblocks_base = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks)); + s->filter_strength = av_mallocz(s->mb_width*sizeof(*s->filter_strength)); + s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4); s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border)); - s->segmentation_map = av_mallocz(s->mb_stride*s->mb_height); + s->segmentation_map = av_mallocz(s->mb_width*s->mb_height); - if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_base || + if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top || !s->top_nnz || !s->top_border || !s->segmentation_map) return AVERROR(ENOMEM); s->macroblocks = s->macroblocks_base + 1; - s->intra4x4_pred_mode = s->intra4x4_pred_mode_base + 4 + s->b4_stride; - - memset(s->intra4x4_pred_mode_base, DC_PRED, s->b4_stride); - for (i = 0; i < 4*s->mb_height; i++) - s->intra4x4_pred_mode[i*s->b4_stride-1] = DC_PRED; return 0; } @@ -290,7 +279,7 @@ static void update_lf_deltas(VP8Context *s) for (i = 0; i < 4; i++) s->lf_delta.ref[i] = vp8_rac_get_sint(c, 6); - for (i = 0; i < 4; i++) + for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6); } @@ -311,11 +300,11 @@ static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size) if (buf_size - size < 0) return -1; - vp56_init_range_decoder(&s->coeff_partition[i], buf, size); + ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size); buf += size; buf_size -= size; } - vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size); + ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size); return 0; } @@ -395,7 +384,7 @@ static void update_refs(VP8Context *s) static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) { VP56RangeCoder *c = &s->c; - int header_size, hscale, vscale, i, j, k, l, ret; + int header_size, hscale, vscale, i, j, k, l, m, ret; int width = s->avctx->width; int height = s->avctx->height; @@ -435,7 +424,10 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) av_log_missing_feature(s->avctx, "Upscaling", 1); s->update_golden = s->update_altref = VP56_FRAME_CURRENT; - memcpy(s->prob->token , vp8_token_default_probs , sizeof(s->prob->token)); + for (i = 0; i < 4; i++) + for (j = 0; j < 16; j++) + memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]], + sizeof(s->prob->token[i][j])); memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16)); memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c)); memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc)); @@ -448,7 +440,7 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) return ret; } - vp56_init_range_decoder(c, buf, header_size); + ff_vp56_init_range_decoder(c, buf, header_size); buf += header_size; buf_size -= header_size; @@ -495,8 +487,11 @@ static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) for (j = 0; j < 8; j++) for (k = 0; k < 3; k++) for (l = 0; l < NUM_DCT_TOKENS-1; l++) - if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) - s->prob->token[i][j][k][l] = vp8_rac_get_uint(c, 8); + if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) { + int prob = vp8_rac_get_uint(c, 8); + for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++) + s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob; + } if ((s->mbskip_enabled = vp8_rac_get(c))) s->prob->mbskip = vp8_rac_get_uint(c, 8); @@ -534,7 +529,7 @@ void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, int mb_x, int mb_y) } static av_always_inline -void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, +void find_near_mvs(VP8Context *s, VP8Macroblock *mb, VP56mv near[2], VP56mv *best, uint8_t cnt[4]) { VP8Macroblock *mb_edge[3] = { mb + 2 /* top */, @@ -643,19 +638,32 @@ const uint8_t *get_submv_prob(uint32_t left, uint32_t top) static av_always_inline int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb) { - int part_idx = mb->partitioning = - vp8_rac_get_tree(c, vp8_mbsplit_tree, vp8_mbsplit_prob); - int n, num = vp8_mbsplit_count[part_idx]; + int part_idx; + int n, num; VP8Macroblock *top_mb = &mb[2]; VP8Macroblock *left_mb = &mb[-1]; const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning], *mbsplits_top = vp8_mbsplits[top_mb->partitioning], - *mbsplits_cur = vp8_mbsplits[part_idx], - *firstidx = vp8_mbfirstidx[part_idx]; + *mbsplits_cur, *firstidx; VP56mv *top_mv = top_mb->bmv; VP56mv *left_mv = left_mb->bmv; VP56mv *cur_mv = mb->bmv; + if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) { + if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) { + part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]); + } else { + part_idx = VP8_SPLITMVMODE_8x8; + } + } else { + part_idx = VP8_SPLITMVMODE_4x4; + } + + num = vp8_mbsplit_count[part_idx]; + mbsplits_cur = vp8_mbsplits[part_idx], + firstidx = vp8_mbfirstidx[part_idx]; + mb->partitioning = part_idx; + for (n = 0; n < num; n++) { int k = firstidx[n]; uint32_t left, above; @@ -672,20 +680,19 @@ int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb) submv_prob = get_submv_prob(left, above); - switch (vp8_rac_get_tree(c, vp8_submv_ref_tree, submv_prob)) { - case VP8_SUBMVMODE_NEW4X4: - mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]); - mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]); - break; - case VP8_SUBMVMODE_ZERO4X4: - AV_ZERO32(&mb->bmv[n]); - break; - case VP8_SUBMVMODE_LEFT4X4: + if (vp56_rac_get_prob_branchy(c, submv_prob[0])) { + if (vp56_rac_get_prob_branchy(c, submv_prob[1])) { + if (vp56_rac_get_prob_branchy(c, submv_prob[2])) { + mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]); + mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]); + } else { + AV_ZERO32(&mb->bmv[n]); + } + } else { + AV_WN32A(&mb->bmv[n], above); + } + } else { AV_WN32A(&mb->bmv[n], left); - break; - case VP8_SUBMVMODE_TOP4X4: - AV_WN32A(&mb->bmv[n], above); - break; } } @@ -693,31 +700,32 @@ int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb) } static av_always_inline -void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4, - int stride, int keyframe) +void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, + int mb_x, int keyframe) { - int x, y, t, l, i; - + uint8_t *intra4x4 = s->intra4x4_pred_mode_mb; if (keyframe) { - const uint8_t *ctx; + int x, y; + uint8_t* const top = s->intra4x4_pred_mode_top + 4 * mb_x; + uint8_t* const left = s->intra4x4_pred_mode_left; for (y = 0; y < 4; y++) { for (x = 0; x < 4; x++) { - t = intra4x4[x - stride]; - l = intra4x4[x - 1]; - ctx = vp8_pred4x4_prob_intra[t][l]; - intra4x4[x] = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); + const uint8_t *ctx; + ctx = vp8_pred4x4_prob_intra[top[x]][left[y]]; + *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); + left[y] = top[x] = *intra4x4; + intra4x4++; } - intra4x4 += stride; } } else { + int i; for (i = 0; i < 16; i++) intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter); } } static av_always_inline -void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, - uint8_t *intra4x4, uint8_t *segment) +void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment) { VP56RangeCoder *c = &s->c; @@ -731,16 +739,18 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra); if (mb->mode == MODE_I4x4) { - decode_intra4x4_modes(c, intra4x4, s->b4_stride, 1); - } else - fill_rectangle(intra4x4, 4, 4, s->b4_stride, vp8_pred4x4_mode[mb->mode], 1); + decode_intra4x4_modes(s, c, mb_x, 1); + } else { + const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u; + AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes); + AV_WN32A(s->intra4x4_pred_mode_left, modes); + } s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra); mb->ref_frame = VP56_FRAME_CURRENT; } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) { VP56mv near[2], best; uint8_t cnt[4] = { 0 }; - uint8_t p[4]; // inter MB, 16.2 if (vp56_rac_get_prob_branchy(c, s->prob->last)) @@ -751,31 +761,27 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, s->ref_count[mb->ref_frame-1]++; // motion vectors, 16.3 - find_near_mvs(s, mb, mb_x, mb_y, near, &best, cnt); - p[0] = vp8_mode_contexts[cnt[0]][0]; - p[1] = vp8_mode_contexts[cnt[1]][1]; - p[2] = vp8_mode_contexts[cnt[2]][2]; - p[3] = vp8_mode_contexts[cnt[3]][3]; - mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_mvinter, p); - switch (mb->mode) { - case VP8_MVMODE_SPLIT: - clamp_mv(s, &mb->mv, &mb->mv, mb_x, mb_y); - mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1]; - break; - case VP8_MVMODE_ZERO: + find_near_mvs(s, mb, near, &best, cnt); + if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[0]][0])) { + mb->mode = VP8_MVMODE_MV; + if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[1]][1])) { + if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[2]][2])) { + if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[3]][3])) { + mb->mode = VP8_MVMODE_SPLIT; + clamp_mv(s, &mb->mv, &mb->mv, mb_x, mb_y); + mb->mv = mb->bmv[decode_splitmvs(s, c, mb) - 1]; + } else { + clamp_mv(s, &mb->mv, &mb->mv, mb_x, mb_y); + mb->mv.y += read_mv_component(c, s->prob->mvc[0]); + mb->mv.x += read_mv_component(c, s->prob->mvc[1]); + } + } else + clamp_mv(s, &mb->mv, &near[1], mb_x, mb_y); + } else + clamp_mv(s, &mb->mv, &near[0], mb_x, mb_y); + } else { + mb->mode = VP8_MVMODE_ZERO; AV_ZERO32(&mb->mv); - break; - case VP8_MVMODE_NEAREST: - clamp_mv(s, &mb->mv, &near[0], mb_x, mb_y); - break; - case VP8_MVMODE_NEAR: - clamp_mv(s, &mb->mv, &near[1], mb_x, mb_y); - break; - case VP8_MVMODE_NEW: - clamp_mv(s, &mb->mv, &mb->mv, mb_x, mb_y); - mb->mv.y += + read_mv_component(c, s->prob->mvc[0]); - mb->mv.x += + read_mv_component(c, s->prob->mvc[1]); - break; } if (mb->mode != VP8_MVMODE_SPLIT) { mb->partitioning = VP8_SPLITMVMODE_NONE; @@ -786,7 +792,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); if (mb->mode == MODE_I4x4) - decode_intra4x4_modes(c, intra4x4, 4, 0); + decode_intra4x4_modes(s, c, mb_x, 0); s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c); mb->ref_frame = VP56_FRAME_CURRENT; @@ -806,37 +812,30 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, * @return 0 if no coeffs were decoded * otherwise, the index of the last coeff decoded plus one */ -static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], - uint8_t probs[8][3][NUM_DCT_TOKENS-1], - int i, int zero_nhood, int16_t qmul[2]) +static int decode_block_coeffs_internal(VP56RangeCoder *c, DCTELEM block[16], + uint8_t probs[8][3][NUM_DCT_TOKENS-1], + int i, uint8_t *token_prob, int16_t qmul[2]) { - uint8_t *token_prob; - int nonzero = 0; - int coeff; - + goto skip_eob; do { - token_prob = probs[vp8_coeff_band[i]][zero_nhood]; - + int coeff; if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB - return nonzero; + return i; skip_eob: if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0 - zero_nhood = 0; - token_prob = probs[vp8_coeff_band[++i]][0]; - if (i < 16) - goto skip_eob; - return nonzero; // invalid input; blocks should end with EOB + if (++i == 16) + return i; // invalid input; blocks should end with EOB + token_prob = probs[i][0]; + goto skip_eob; } if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1 coeff = 1; - zero_nhood = 1; + token_prob = probs[i+1][1]; } else { - zero_nhood = 2; - if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4 - coeff = vp56_rac_get_prob(c, token_prob[4]); + coeff = vp56_rac_get_prob_branchy(c, token_prob[4]); if (coeff) coeff += vp56_rac_get_prob(c, token_prob[5]); coeff += 2; @@ -858,36 +857,49 @@ skip_eob: coeff += vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); } } + token_prob = probs[i+1][2]; } - - // todo: full [16] qmat? load into register? block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i]; - nonzero = ++i; - } while (i < 16); + } while (++i < 16); - return nonzero; + return i; +} + +static av_always_inline +int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], + uint8_t probs[8][3][NUM_DCT_TOKENS-1], + int i, int zero_nhood, int16_t qmul[2]) +{ + uint8_t *token_prob = probs[i][zero_nhood]; + if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB + return 0; + return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul); } static av_always_inline void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9]) { - LOCAL_ALIGNED_16(DCTELEM, dc,[16]); int i, x, y, luma_start = 0, luma_ctx = 3; int nnz_pred, nnz, nnz_total = 0; int segment = s->segment; + int block_dc = 0; if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { - AV_ZERO128(dc); - AV_ZERO128(dc+8); nnz_pred = t_nnz[8] + l_nnz[8]; // decode DC values and do hadamard - nnz = decode_block_coeffs(c, dc, s->prob->token[1], 0, nnz_pred, + nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred, s->qmat[segment].luma_dc_qmul); l_nnz[8] = t_nnz[8] = !!nnz; - nnz_total += nnz; - s->vp8dsp.vp8_luma_dc_wht(s->block, dc); + if (nnz) { + nnz_total += nnz; + block_dc = 1; + if (nnz == 1) + s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc); + else + s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc); + } luma_start = 1; luma_ctx = 0; } @@ -898,8 +910,8 @@ void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, nnz_pred = l_nnz[y] + t_nnz[x]; nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start, nnz_pred, s->qmat[segment].luma_qmul); - // nnz+luma_start may be one more than the actual last index, but we don't care - s->non_zero_count_cache[y][x] = nnz + luma_start; + // nnz+block_dc may be one more than the actual last index, but we don't care + s->non_zero_count_cache[y][x] = nnz + block_dc; t_nnz[x] = l_nnz[y] = !!nnz; nnz_total += nnz; } @@ -968,37 +980,120 @@ void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_ } static av_always_inline -int check_intra_pred_mode(int mode, int mb_x, int mb_y) +int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y) +{ + if (!mb_x) { + return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8; + } else { + return mb_y ? mode : LEFT_DC_PRED8x8; + } +} + +static av_always_inline +int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y) +{ + if (!mb_x) { + return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8; + } else { + return mb_y ? mode : HOR_PRED8x8; + } +} + +static av_always_inline +int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y) { if (mode == DC_PRED8x8) { - if (!mb_x) { - mode = mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8; - } else if (!mb_y) { - mode = LEFT_DC_PRED8x8; + return check_dc_pred8x8_mode(mode, mb_x, mb_y); + } else { + return mode; + } +} + +static av_always_inline +int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y) +{ + switch (mode) { + case DC_PRED8x8: + return check_dc_pred8x8_mode(mode, mb_x, mb_y); + case VERT_PRED8x8: + return !mb_y ? DC_127_PRED8x8 : mode; + case HOR_PRED8x8: + return !mb_x ? DC_129_PRED8x8 : mode; + case PLANE_PRED8x8 /*TM*/: + return check_tm_pred8x8_mode(mode, mb_x, mb_y); + } + return mode; +} + +static av_always_inline +int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y) +{ + if (!mb_x) { + return mb_y ? VERT_VP8_PRED : DC_129_PRED; + } else { + return mb_y ? mode : HOR_VP8_PRED; + } +} + +static av_always_inline +int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf) +{ + switch (mode) { + case VERT_PRED: + if (!mb_x && mb_y) { + *copy_buf = 1; + return mode; } + /* fall-through */ + case DIAG_DOWN_LEFT_PRED: + case VERT_LEFT_PRED: + return !mb_y ? DC_127_PRED : mode; + case HOR_PRED: + if (!mb_y) { + *copy_buf = 1; + return mode; + } + /* fall-through */ + case HOR_UP_PRED: + return !mb_x ? DC_129_PRED : mode; + case TM_VP8_PRED: + return check_tm_pred4x4_mode(mode, mb_x, mb_y); + case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC + case DIAG_DOWN_RIGHT_PRED: + case VERT_RIGHT_PRED: + case HOR_DOWN_PRED: + if (!mb_y || !mb_x) + *copy_buf = 1; + return mode; } return mode; } static av_always_inline void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, - uint8_t *intra4x4, int mb_x, int mb_y) + int mb_x, int mb_y) { + AVCodecContext *avctx = s->avctx; int x, y, mode, nnz, tr; // for the first row, we need to run xchg_mb_border to init the top edge to 127 // otherwise, skip it if we aren't going to deblock - if (s->deblock_filter || !mb_y) + if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y)) xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, s->filter.simple, 1); if (mb->mode < MODE_I4x4) { - mode = check_intra_pred_mode(mb->mode, mb_x, mb_y); + if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested + mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y); + } else { + mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y); + } s->hpc.pred16x16[mode](dst[0], s->linesize); } else { uint8_t *ptr = dst[0]; - int stride = s->keyframe ? s->b4_stride : 4; + uint8_t *intra4x4 = s->intra4x4_pred_mode_mb; + uint8_t tr_top[4] = { 127, 127, 127, 127 }; // all blocks on the right edge of the macroblock use bottom edge // the top macroblock for their topright edge @@ -1006,7 +1101,8 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, // if we're on the right edge of the frame, said edge is extended // from the top macroblock - if (mb_x == s->mb_width-1) { + if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) && + mb_x == s->mb_width-1) { tr = tr_right[-1]*0x01010101; tr_right = (uint8_t *)&tr; } @@ -1017,10 +1113,53 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, for (y = 0; y < 4; y++) { uint8_t *topright = ptr + 4 - s->linesize; for (x = 0; x < 4; x++) { - if (x == 3) + int copy = 0, linesize = s->linesize; + uint8_t *dst = ptr+4*x; + DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8]; + + if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) { + topright = tr_top; + } else if (x == 3) topright = tr_right; - s->hpc.pred4x4[intra4x4[x]](ptr+4*x, topright, s->linesize); + if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works + mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, ©); + if (copy) { + dst = copy_dst + 12; + linesize = 8; + if (!(mb_y + y)) { + copy_dst[3] = 127U; + AV_WN32A(copy_dst+4, 127U * 0x01010101U); + } else { + AV_COPY32(copy_dst+4, ptr+4*x-s->linesize); + if (!(mb_x + x)) { + copy_dst[3] = 129U; + } else { + copy_dst[3] = ptr[4*x-s->linesize-1]; + } + } + if (!(mb_x + x)) { + copy_dst[11] = + copy_dst[19] = + copy_dst[27] = + copy_dst[35] = 129U; + } else { + copy_dst[11] = ptr[4*x -1]; + copy_dst[19] = ptr[4*x+s->linesize -1]; + copy_dst[27] = ptr[4*x+s->linesize*2-1]; + copy_dst[35] = ptr[4*x+s->linesize*3-1]; + } + } + } else { + mode = intra4x4[x]; + } + s->hpc.pred4x4[mode](dst, topright, linesize); + if (copy) { + AV_COPY32(ptr+4*x , copy_dst+12); + AV_COPY32(ptr+4*x+s->linesize , copy_dst+20); + AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28); + AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36); + } nnz = s->non_zero_count_cache[y][x]; if (nnz) { @@ -1033,20 +1172,31 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, } ptr += 4*s->linesize; - intra4x4 += stride; + intra4x4 += 4; } } - mode = check_intra_pred_mode(s->chroma_pred_mode, mb_x, mb_y); + if (avctx->flags & CODEC_FLAG_EMU_EDGE) { + mode = check_intra_pred8x8_mode_emuedge(s->chroma_pred_mode, mb_x, mb_y); + } else { + mode = check_intra_pred8x8_mode(s->chroma_pred_mode, mb_x, mb_y); + } s->hpc.pred8x8[mode](dst[1], s->uvlinesize); s->hpc.pred8x8[mode](dst[2], s->uvlinesize); - if (s->deblock_filter || !mb_y) + if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y)) xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, s->filter.simple, 0); } +static const uint8_t subpel_idx[3][8] = { + { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels, + // also function pointer index + { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required + { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels +}; + /** * Generic MC function. * @@ -1065,34 +1215,72 @@ void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, * @param mc_func motion compensation function pointers (bilinear or sixtap MC) */ static av_always_inline -void vp8_mc(VP8Context *s, int luma, - uint8_t *dst, uint8_t *src, const VP56mv *mv, - int x_off, int y_off, int block_w, int block_h, - int width, int height, int linesize, - vp8_mc_func mc_func[3][3]) +void vp8_mc_luma(VP8Context *s, uint8_t *dst, uint8_t *src, const VP56mv *mv, + int x_off, int y_off, int block_w, int block_h, + int width, int height, int linesize, + vp8_mc_func mc_func[3][3]) { if (AV_RN32A(mv)) { - static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 }; - int mx = (mv->x << luma)&7, mx_idx = idx[mx]; - int my = (mv->y << luma)&7, my_idx = idx[my]; - x_off += mv->x >> (3 - luma); - y_off += mv->y >> (3 - luma); + int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx]; + int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my]; + + x_off += mv->x >> 2; + y_off += mv->y >> 2; // edge emulation src += y_off * linesize + x_off; - if (x_off < 2 || x_off >= width - block_w - 3 || - y_off < 2 || y_off >= height - block_h - 3) { - ff_emulated_edge_mc(s->edge_emu_buffer, src - 2 * linesize - 2, linesize, - block_w + 5, block_h + 5, - x_off - 2, y_off - 2, width, height); - src = s->edge_emu_buffer + 2 + linesize * 2; + if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || + y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { + s->dsp.emulated_edge_mc(s->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize, + block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], + x_off - mx_idx, y_off - my_idx, width, height); + src = s->edge_emu_buffer + mx_idx + linesize * my_idx; } mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); } else mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0); } +static av_always_inline +void vp8_mc_chroma(VP8Context *s, uint8_t *dst1, uint8_t *dst2, uint8_t *src1, + uint8_t *src2, const VP56mv *mv, int x_off, int y_off, + int block_w, int block_h, int width, int height, int linesize, + vp8_mc_func mc_func[3][3]) +{ + if (AV_RN32A(mv)) { + int mx = mv->x&7, mx_idx = subpel_idx[0][mx]; + int my = mv->y&7, my_idx = subpel_idx[0][my]; + + x_off += mv->x >> 3; + y_off += mv->y >> 3; + + // edge emulation + src1 += y_off * linesize + x_off; + src2 += y_off * linesize + x_off; + if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] || + y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) { + s->dsp.emulated_edge_mc(s->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize, + block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], + x_off - mx_idx, y_off - my_idx, width, height); + src1 = s->edge_emu_buffer + mx_idx + linesize * my_idx; + mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my); + + s->dsp.emulated_edge_mc(s->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize, + block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my], + x_off - mx_idx, y_off - my_idx, width, height); + src2 = s->edge_emu_buffer + mx_idx + linesize * my_idx; + mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my); + } else { + mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my); + mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my); + } + } else { + mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0); + mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0); + } +} + static av_always_inline void vp8_mc_part(VP8Context *s, uint8_t *dst[3], AVFrame *ref_frame, int x_off, int y_off, @@ -1103,10 +1291,10 @@ void vp8_mc_part(VP8Context *s, uint8_t *dst[3], VP56mv uvmv = *mv; /* Y */ - vp8_mc(s, 1, dst[0] + by_off * s->linesize + bx_off, - ref_frame->data[0], mv, x_off + bx_off, y_off + by_off, - block_w, block_h, width, height, s->linesize, - s->put_pixels_tab[block_w == 8]); + vp8_mc_luma(s, dst[0] + by_off * s->linesize + bx_off, + ref_frame->data[0], mv, x_off + bx_off, y_off + by_off, + block_w, block_h, width, height, s->linesize, + s->put_pixels_tab[block_w == 8]); /* U/V */ if (s->profile == 3) { @@ -1117,14 +1305,11 @@ void vp8_mc_part(VP8Context *s, uint8_t *dst[3], bx_off >>= 1; by_off >>= 1; width >>= 1; height >>= 1; block_w >>= 1; block_h >>= 1; - vp8_mc(s, 0, dst[1] + by_off * s->uvlinesize + bx_off, - ref_frame->data[1], &uvmv, x_off + bx_off, y_off + by_off, - block_w, block_h, width, height, s->uvlinesize, - s->put_pixels_tab[1 + (block_w == 4)]); - vp8_mc(s, 0, dst[2] + by_off * s->uvlinesize + bx_off, - ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off, - block_w, block_h, width, height, s->uvlinesize, - s->put_pixels_tab[1 + (block_w == 4)]); + vp8_mc_chroma(s, dst[1] + by_off * s->uvlinesize + bx_off, + dst[2] + by_off * s->uvlinesize + bx_off, ref_frame->data[1], + ref_frame->data[2], &uvmv, x_off + bx_off, y_off + by_off, + block_w, block_h, width, height, s->uvlinesize, + s->put_pixels_tab[1 + (block_w == 4)]); } /* Fetch pixels for estimated mv 4 macroblocks ahead. @@ -1134,8 +1319,8 @@ static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, i /* Don't prefetch refs that haven't been used very often this frame. */ if (s->ref_count[ref-1] > (mb_xy >> 5)) { int x_off = mb_x << 4, y_off = mb_y << 4; - int mx = mb->mv.x + x_off + 8; - int my = mb->mv.y + y_off; + int mx = (mb->mv.x>>2) + x_off + 8; + int my = (mb->mv.y>>2) + y_off; uint8_t **src= s->framep[ref]->data; int off= mx + (my + (mb_x&3)*4)*s->linesize + 64; s->dsp.prefetch(src[0]+off, s->linesize, 4); @@ -1156,10 +1341,11 @@ void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, AVFrame *ref = s->framep[mb->ref_frame]; VP56mv *bmv = mb->bmv; - if (mb->mode < VP8_MVMODE_SPLIT) { + switch (mb->partitioning) { + case VP8_SPLITMVMODE_NONE: vp8_mc_part(s, dst, ref, x_off, y_off, 0, 0, 16, 16, width, height, &mb->mv); - } else switch (mb->partitioning) { + break; case VP8_SPLITMVMODE_4x4: { int x, y; VP56mv uvmv; @@ -1167,11 +1353,11 @@ void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, /* Y */ for (y = 0; y < 4; y++) { for (x = 0; x < 4; x++) { - vp8_mc(s, 1, dst[0] + 4*y*s->linesize + x*4, - ref->data[0], &bmv[4*y + x], - 4*x + x_off, 4*y + y_off, 4, 4, - width, height, s->linesize, - s->put_pixels_tab[2]); + vp8_mc_luma(s, dst[0] + 4*y*s->linesize + x*4, + ref->data[0], &bmv[4*y + x], + 4*x + x_off, 4*y + y_off, 4, 4, + width, height, s->linesize, + s->put_pixels_tab[2]); } } @@ -1193,16 +1379,12 @@ void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, uvmv.x &= ~7; uvmv.y &= ~7; } - vp8_mc(s, 0, dst[1] + 4*y*s->uvlinesize + x*4, - ref->data[1], &uvmv, - 4*x + x_off, 4*y + y_off, 4, 4, - width, height, s->uvlinesize, - s->put_pixels_tab[2]); - vp8_mc(s, 0, dst[2] + 4*y*s->uvlinesize + x*4, - ref->data[2], &uvmv, - 4*x + x_off, 4*y + y_off, 4, 4, - width, height, s->uvlinesize, - s->put_pixels_tab[2]); + vp8_mc_chroma(s, dst[1] + 4*y*s->uvlinesize + x*4, + dst[2] + 4*y*s->uvlinesize + x*4, + ref->data[1], ref->data[2], &uvmv, + 4*x + x_off, 4*y + y_off, 4, 4, + width, height, s->uvlinesize, + s->put_pixels_tab[2]); } } break; @@ -1296,18 +1478,7 @@ static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *m if (s->lf_delta.enabled) { filter_level += s->lf_delta.ref[mb->ref_frame]; - - if (mb->ref_frame == VP56_FRAME_CURRENT) { - if (mb->mode == MODE_I4x4) - filter_level += s->lf_delta.mode[0]; - } else { - if (mb->mode == VP8_MVMODE_ZERO) - filter_level += s->lf_delta.mode[1]; - else if (mb->mode == VP8_MVMODE_SPLIT) - filter_level += s->lf_delta.mode[3]; - else - filter_level += s->lf_delta.mode[2]; - } + filter_level += s->lf_delta.mode[mb->mode]; } filter_level = av_clip(filter_level, 0, 63); @@ -1514,44 +1685,47 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz)); - /* Zero macroblock structures for top/left prediction from outside the frame. */ - memset(s->macroblocks, 0, (s->mb_width + s->mb_height*2)*sizeof(*s->macroblocks)); + /* Zero macroblock structures for top/top-left prediction from outside the frame. */ + memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks)); // top edge of 127 for intra prediction - memset(s->top_border, 127, (s->mb_width+1)*sizeof(*s->top_border)); + if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) { + s->top_border[0][15] = s->top_border[0][23] = 127; + memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1); + } memset(s->ref_count, 0, sizeof(s->ref_count)); + if (s->keyframe) + memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4); for (mb_y = 0; mb_y < s->mb_height; mb_y++) { VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)]; VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2; - uint8_t *intra4x4 = s->intra4x4_pred_mode + 4*mb_y*s->b4_stride; - uint8_t *segment_map = s->segmentation_map + mb_y*s->mb_stride; - int mb_xy = mb_y * s->mb_stride; + int mb_xy = mb_y*s->mb_width; uint8_t *dst[3] = { curframe->data[0] + 16*mb_y*s->linesize, curframe->data[1] + 8*mb_y*s->uvlinesize, curframe->data[2] + 8*mb_y*s->uvlinesize }; + memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock memset(s->left_nnz, 0, sizeof(s->left_nnz)); + AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101); // left edge of 129 for intra prediction - if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) + if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) { for (i = 0; i < 3; i++) for (y = 0; y < 16>>!!i; y++) dst[i][y*curframe->linesize[i]-1] = 129; - if (mb_y) - memset(s->top_border, 129, sizeof(*s->top_border)); + if (mb_y == 1) // top left edge is also 129 + s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129; + } for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { - uint8_t *intra4x4_mb = s->keyframe ? intra4x4 + 4*mb_x : s->intra4x4_pred_mode_mb; - uint8_t *segment_mb = segment_map+mb_x; - /* Prefetch the current frame, 4 MBs ahead */ s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4); s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2); - decode_mb_mode(s, mb, mb_x, mb_y, intra4x4_mb, segment_mb); + decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy); prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); @@ -1559,7 +1733,7 @@ static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz); if (mb->mode <= MODE_I4x4) - intra_predict(s, dst, mb, intra4x4_mb, mb_x, mb_y); + intra_predict(s, dst, mb, mb_x, mb_y); else inter_predict(s, dst, mb, mb_x, mb_y); @@ -1644,12 +1818,6 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx) ff_h264_pred_init(&s->hpc, CODEC_ID_VP8); ff_vp8dsp_init(&s->vp8dsp); - // intra pred needs edge emulation among other things - if (avctx->flags&CODEC_FLAG_EMU_EDGE) { - av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported\n"); - return AVERROR_PATCHWELCOME; - } - return 0; } @@ -1659,7 +1827,7 @@ static av_cold int vp8_decode_free(AVCodecContext *avctx) return 0; } -AVCodec vp8_decoder = { +AVCodec ff_vp8_decoder = { "vp8", AVMEDIA_TYPE_VIDEO, CODEC_ID_VP8,