X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fvp9.c;h=a41f3a3961523525b483d0510457dd3ecf269337;hb=aec42ebc27c46625fb7fac9d5717056044ecac92;hp=1aab6ba03c319895b5ff80ae8523396d733c9530;hpb=a0bc6b51d4f6d01eb4bcaec5489ca67037687a77;p=ffmpeg diff --git a/libavcodec/vp9.c b/libavcodec/vp9.c index 1aab6ba03c3..a41f3a39615 100644 --- a/libavcodec/vp9.c +++ b/libavcodec/vp9.c @@ -68,7 +68,7 @@ typedef struct VP9Context { ptrdiff_t y_stride, uv_stride; uint8_t ss_h, ss_v; - uint8_t last_bpp, bpp, bpp_index, bytesperpixel; + uint8_t last_bpp, bpp_index, bytesperpixel; uint8_t last_keyframe; // sb_cols/rows, rows/cols and last_fmt are used for allocating all internal // arrays, and are thus per-thread. w/h and gf_fmt are synced between threads @@ -258,7 +258,8 @@ static int update_size(AVCodecContext *ctx, int w, int h) if ((res = ff_set_dimensions(ctx, w, h)) < 0) return res; - if (s->pix_fmt == AV_PIX_FMT_YUV420P) { + switch (s->pix_fmt) { + case AV_PIX_FMT_YUV420P: #if CONFIG_VP9_DXVA2_HWACCEL *fmtp++ = AV_PIX_FMT_DXVA2_VLD; #endif @@ -268,6 +269,13 @@ static int update_size(AVCodecContext *ctx, int w, int h) #if CONFIG_VP9_VAAPI_HWACCEL *fmtp++ = AV_PIX_FMT_VAAPI; #endif + break; + case AV_PIX_FMT_YUV420P10: + case AV_PIX_FMT_YUV420P12: +#if CONFIG_VP9_VAAPI_HWACCEL + *fmtp++ = AV_PIX_FMT_VAAPI; +#endif + break; } *fmtp++ = s->pix_fmt; @@ -326,10 +334,10 @@ static int update_size(AVCodecContext *ctx, int w, int h) av_freep(&s->b_base); av_freep(&s->block_base); - if (s->bpp != s->last_bpp) { - ff_vp9dsp_init(&s->dsp, s->bpp, ctx->flags & AV_CODEC_FLAG_BITEXACT); - ff_videodsp_init(&s->vdsp, s->bpp); - s->last_bpp = s->bpp; + if (s->s.h.bpp != s->last_bpp) { + ff_vp9dsp_init(&s->dsp, s->s.h.bpp, ctx->flags & AV_CODEC_FLAG_BITEXACT); + ff_videodsp_init(&s->vdsp, s->s.h.bpp); + s->last_bpp = s->s.h.bpp; } return 0; @@ -458,8 +466,8 @@ static int read_colorspace_details(AVCodecContext *ctx) int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12 s->bpp_index = bits; - s->bpp = 8 + bits * 2; - s->bytesperpixel = (7 + s->bpp) >> 3; + s->s.h.bpp = 8 + bits * 2; + s->bytesperpixel = (7 + s->s.h.bpp) >> 3; ctx->colorspace = colorspaces[get_bits(&s->gb, 3)]; if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1 static const enum AVPixelFormat pix_fmt_rgb[3] = { @@ -571,7 +579,7 @@ static int decode_frame_header(AVCodecContext *ctx, return res; } else { s->ss_h = s->ss_v = 1; - s->bpp = 8; + s->s.h.bpp = 8; s->bpp_index = 0; s->bytesperpixel = 1; s->pix_fmt = AV_PIX_FMT_YUV420P; @@ -844,7 +852,10 @@ static int decode_frame_header(AVCodecContext *ctx, av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n"); return AVERROR_INVALIDDATA; } - ff_vp56_init_range_decoder(&s->c, data2, size2); + res = ff_vp56_init_range_decoder(&s->c, data2, size2); + if (res < 0) + return res; + if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n"); return AVERROR_INVALIDDATA; @@ -2278,7 +2289,7 @@ static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs, const int16_t (*nb)[2], const int16_t *band_counts, const int16_t *qmul) { - return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p, + return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->s.h.bpp, cnt, eob, p, nnz, scan, nb, band_counts, qmul); } @@ -2288,7 +2299,7 @@ static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs, const int16_t (*nb)[2], const int16_t *band_counts, const int16_t *qmul) { - return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p, + return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->s.h.bpp, cnt, eob, p, nnz, scan, nb, band_counts, qmul); } @@ -2479,7 +2490,7 @@ static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t ** int have_top = row > 0 || y > 0; int have_left = col > s->tile_col_start || x > 0; int have_right = x < w - 1; - int bpp = s->bpp; + int bpp = s->s.h.bpp; static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = { [VERT_PRED] = { { DC_127_PRED, VERT_PRED }, { DC_127_PRED, VERT_PRED } }, @@ -2749,8 +2760,11 @@ static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2 // the longest loopfilter of the next sbrow th = (y + bh + 4 * !!my + 7) >> 6; ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0); + // The arm/aarch64 _hv filters read one more row than what actually is + // needed, so switch to emulated edge one pixel sooner vertically + // (!!my * 5) than horizontally (!!mx * 4). if (x < !!mx * 3 || y < !!my * 3 || - x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) { + x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) { s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel, 160, ref_stride, @@ -2784,8 +2798,11 @@ static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc) // the longest loopfilter of the next sbrow th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v); ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0); + // The arm/aarch64 _hv filters read one more row than what actually is + // needed, so switch to emulated edge one pixel sooner vertically + // (!!my * 5) than horizontally (!!mx * 4). if (x < !!mx * 3 || y < !!my * 3 || - x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) { + x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) { s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel, 160, src_stride_u, @@ -2871,7 +2888,10 @@ static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func sm // the longest loopfilter of the next sbrow th = (y + refbh_m1 + 4 + 7) >> 6; ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0); - if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) { + // The arm/aarch64 _hv filters read one more row than what actually is + // needed, so switch to emulated edge one pixel sooner vertically + // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1). + if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) { s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ref - 3 * ref_stride - 3 * bytesperpixel, 288, ref_stride, @@ -2937,7 +2957,10 @@ static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func // the longest loopfilter of the next sbrow th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v); ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0); - if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) { + // The arm/aarch64 _hv filters read one more row than what actually is + // needed, so switch to emulated edge one pixel sooner vertically + // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1). + if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) { s->vdsp.emulated_edge_mc(s->edge_emu_buffer, ref_u - 3 * src_stride_u - 3 * bytesperpixel, 288, src_stride_u, @@ -3298,13 +3321,13 @@ static void decode_b(AVCodecContext *ctx, int row, int col, s->uv_stride = f->linesize[1]; } if (b->intra) { - if (s->bpp > 8) { + if (s->s.h.bpp > 8) { intra_recon_16bpp(ctx, yoff, uvoff); } else { intra_recon_8bpp(ctx, yoff, uvoff); } } else { - if (s->bpp > 8) { + if (s->s.h.bpp > 8) { inter_recon_16bpp(ctx); } else { inter_recon_8bpp(ctx); @@ -4133,7 +4156,9 @@ FF_ENABLE_DEPRECATION_WARNINGS ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0); return AVERROR_INVALIDDATA; } - ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size); + res = ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size); + if (res < 0) + return res; if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0); return AVERROR_INVALIDDATA; @@ -4341,7 +4366,7 @@ static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecCo s->gf_fmt = ssrc->gf_fmt; s->w = ssrc->w; s->h = ssrc->h; - s->bpp = ssrc->bpp; + s->s.h.bpp = ssrc->s.h.bpp; s->bpp_index = ssrc->bpp_index; s->pix_fmt = ssrc->pix_fmt; memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));