git.sesse.net Git - ffmpeg/blob - libavcodec/vp9.c

   1 /*
   2  * VP9 compatible video decoder
   3  *
   4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
   5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
   6  *
   7  * This file is part of FFmpeg.
   8  *
   9  * FFmpeg is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * FFmpeg is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with FFmpeg; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
  24 #include "avcodec.h"
  25 #include "get_bits.h"
  26 #include "internal.h"
  27 #include "profiles.h"
  28 #include "thread.h"
  29 #include "videodsp.h"
  30 #include "vp56.h"
  31 #include "vp9.h"
  32 #include "vp9data.h"
  33 #include "vp9dec.h"
  34 #include "libavutil/avassert.h"
  35 #include "libavutil/pixdesc.h"
  36
  37 #define VP9_SYNCCODE 0x498342
  38
  39 static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
  40 {
  41     ff_thread_release_buffer(avctx, &f->tf);
  42     av_buffer_unref(&f->extradata);
  43     av_buffer_unref(&f->hwaccel_priv_buf);
  44     f->segmentation_map = NULL;
  45     f->hwaccel_picture_private = NULL;
  46 }
  47
  48 static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
  49 {
  50     VP9Context *s = avctx->priv_data;
  51     int ret, sz;
  52
  53     ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
  54     if (ret < 0)
  55         return ret;
  56
  57     sz = 64 * s->sb_cols * s->sb_rows;
  58     f->extradata = av_buffer_allocz(sz * (1 + sizeof(VP9mvrefPair)));
  59     if (!f->extradata) {
  60         goto fail;
  61     }
  62
  63     f->segmentation_map = f->extradata->data;
  64     f->mv = (VP9mvrefPair *) (f->extradata->data + sz);
  65
  66     if (avctx->hwaccel) {
  67         const AVHWAccel *hwaccel = avctx->hwaccel;
  68         av_assert0(!f->hwaccel_picture_private);
  69         if (hwaccel->frame_priv_data_size) {
  70             f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
  71             if (!f->hwaccel_priv_buf)
  72                 goto fail;
  73             f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
  74         }
  75     }
  76
  77     return 0;
  78
  79 fail:
  80     vp9_frame_unref(avctx, f);
  81     return AVERROR(ENOMEM);
  82 }
  83
  84 static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
  85 {
  86     int ret;
  87
  88     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
  89     if (ret < 0)
  90         return ret;
  91
  92     dst->extradata = av_buffer_ref(src->extradata);
  93     if (!dst->extradata)
  94         goto fail;
  95
  96     dst->segmentation_map = src->segmentation_map;
  97     dst->mv = src->mv;
  98     dst->uses_2pass = src->uses_2pass;
  99
 100     if (src->hwaccel_picture_private) {
 101         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
 102         if (!dst->hwaccel_priv_buf)
 103             goto fail;
 104         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
 105     }
 106
 107     return 0;
 108
 109 fail:
 110     vp9_frame_unref(avctx, dst);
 111     return AVERROR(ENOMEM);
 112 }
 113
 114 static int update_size(AVCodecContext *avctx, int w, int h)
 115 {
 116 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + CONFIG_VP9_D3D11VA_HWACCEL + CONFIG_VP9_VAAPI_HWACCEL)
 117     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
 118     VP9Context *s = avctx->priv_data;
 119     uint8_t *p;
 120     int bytesperpixel = s->bytesperpixel, ret, cols, rows;
 121
 122     av_assert0(w > 0 && h > 0);
 123
 124     if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
 125         if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
 126             return ret;
 127
 128         switch (s->pix_fmt) {
 129         case AV_PIX_FMT_YUV420P:
 130 #if CONFIG_VP9_DXVA2_HWACCEL
 131             *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
 132 #endif
 133 #if CONFIG_VP9_D3D11VA_HWACCEL
 134             *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
 135 #endif
 136 #if CONFIG_VP9_VAAPI_HWACCEL
 137             *fmtp++ = AV_PIX_FMT_VAAPI;
 138 #endif
 139             break;
 140         case AV_PIX_FMT_YUV420P10:
 141         case AV_PIX_FMT_YUV420P12:
 142 #if CONFIG_VP9_VAAPI_HWACCEL
 143             *fmtp++ = AV_PIX_FMT_VAAPI;
 144 #endif
 145             break;
 146         }
 147
 148         *fmtp++ = s->pix_fmt;
 149         *fmtp = AV_PIX_FMT_NONE;
 150
 151         ret = ff_thread_get_format(avctx, pix_fmts);
 152         if (ret < 0)
 153             return ret;
 154
 155         avctx->pix_fmt = ret;
 156         s->gf_fmt  = s->pix_fmt;
 157         s->w = w;
 158         s->h = h;
 159     }
 160
 161     cols = (w + 7) >> 3;
 162     rows = (h + 7) >> 3;
 163
 164     if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
 165         return 0;
 166
 167     s->last_fmt  = s->pix_fmt;
 168     s->sb_cols   = (w + 63) >> 6;
 169     s->sb_rows   = (h + 63) >> 6;
 170     s->cols      = (w + 7) >> 3;
 171     s->rows      = (h + 7) >> 3;
 172
 173 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
 174     av_freep(&s->intra_pred_data[0]);
 175     // FIXME we slightly over-allocate here for subsampled chroma, but a little
 176     // bit of padding shouldn't affect performance...
 177     p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
 178                                 sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
 179     if (!p)
 180         return AVERROR(ENOMEM);
 181     assign(s->intra_pred_data[0],  uint8_t *,             64 * bytesperpixel);
 182     assign(s->intra_pred_data[1],  uint8_t *,             64 * bytesperpixel);
 183     assign(s->intra_pred_data[2],  uint8_t *,             64 * bytesperpixel);
 184     assign(s->above_y_nnz_ctx,     uint8_t *,             16);
 185     assign(s->above_mode_ctx,      uint8_t *,             16);
 186     assign(s->above_mv_ctx,        VP56mv(*)[2],          16);
 187     assign(s->above_uv_nnz_ctx[0], uint8_t *,             16);
 188     assign(s->above_uv_nnz_ctx[1], uint8_t *,             16);
 189     assign(s->above_partition_ctx, uint8_t *,              8);
 190     assign(s->above_skip_ctx,      uint8_t *,              8);
 191     assign(s->above_txfm_ctx,      uint8_t *,              8);
 192     assign(s->above_segpred_ctx,   uint8_t *,              8);
 193     assign(s->above_intra_ctx,     uint8_t *,              8);
 194     assign(s->above_comp_ctx,      uint8_t *,              8);
 195     assign(s->above_ref_ctx,       uint8_t *,              8);
 196     assign(s->above_filter_ctx,    uint8_t *,              8);
 197     assign(s->lflvl,               VP9Filter *,            1);
 198 #undef assign
 199
 200     // these will be re-allocated a little later
 201     av_freep(&s->b_base);
 202     av_freep(&s->block_base);
 203
 204     if (s->s.h.bpp != s->last_bpp) {
 205         ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
 206         ff_videodsp_init(&s->vdsp, s->s.h.bpp);
 207         s->last_bpp = s->s.h.bpp;
 208     }
 209
 210     return 0;
 211 }
 212
 213 static int update_block_buffers(AVCodecContext *avctx)
 214 {
 215     VP9Context *s = avctx->priv_data;
 216     int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
 217
 218     if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
 219         return 0;
 220
 221     av_free(s->b_base);
 222     av_free(s->block_base);
 223     chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
 224     chroma_eobs   = 16 * 16 >> (s->ss_h + s->ss_v);
 225     if (s->s.frames[CUR_FRAME].uses_2pass) {
 226         int sbs = s->sb_cols * s->sb_rows;
 227
 228         s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
 229         s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
 230                                     16 * 16 + 2 * chroma_eobs) * sbs);
 231         if (!s->b_base || !s->block_base)
 232             return AVERROR(ENOMEM);
 233         s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
 234         s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
 235         s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
 236         s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
 237         s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
 238     } else {
 239         s->b_base = av_malloc(sizeof(VP9Block));
 240         s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
 241                                    16 * 16 + 2 * chroma_eobs);
 242         if (!s->b_base || !s->block_base)
 243             return AVERROR(ENOMEM);
 244         s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
 245         s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
 246         s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
 247         s->uveob_base[0] = s->eob_base + 16 * 16;
 248         s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
 249     }
 250     s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;
 251
 252     return 0;
 253 }
 254
 255 // The sign bit is at the end, not the start, of a bit sequence
 256 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
 257 {
 258     int v = get_bits(gb, n);
 259     return get_bits1(gb) ? -v : v;
 260 }
 261
 262 static av_always_inline int inv_recenter_nonneg(int v, int m)
 263 {
 264     if (v > 2 * m)
 265         return v;
 266     if (v & 1)
 267         return m - ((v + 1) >> 1);
 268     return m + (v >> 1);
 269 }
 270
 271 // differential forward probability updates
 272 static int update_prob(VP56RangeCoder *c, int p)
 273 {
 274     static const int inv_map_table[255] = {
 275           7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
 276         189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
 277          10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
 278          25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
 279          40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
 280          55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
 281          70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
 282          86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
 283         101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
 284         116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
 285         131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
 286         146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
 287         161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
 288         177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
 289         192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
 290         207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
 291         222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
 292         237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
 293         252, 253, 253,
 294     };
 295     int d;
 296
 297     /* This code is trying to do a differential probability update. For a
 298      * current probability A in the range [1, 255], the difference to a new
 299      * probability of any value can be expressed differentially as 1-A, 255-A
 300      * where some part of this (absolute range) exists both in positive as
 301      * well as the negative part, whereas another part only exists in one
 302      * half. We're trying to code this shared part differentially, i.e.
 303      * times two where the value of the lowest bit specifies the sign, and
 304      * the single part is then coded on top of this. This absolute difference
 305      * then again has a value of [0, 254], but a bigger value in this range
 306      * indicates that we're further away from the original value A, so we
 307      * can code this as a VLC code, since higher values are increasingly
 308      * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
 309      * updates vs. the 'fine, exact' updates further down the range, which
 310      * adds one extra dimension to this differential update model. */
 311
 312     if (!vp8_rac_get(c)) {
 313         d = vp8_rac_get_uint(c, 4) + 0;
 314     } else if (!vp8_rac_get(c)) {
 315         d = vp8_rac_get_uint(c, 4) + 16;
 316     } else if (!vp8_rac_get(c)) {
 317         d = vp8_rac_get_uint(c, 5) + 32;
 318     } else {
 319         d = vp8_rac_get_uint(c, 7);
 320         if (d >= 65)
 321             d = (d << 1) - 65 + vp8_rac_get(c);
 322         d += 64;
 323         av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
 324     }
 325
 326     return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
 327                     255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
 328 }
 329
 330 static int read_colorspace_details(AVCodecContext *avctx)
 331 {
 332     static const enum AVColorSpace colorspaces[8] = {
 333         AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
 334         AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
 335     };
 336     VP9Context *s = avctx->priv_data;
 337     int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
 338
 339     s->bpp_index = bits;
 340     s->s.h.bpp = 8 + bits * 2;
 341     s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
 342     avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
 343     if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
 344         static const enum AVPixelFormat pix_fmt_rgb[3] = {
 345             AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
 346         };
 347         s->ss_h = s->ss_v = 0;
 348         avctx->color_range = AVCOL_RANGE_JPEG;
 349         s->pix_fmt = pix_fmt_rgb[bits];
 350         if (avctx->profile & 1) {
 351             if (get_bits1(&s->gb)) {
 352                 av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
 353                 return AVERROR_INVALIDDATA;
 354             }
 355         } else {
 356             av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
 357                    avctx->profile);
 358             return AVERROR_INVALIDDATA;
 359         }
 360     } else {
 361         static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
 362             { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
 363               { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
 364             { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
 365               { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
 366             { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
 367               { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
 368         };
 369         avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
 370         if (avctx->profile & 1) {
 371             s->ss_h = get_bits1(&s->gb);
 372             s->ss_v = get_bits1(&s->gb);
 373             s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
 374             if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
 375                 av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
 376                        avctx->profile);
 377                 return AVERROR_INVALIDDATA;
 378             } else if (get_bits1(&s->gb)) {
 379                 av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
 380                        avctx->profile);
 381                 return AVERROR_INVALIDDATA;
 382             }
 383         } else {
 384             s->ss_h = s->ss_v = 1;
 385             s->pix_fmt = pix_fmt_for_ss[bits][1][1];
 386         }
 387     }
 388
 389     return 0;
 390 }
 391
 392 static int decode_frame_header(AVCodecContext *avctx,
 393                                const uint8_t *data, int size, int *ref)
 394 {
 395     VP9Context *s = avctx->priv_data;
 396     int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
 397     int last_invisible;
 398     const uint8_t *data2;
 399
 400     /* general header */
 401     if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
 402         av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
 403         return ret;
 404     }
 405     if (get_bits(&s->gb, 2) != 0x2) { // frame marker
 406         av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
 407         return AVERROR_INVALIDDATA;
 408     }
 409     avctx->profile  = get_bits1(&s->gb);
 410     avctx->profile |= get_bits1(&s->gb) << 1;
 411     if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
 412     if (avctx->profile > 3) {
 413         av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
 414         return AVERROR_INVALIDDATA;
 415     }
 416     s->s.h.profile = avctx->profile;
 417     if (get_bits1(&s->gb)) {
 418         *ref = get_bits(&s->gb, 3);
 419         return 0;
 420     }
 421
 422     s->last_keyframe  = s->s.h.keyframe;
 423     s->s.h.keyframe   = !get_bits1(&s->gb);
 424
 425     last_invisible   = s->s.h.invisible;
 426     s->s.h.invisible = !get_bits1(&s->gb);
 427     s->s.h.errorres  = get_bits1(&s->gb);
 428     s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
 429
 430     if (s->s.h.keyframe) {
 431         if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
 432             av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
 433             return AVERROR_INVALIDDATA;
 434         }
 435         if ((ret = read_colorspace_details(avctx)) < 0)
 436             return ret;
 437         // for profile 1, here follows the subsampling bits
 438         s->s.h.refreshrefmask = 0xff;
 439         w = get_bits(&s->gb, 16) + 1;
 440         h = get_bits(&s->gb, 16) + 1;
 441         if (get_bits1(&s->gb)) // display size
 442             skip_bits(&s->gb, 32);
 443     } else {
 444         s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
 445         s->s.h.resetctx  = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
 446         if (s->s.h.intraonly) {
 447             if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
 448                 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
 449                 return AVERROR_INVALIDDATA;
 450             }
 451             if (avctx->profile >= 1) {
 452                 if ((ret = read_colorspace_details(avctx)) < 0)
 453                     return ret;
 454             } else {
 455                 s->ss_h = s->ss_v = 1;
 456                 s->s.h.bpp = 8;
 457                 s->bpp_index = 0;
 458                 s->bytesperpixel = 1;
 459                 s->pix_fmt = AV_PIX_FMT_YUV420P;
 460                 avctx->colorspace = AVCOL_SPC_BT470BG;
 461                 avctx->color_range = AVCOL_RANGE_JPEG;
 462             }
 463             s->s.h.refreshrefmask = get_bits(&s->gb, 8);
 464             w = get_bits(&s->gb, 16) + 1;
 465             h = get_bits(&s->gb, 16) + 1;
 466             if (get_bits1(&s->gb)) // display size
 467                 skip_bits(&s->gb, 32);
 468         } else {
 469             s->s.h.refreshrefmask = get_bits(&s->gb, 8);
 470             s->s.h.refidx[0]      = get_bits(&s->gb, 3);
 471             s->s.h.signbias[0]    = get_bits1(&s->gb) && !s->s.h.errorres;
 472             s->s.h.refidx[1]      = get_bits(&s->gb, 3);
 473             s->s.h.signbias[1]    = get_bits1(&s->gb) && !s->s.h.errorres;
 474             s->s.h.refidx[2]      = get_bits(&s->gb, 3);
 475             s->s.h.signbias[2]    = get_bits1(&s->gb) && !s->s.h.errorres;
 476             if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
 477                 !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
 478                 !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
 479                 av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
 480                 return AVERROR_INVALIDDATA;
 481             }
 482             if (get_bits1(&s->gb)) {
 483                 w = s->s.refs[s->s.h.refidx[0]].f->width;
 484                 h = s->s.refs[s->s.h.refidx[0]].f->height;
 485             } else if (get_bits1(&s->gb)) {
 486                 w = s->s.refs[s->s.h.refidx[1]].f->width;
 487                 h = s->s.refs[s->s.h.refidx[1]].f->height;
 488             } else if (get_bits1(&s->gb)) {
 489                 w = s->s.refs[s->s.h.refidx[2]].f->width;
 490                 h = s->s.refs[s->s.h.refidx[2]].f->height;
 491             } else {
 492                 w = get_bits(&s->gb, 16) + 1;
 493                 h = get_bits(&s->gb, 16) + 1;
 494             }
 495             // Note that in this code, "CUR_FRAME" is actually before we
 496             // have formally allocated a frame, and thus actually represents
 497             // the _last_ frame
 498             s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
 499                                        s->s.frames[CUR_FRAME].tf.f->height == h;
 500             if (get_bits1(&s->gb)) // display size
 501                 skip_bits(&s->gb, 32);
 502             s->s.h.highprecisionmvs = get_bits1(&s->gb);
 503             s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
 504                                                   get_bits(&s->gb, 2);
 505             s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
 506                                   s->s.h.signbias[0] != s->s.h.signbias[2];
 507             if (s->s.h.allowcompinter) {
 508                 if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
 509                     s->s.h.fixcompref    = 2;
 510                     s->s.h.varcompref[0] = 0;
 511                     s->s.h.varcompref[1] = 1;
 512                 } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
 513                     s->s.h.fixcompref    = 1;
 514                     s->s.h.varcompref[0] = 0;
 515                     s->s.h.varcompref[1] = 2;
 516                 } else {
 517                     s->s.h.fixcompref    = 0;
 518                     s->s.h.varcompref[0] = 1;
 519                     s->s.h.varcompref[1] = 2;
 520                 }
 521             }
 522         }
 523     }
 524     s->s.h.refreshctx   = s->s.h.errorres ? 0 : get_bits1(&s->gb);
 525     s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
 526     s->s.h.framectxid   = c = get_bits(&s->gb, 2);
 527     if (s->s.h.keyframe || s->s.h.intraonly)
 528         s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
 529
 530     /* loopfilter header data */
 531     if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
 532         // reset loopfilter defaults
 533         s->s.h.lf_delta.ref[0] = 1;
 534         s->s.h.lf_delta.ref[1] = 0;
 535         s->s.h.lf_delta.ref[2] = -1;
 536         s->s.h.lf_delta.ref[3] = -1;
 537         s->s.h.lf_delta.mode[0] = 0;
 538         s->s.h.lf_delta.mode[1] = 0;
 539         memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
 540     }
 541     s->s.h.filter.level = get_bits(&s->gb, 6);
 542     sharp = get_bits(&s->gb, 3);
 543     // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
 544     // the old cache values since they are still valid
 545     if (s->s.h.filter.sharpness != sharp)
 546         memset(s->filter_lut.lim_lut, 0, sizeof(s->filter_lut.lim_lut));
 547     s->s.h.filter.sharpness = sharp;
 548     if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
 549         if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
 550             for (i = 0; i < 4; i++)
 551                 if (get_bits1(&s->gb))
 552                     s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
 553             for (i = 0; i < 2; i++)
 554                 if (get_bits1(&s->gb))
 555                     s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
 556         }
 557     }
 558
 559     /* quantization header data */
 560     s->s.h.yac_qi      = get_bits(&s->gb, 8);
 561     s->s.h.ydc_qdelta  = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
 562     s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
 563     s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
 564     s->s.h.lossless    = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
 565                        s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
 566     if (s->s.h.lossless)
 567         avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
 568
 569     /* segmentation header info */
 570     if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
 571         if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
 572             for (i = 0; i < 7; i++)
 573                 s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
 574                                  get_bits(&s->gb, 8) : 255;
 575             if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
 576                 for (i = 0; i < 3; i++)
 577                     s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
 578                                          get_bits(&s->gb, 8) : 255;
 579         }
 580
 581         if (get_bits1(&s->gb)) {
 582             s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
 583             for (i = 0; i < 8; i++) {
 584                 if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
 585                     s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
 586                 if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
 587                     s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
 588                 if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
 589                     s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
 590                 s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
 591             }
 592         }
 593     }
 594
 595     // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
 596     for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
 597         int qyac, qydc, quvac, quvdc, lflvl, sh;
 598
 599         if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
 600             if (s->s.h.segmentation.absolute_vals)
 601                 qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
 602             else
 603                 qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
 604         } else {
 605             qyac  = s->s.h.yac_qi;
 606         }
 607         qydc  = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
 608         quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
 609         quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
 610         qyac  = av_clip_uintp2(qyac, 8);
 611
 612         s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
 613         s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
 614         s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
 615         s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];
 616
 617         sh = s->s.h.filter.level >= 32;
 618         if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
 619             if (s->s.h.segmentation.absolute_vals)
 620                 lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
 621             else
 622                 lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
 623         } else {
 624             lflvl  = s->s.h.filter.level;
 625         }
 626         if (s->s.h.lf_delta.enabled) {
 627             s->s.h.segmentation.feat[i].lflvl[0][0] =
 628             s->s.h.segmentation.feat[i].lflvl[0][1] =
 629                 av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
 630             for (j = 1; j < 4; j++) {
 631                 s->s.h.segmentation.feat[i].lflvl[j][0] =
 632                     av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
 633                                              s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
 634                 s->s.h.segmentation.feat[i].lflvl[j][1] =
 635                     av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
 636                                              s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
 637             }
 638         } else {
 639             memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
 640                    sizeof(s->s.h.segmentation.feat[i].lflvl));
 641         }
 642     }
 643
 644     /* tiling info */
 645     if ((ret = update_size(avctx, w, h)) < 0) {
 646         av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
 647                w, h, s->pix_fmt);
 648         return ret;
 649     }
 650     for (s->s.h.tiling.log2_tile_cols = 0;
 651          s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
 652          s->s.h.tiling.log2_tile_cols++) ;
 653     for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
 654     max = FFMAX(0, max - 1);
 655     while (max > s->s.h.tiling.log2_tile_cols) {
 656         if (get_bits1(&s->gb))
 657             s->s.h.tiling.log2_tile_cols++;
 658         else
 659             break;
 660     }
 661     s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
 662     s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
 663     if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
 664         s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
 665         s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
 666                                  sizeof(VP56RangeCoder) * s->s.h.tiling.tile_cols);
 667         if (!s->c_b) {
 668             av_log(avctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
 669             return AVERROR(ENOMEM);
 670         }
 671     }
 672
 673     /* check reference frames */
 674     if (!s->s.h.keyframe && !s->s.h.intraonly) {
 675         for (i = 0; i < 3; i++) {
 676             AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
 677             int refw = ref->width, refh = ref->height;
 678
 679             if (ref->format != avctx->pix_fmt) {
 680                 av_log(avctx, AV_LOG_ERROR,
 681                        "Ref pixfmt (%s) did not match current frame (%s)",
 682                        av_get_pix_fmt_name(ref->format),
 683                        av_get_pix_fmt_name(avctx->pix_fmt));
 684                 return AVERROR_INVALIDDATA;
 685             } else if (refw == w && refh == h) {
 686                 s->mvscale[i][0] = s->mvscale[i][1] = 0;
 687             } else {
 688                 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
 689                     av_log(avctx, AV_LOG_ERROR,
 690                            "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
 691                            refw, refh, w, h);
 692                     return AVERROR_INVALIDDATA;
 693                 }
 694                 s->mvscale[i][0] = (refw << 14) / w;
 695                 s->mvscale[i][1] = (refh << 14) / h;
 696                 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
 697                 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
 698             }
 699         }
 700     }
 701
 702     if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
 703         s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
 704                            s->prob_ctx[3].p = ff_vp9_default_probs;
 705         memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
 706                sizeof(ff_vp9_default_coef_probs));
 707         memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
 708                sizeof(ff_vp9_default_coef_probs));
 709         memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
 710                sizeof(ff_vp9_default_coef_probs));
 711         memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
 712                sizeof(ff_vp9_default_coef_probs));
 713     } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
 714         s->prob_ctx[c].p = ff_vp9_default_probs;
 715         memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
 716                sizeof(ff_vp9_default_coef_probs));
 717     }
 718
 719     // next 16 bits is size of the rest of the header (arith-coded)
 720     s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
 721     s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
 722
 723     data2 = align_get_bits(&s->gb);
 724     if (size2 > size - (data2 - data)) {
 725         av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
 726         return AVERROR_INVALIDDATA;
 727     }
 728     ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
 729     if (ret < 0)
 730         return ret;
 731
 732     if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
 733         av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
 734         return AVERROR_INVALIDDATA;
 735     }
 736
 737     if (s->s.h.keyframe || s->s.h.intraonly) {
 738         memset(s->counts.coef, 0, sizeof(s->counts.coef));
 739         memset(s->counts.eob,  0, sizeof(s->counts.eob));
 740     } else {
 741         memset(&s->counts, 0, sizeof(s->counts));
 742     }
 743     /* FIXME is it faster to not copy here, but do it down in the fw updates
 744      * as explicit copies if the fw update is missing (and skip the copy upon
 745      * fw update)? */
 746     s->prob.p = s->prob_ctx[c].p;
 747
 748     // txfm updates
 749     if (s->s.h.lossless) {
 750         s->s.h.txfmmode = TX_4X4;
 751     } else {
 752         s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
 753         if (s->s.h.txfmmode == 3)
 754             s->s.h.txfmmode += vp8_rac_get(&s->c);
 755
 756         if (s->s.h.txfmmode == TX_SWITCHABLE) {
 757             for (i = 0; i < 2; i++)
 758                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 759                     s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
 760             for (i = 0; i < 2; i++)
 761                 for (j = 0; j < 2; j++)
 762                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 763                         s->prob.p.tx16p[i][j] =
 764                             update_prob(&s->c, s->prob.p.tx16p[i][j]);
 765             for (i = 0; i < 2; i++)
 766                 for (j = 0; j < 3; j++)
 767                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 768                         s->prob.p.tx32p[i][j] =
 769                             update_prob(&s->c, s->prob.p.tx32p[i][j]);
 770         }
 771     }
 772
 773     // coef updates
 774     for (i = 0; i < 4; i++) {
 775         uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
 776         if (vp8_rac_get(&s->c)) {
 777             for (j = 0; j < 2; j++)
 778                 for (k = 0; k < 2; k++)
 779                     for (l = 0; l < 6; l++)
 780                         for (m = 0; m < 6; m++) {
 781                             uint8_t *p = s->prob.coef[i][j][k][l][m];
 782                             uint8_t *r = ref[j][k][l][m];
 783                             if (m >= 3 && l == 0) // dc only has 3 pt
 784                                 break;
 785                             for (n = 0; n < 3; n++) {
 786                                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 787                                     p[n] = update_prob(&s->c, r[n]);
 788                                 else
 789                                     p[n] = r[n];
 790                             }
 791                             p[3] = 0;
 792                         }
 793         } else {
 794             for (j = 0; j < 2; j++)
 795                 for (k = 0; k < 2; k++)
 796                     for (l = 0; l < 6; l++)
 797                         for (m = 0; m < 6; m++) {
 798                             uint8_t *p = s->prob.coef[i][j][k][l][m];
 799                             uint8_t *r = ref[j][k][l][m];
 800                             if (m > 3 && l == 0) // dc only has 3 pt
 801                                 break;
 802                             memcpy(p, r, 3);
 803                             p[3] = 0;
 804                         }
 805         }
 806         if (s->s.h.txfmmode == i)
 807             break;
 808     }
 809
 810     // mode updates
 811     for (i = 0; i < 3; i++)
 812         if (vp56_rac_get_prob_branchy(&s->c, 252))
 813             s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
 814     if (!s->s.h.keyframe && !s->s.h.intraonly) {
 815         for (i = 0; i < 7; i++)
 816             for (j = 0; j < 3; j++)
 817                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 818                     s->prob.p.mv_mode[i][j] =
 819                         update_prob(&s->c, s->prob.p.mv_mode[i][j]);
 820
 821         if (s->s.h.filtermode == FILTER_SWITCHABLE)
 822             for (i = 0; i < 4; i++)
 823                 for (j = 0; j < 2; j++)
 824                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 825                         s->prob.p.filter[i][j] =
 826                             update_prob(&s->c, s->prob.p.filter[i][j]);
 827
 828         for (i = 0; i < 4; i++)
 829             if (vp56_rac_get_prob_branchy(&s->c, 252))
 830                 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
 831
 832         if (s->s.h.allowcompinter) {
 833             s->s.h.comppredmode = vp8_rac_get(&s->c);
 834             if (s->s.h.comppredmode)
 835                 s->s.h.comppredmode += vp8_rac_get(&s->c);
 836             if (s->s.h.comppredmode == PRED_SWITCHABLE)
 837                 for (i = 0; i < 5; i++)
 838                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 839                         s->prob.p.comp[i] =
 840                             update_prob(&s->c, s->prob.p.comp[i]);
 841         } else {
 842             s->s.h.comppredmode = PRED_SINGLEREF;
 843         }
 844
 845         if (s->s.h.comppredmode != PRED_COMPREF) {
 846             for (i = 0; i < 5; i++) {
 847                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 848                     s->prob.p.single_ref[i][0] =
 849                         update_prob(&s->c, s->prob.p.single_ref[i][0]);
 850                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 851                     s->prob.p.single_ref[i][1] =
 852                         update_prob(&s->c, s->prob.p.single_ref[i][1]);
 853             }
 854         }
 855
 856         if (s->s.h.comppredmode != PRED_SINGLEREF) {
 857             for (i = 0; i < 5; i++)
 858                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 859                     s->prob.p.comp_ref[i] =
 860                         update_prob(&s->c, s->prob.p.comp_ref[i]);
 861         }
 862
 863         for (i = 0; i < 4; i++)
 864             for (j = 0; j < 9; j++)
 865                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 866                     s->prob.p.y_mode[i][j] =
 867                         update_prob(&s->c, s->prob.p.y_mode[i][j]);
 868
 869         for (i = 0; i < 4; i++)
 870             for (j = 0; j < 4; j++)
 871                 for (k = 0; k < 3; k++)
 872                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 873                         s->prob.p.partition[3 - i][j][k] =
 874                             update_prob(&s->c,
 875                                         s->prob.p.partition[3 - i][j][k]);
 876
 877         // mv fields don't use the update_prob subexp model for some reason
 878         for (i = 0; i < 3; i++)
 879             if (vp56_rac_get_prob_branchy(&s->c, 252))
 880                 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
 881
 882         for (i = 0; i < 2; i++) {
 883             if (vp56_rac_get_prob_branchy(&s->c, 252))
 884                 s->prob.p.mv_comp[i].sign =
 885                     (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
 886
 887             for (j = 0; j < 10; j++)
 888                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 889                     s->prob.p.mv_comp[i].classes[j] =
 890                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
 891
 892             if (vp56_rac_get_prob_branchy(&s->c, 252))
 893                 s->prob.p.mv_comp[i].class0 =
 894                     (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
 895
 896             for (j = 0; j < 10; j++)
 897                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 898                     s->prob.p.mv_comp[i].bits[j] =
 899                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
 900         }
 901
 902         for (i = 0; i < 2; i++) {
 903             for (j = 0; j < 2; j++)
 904                 for (k = 0; k < 3; k++)
 905                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 906                         s->prob.p.mv_comp[i].class0_fp[j][k] =
 907                             (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
 908
 909             for (j = 0; j < 3; j++)
 910                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 911                     s->prob.p.mv_comp[i].fp[j] =
 912                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
 913         }
 914
 915         if (s->s.h.highprecisionmvs) {
 916             for (i = 0; i < 2; i++) {
 917                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 918                     s->prob.p.mv_comp[i].class0_hp =
 919                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
 920
 921                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 922                     s->prob.p.mv_comp[i].hp =
 923                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
 924             }
 925         }
 926     }
 927
 928     return (data2 - data) + size2;
 929 }
 930
 931 static void decode_sb(AVCodecContext *avctx, int row, int col, VP9Filter *lflvl,
 932                       ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
 933 {
 934     VP9Context *s = avctx->priv_data;
 935     int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
 936             (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
 937     const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
 938                                                      s->prob.p.partition[bl][c];
 939     enum BlockPartition bp;
 940     ptrdiff_t hbs = 4 >> bl;
 941     AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
 942     ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
 943     int bytesperpixel = s->bytesperpixel;
 944
 945     if (bl == BL_8X8) {
 946         bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
 947         ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
 948     } else if (col + hbs < s->cols) { // FIXME why not <=?
 949         if (row + hbs < s->rows) { // FIXME why not <=?
 950             bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
 951             switch (bp) {
 952             case PARTITION_NONE:
 953                 ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
 954                 break;
 955             case PARTITION_H:
 956                 ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
 957                 yoff  += hbs * 8 * y_stride;
 958                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
 959                 ff_vp9_decode_block(avctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
 960                 break;
 961             case PARTITION_V:
 962                 ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
 963                 yoff  += hbs * 8 * bytesperpixel;
 964                 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
 965                 ff_vp9_decode_block(avctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
 966                 break;
 967             case PARTITION_SPLIT:
 968                 decode_sb(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
 969                 decode_sb(avctx, row, col + hbs, lflvl,
 970                           yoff + 8 * hbs * bytesperpixel,
 971                           uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
 972                 yoff  += hbs * 8 * y_stride;
 973                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
 974                 decode_sb(avctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
 975                 decode_sb(avctx, row + hbs, col + hbs, lflvl,
 976                           yoff + 8 * hbs * bytesperpixel,
 977                           uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
 978                 break;
 979             default:
 980                 av_assert0(0);
 981             }
 982         } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
 983             bp = PARTITION_SPLIT;
 984             decode_sb(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
 985             decode_sb(avctx, row, col + hbs, lflvl,
 986                       yoff + 8 * hbs * bytesperpixel,
 987                       uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
 988         } else {
 989             bp = PARTITION_H;
 990             ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
 991         }
 992     } else if (row + hbs < s->rows) { // FIXME why not <=?
 993         if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
 994             bp = PARTITION_SPLIT;
 995             decode_sb(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
 996             yoff  += hbs * 8 * y_stride;
 997             uvoff += hbs * 8 * uv_stride >> s->ss_v;
 998             decode_sb(avctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
 999         } else {
1000             bp = PARTITION_V;
1001             ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
1002         }
1003     } else {
1004         bp = PARTITION_SPLIT;
1005         decode_sb(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
1006     }
1007     s->counts.partition[bl][c][bp]++;
1008 }
1009
1010 static void decode_sb_mem(AVCodecContext *avctx, int row, int col, VP9Filter *lflvl,
1011                           ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1012 {
1013     VP9Context *s = avctx->priv_data;
1014     VP9Block *b = s->b;
1015     ptrdiff_t hbs = 4 >> bl;
1016     AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1017     ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1018     int bytesperpixel = s->bytesperpixel;
1019
1020     if (bl == BL_8X8) {
1021         av_assert2(b->bl == BL_8X8);
1022         ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1023     } else if (s->b->bl == bl) {
1024         ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1025         if (b->bp == PARTITION_H && row + hbs < s->rows) {
1026             yoff  += hbs * 8 * y_stride;
1027             uvoff += hbs * 8 * uv_stride >> s->ss_v;
1028             ff_vp9_decode_block(avctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
1029         } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
1030             yoff  += hbs * 8 * bytesperpixel;
1031             uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1032             ff_vp9_decode_block(avctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
1033         }
1034     } else {
1035         decode_sb_mem(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
1036         if (col + hbs < s->cols) { // FIXME why not <=?
1037             if (row + hbs < s->rows) {
1038                 decode_sb_mem(avctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1039                               uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1040                 yoff  += hbs * 8 * y_stride;
1041                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1042                 decode_sb_mem(avctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1043                 decode_sb_mem(avctx, row + hbs, col + hbs, lflvl,
1044                               yoff + 8 * hbs * bytesperpixel,
1045                               uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1046             } else {
1047                 yoff  += hbs * 8 * bytesperpixel;
1048                 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1049                 decode_sb_mem(avctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
1050             }
1051         } else if (row + hbs < s->rows) {
1052             yoff  += hbs * 8 * y_stride;
1053             uvoff += hbs * 8 * uv_stride >> s->ss_v;
1054             decode_sb_mem(avctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1055         }
1056     }
1057 }
1058
/**
 * Compute the [start, end) range covered by tile @p idx when @p n units
 * are divided into (1 << log2_n) tiles.
 *
 * Both bounds are clamped to @p n and then multiplied by 8 before being
 * stored.
 *
 * @param start   receives the first position of the tile
 * @param end     receives the position one past the tile
 * @param idx     tile index
 * @param log2_n  log2 of the number of tiles
 * @param n       total number of units being divided
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first = (idx * n) >> log2_n;
    int limit = ((idx + 1) * n) >> log2_n;

    if (first > n)
        first = n;
    if (limit > n)
        limit = n;

    *start = first << 3;
    *end   = limit << 3;
}
1066
1067 static void free_buffers(VP9Context *s)
1068 {
1069     av_freep(&s->intra_pred_data[0]);
1070     av_freep(&s->b_base);
1071     av_freep(&s->block_base);
1072 }
1073
1074 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1075 {
1076     VP9Context *s = avctx->priv_data;
1077     int i;
1078
1079     for (i = 0; i < 3; i++) {
1080         if (s->s.frames[i].tf.f->buf[0])
1081             vp9_frame_unref(avctx, &s->s.frames[i]);
1082         av_frame_free(&s->s.frames[i].tf.f);
1083     }
1084     for (i = 0; i < 8; i++) {
1085         if (s->s.refs[i].f->buf[0])
1086             ff_thread_release_buffer(avctx, &s->s.refs[i]);
1087         av_frame_free(&s->s.refs[i].f);
1088         if (s->next_refs[i].f->buf[0])
1089             ff_thread_release_buffer(avctx, &s->next_refs[i]);
1090         av_frame_free(&s->next_refs[i].f);
1091     }
1092     free_buffers(s);
1093     av_freep(&s->c_b);
1094     s->c_b_size = 0;
1095
1096     return 0;
1097 }
1098
1099
1100 static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
1101                             int *got_frame, AVPacket *pkt)
1102 {
1103     const uint8_t *data = pkt->data;
1104     int size = pkt->size;
1105     VP9Context *s = avctx->priv_data;
1106     int ret, tile_row, tile_col, i, ref, row, col;
1107     int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
1108                             (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
1109     ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1110     AVFrame *f;
1111     int bytesperpixel;
1112
1113     if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
1114         return ret;
1115     } else if (ret == 0) {
1116         if (!s->s.refs[ref].f->buf[0]) {
1117             av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
1118             return AVERROR_INVALIDDATA;
1119         }
1120         if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
1121             return ret;
1122         ((AVFrame *)frame)->pts = pkt->pts;
1123 #if FF_API_PKT_PTS
1124 FF_DISABLE_DEPRECATION_WARNINGS
1125         ((AVFrame *)frame)->pkt_pts = pkt->pts;
1126 FF_ENABLE_DEPRECATION_WARNINGS
1127 #endif
1128         ((AVFrame *)frame)->pkt_dts = pkt->dts;
1129         for (i = 0; i < 8; i++) {
1130             if (s->next_refs[i].f->buf[0])
1131                 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1132             if (s->s.refs[i].f->buf[0] &&
1133                 (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
1134                 return ret;
1135         }
1136         *got_frame = 1;
1137         return pkt->size;
1138     }
1139     data += ret;
1140     size -= ret;
1141
1142     if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
1143         if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
1144             vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
1145         if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1146             (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
1147             return ret;
1148     }
1149     if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
1150         vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
1151     if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1152         (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
1153         return ret;
1154     if (s->s.frames[CUR_FRAME].tf.f->buf[0])
1155         vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME]);
1156     if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
1157         return ret;
1158     f = s->s.frames[CUR_FRAME].tf.f;
1159     f->key_frame = s->s.h.keyframe;
1160     f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1161     ls_y = f->linesize[0];
1162     ls_uv =f->linesize[1];
1163
1164     if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
1165         (s->s.frames[REF_FRAME_MVPAIR].tf.f->width  != s->s.frames[CUR_FRAME].tf.f->width ||
1166          s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
1167         vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
1168     }
1169
1170     // ref frame setup
1171     for (i = 0; i < 8; i++) {
1172         if (s->next_refs[i].f->buf[0])
1173             ff_thread_release_buffer(avctx, &s->next_refs[i]);
1174         if (s->s.h.refreshrefmask & (1 << i)) {
1175             ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
1176         } else if (s->s.refs[i].f->buf[0]) {
1177             ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
1178         }
1179         if (ret < 0)
1180             return ret;
1181     }
1182
1183     if (avctx->hwaccel) {
1184         ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
1185         if (ret < 0)
1186             return ret;
1187         ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
1188         if (ret < 0)
1189             return ret;
1190         ret = avctx->hwaccel->end_frame(avctx);
1191         if (ret < 0)
1192             return ret;
1193         goto finish;
1194     }
1195
1196     // main tile decode loop
1197     bytesperpixel = s->bytesperpixel;
1198     memset(s->above_partition_ctx, 0, s->cols);
1199     memset(s->above_skip_ctx, 0, s->cols);
1200     if (s->s.h.keyframe || s->s.h.intraonly) {
1201         memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
1202     } else {
1203         memset(s->above_mode_ctx, NEARESTMV, s->cols);
1204     }
1205     memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
1206     memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
1207     memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
1208     memset(s->above_segpred_ctx, 0, s->cols);
1209     s->pass = s->s.frames[CUR_FRAME].uses_2pass =
1210         avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
1211     if ((ret = update_block_buffers(avctx)) < 0) {
1212         av_log(avctx, AV_LOG_ERROR,
1213                "Failed to allocate block buffers\n");
1214         return ret;
1215     }
1216     if (s->s.h.refreshctx && s->s.h.parallelmode) {
1217         int j, k, l, m;
1218
1219         for (i = 0; i < 4; i++) {
1220             for (j = 0; j < 2; j++)
1221                 for (k = 0; k < 2; k++)
1222                     for (l = 0; l < 6; l++)
1223                         for (m = 0; m < 6; m++)
1224                             memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
1225                                    s->prob.coef[i][j][k][l][m], 3);
1226             if (s->s.h.txfmmode == i)
1227                 break;
1228         }
1229         s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
1230         ff_thread_finish_setup(avctx);
1231     } else if (!s->s.h.refreshctx) {
1232         ff_thread_finish_setup(avctx);
1233     }
1234
1235     do {
1236         yoff = uvoff = 0;
1237         s->b = s->b_base;
1238         s->block = s->block_base;
1239         s->uvblock[0] = s->uvblock_base[0];
1240         s->uvblock[1] = s->uvblock_base[1];
1241         s->eob = s->eob_base;
1242         s->uveob[0] = s->uveob_base[0];
1243         s->uveob[1] = s->uveob_base[1];
1244
1245         for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1246             set_tile_offset(&s->tile_row_start, &s->tile_row_end,
1247                             tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1248             if (s->pass != 2) {
1249                 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1250                     int64_t tile_size;
1251
1252                     if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1253                         tile_row == s->s.h.tiling.tile_rows - 1) {
1254                         tile_size = size;
1255                     } else {
1256                         tile_size = AV_RB32(data);
1257                         data += 4;
1258                         size -= 4;
1259                     }
1260                     if (tile_size > size) {
1261                         ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1262                         return AVERROR_INVALIDDATA;
1263                     }
1264                     ret = ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
1265                     if (ret < 0)
1266                         return ret;
1267                     if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
1268                         ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1269                         return AVERROR_INVALIDDATA;
1270                     }
1271                     data += tile_size;
1272                     size -= tile_size;
1273                 }
1274             }
1275
1276             for (row = s->tile_row_start; row < s->tile_row_end;
1277                  row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1278                 VP9Filter *lflvl_ptr = s->lflvl;
1279                 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1280
1281                 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1282                     set_tile_offset(&s->tile_col_start, &s->tile_col_end,
1283                                     tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1284
1285                     if (s->pass != 2) {
1286                         memset(s->left_partition_ctx, 0, 8);
1287                         memset(s->left_skip_ctx, 0, 8);
1288                         if (s->s.h.keyframe || s->s.h.intraonly) {
1289                             memset(s->left_mode_ctx, DC_PRED, 16);
1290                         } else {
1291                             memset(s->left_mode_ctx, NEARESTMV, 8);
1292                         }
1293                         memset(s->left_y_nnz_ctx, 0, 16);
1294                         memset(s->left_uv_nnz_ctx, 0, 32);
1295                         memset(s->left_segpred_ctx, 0, 8);
1296
1297                         memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
1298                     }
1299
1300                     for (col = s->tile_col_start;
1301                          col < s->tile_col_end;
1302                          col += 8, yoff2 += 64 * bytesperpixel,
1303                          uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1304                         // FIXME integrate with lf code (i.e. zero after each
1305                         // use, similar to invtxfm coefficients, or similar)
1306                         if (s->pass != 1) {
1307                             memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1308                         }
1309
1310                         if (s->pass == 2) {
1311                             decode_sb_mem(avctx, row, col, lflvl_ptr,
1312                                           yoff2, uvoff2, BL_64X64);
1313                         } else {
1314                             decode_sb(avctx, row, col, lflvl_ptr,
1315                                       yoff2, uvoff2, BL_64X64);
1316                         }
1317                     }
1318                     if (s->pass != 2)
1319                         memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
1320                 }
1321
1322                 if (s->pass == 1)
1323                     continue;
1324
1325                 // backup pre-loopfilter reconstruction data for intra
1326                 // prediction of next row of sb64s
1327                 if (row + 8 < s->rows) {
1328                     memcpy(s->intra_pred_data[0],
1329                            f->data[0] + yoff + 63 * ls_y,
1330                            8 * s->cols * bytesperpixel);
1331                     memcpy(s->intra_pred_data[1],
1332                            f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1333                            8 * s->cols * bytesperpixel >> s->ss_h);
1334                     memcpy(s->intra_pred_data[2],
1335                            f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1336                            8 * s->cols * bytesperpixel >> s->ss_h);
1337                 }
1338
1339                 // loopfilter one row
1340                 if (s->s.h.filter.level) {
1341                     yoff2 = yoff;
1342                     uvoff2 = uvoff;
1343                     lflvl_ptr = s->lflvl;
1344                     for (col = 0; col < s->cols;
1345                          col += 8, yoff2 += 64 * bytesperpixel,
1346                          uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1347                         ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
1348                                              yoff2, uvoff2);
1349                     }
1350                 }
1351
1352                 // FIXME maybe we can make this more finegrained by running the
1353                 // loopfilter per-block instead of after each sbrow
1354                 // In fact that would also make intra pred left preparation easier?
1355                 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
1356             }
1357         }
1358
1359         if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
1360             ff_vp9_adapt_probs(s);
1361             ff_thread_finish_setup(avctx);
1362         }
1363     } while (s->pass++ == 1);
1364     ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1365
1366 finish:
1367     // ref frame setup
1368     for (i = 0; i < 8; i++) {
1369         if (s->s.refs[i].f->buf[0])
1370             ff_thread_release_buffer(avctx, &s->s.refs[i]);
1371         if (s->next_refs[i].f->buf[0] &&
1372             (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
1373             return ret;
1374     }
1375
1376     if (!s->s.h.invisible) {
1377         if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
1378             return ret;
1379         *got_frame = 1;
1380     }
1381
1382     return pkt->size;
1383 }
1384
1385 static void vp9_decode_flush(AVCodecContext *avctx)
1386 {
1387     VP9Context *s = avctx->priv_data;
1388     int i;
1389
1390     for (i = 0; i < 3; i++)
1391         vp9_frame_unref(avctx, &s->s.frames[i]);
1392     for (i = 0; i < 8; i++)
1393         ff_thread_release_buffer(avctx, &s->s.refs[i]);
1394 }
1395
1396 static int init_frames(AVCodecContext *avctx)
1397 {
1398     VP9Context *s = avctx->priv_data;
1399     int i;
1400
1401     for (i = 0; i < 3; i++) {
1402         s->s.frames[i].tf.f = av_frame_alloc();
1403         if (!s->s.frames[i].tf.f) {
1404             vp9_decode_free(avctx);
1405             av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1406             return AVERROR(ENOMEM);
1407         }
1408     }
1409     for (i = 0; i < 8; i++) {
1410         s->s.refs[i].f = av_frame_alloc();
1411         s->next_refs[i].f = av_frame_alloc();
1412         if (!s->s.refs[i].f || !s->next_refs[i].f) {
1413             vp9_decode_free(avctx);
1414             av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1415             return AVERROR(ENOMEM);
1416         }
1417     }
1418
1419     return 0;
1420 }
1421
1422 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1423 {
1424     VP9Context *s = avctx->priv_data;
1425
1426     avctx->internal->allocate_progress = 1;
1427     s->last_bpp = 0;
1428     s->s.h.filter.sharpness = -1;
1429
1430     return init_frames(avctx);
1431 }
1432
1433 #if HAVE_THREADS
1434 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
1435 {
1436     return init_frames(avctx);
1437 }
1438
1439 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1440 {
1441     int i, ret;
1442     VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
1443
1444     for (i = 0; i < 3; i++) {
1445         if (s->s.frames[i].tf.f->buf[0])
1446             vp9_frame_unref(dst, &s->s.frames[i]);
1447         if (ssrc->s.frames[i].tf.f->buf[0]) {
1448             if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
1449                 return ret;
1450         }
1451     }
1452     for (i = 0; i < 8; i++) {
1453         if (s->s.refs[i].f->buf[0])
1454             ff_thread_release_buffer(dst, &s->s.refs[i]);
1455         if (ssrc->next_refs[i].f->buf[0]) {
1456             if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
1457                 return ret;
1458         }
1459     }
1460
1461     s->s.h.invisible = ssrc->s.h.invisible;
1462     s->s.h.keyframe = ssrc->s.h.keyframe;
1463     s->s.h.intraonly = ssrc->s.h.intraonly;
1464     s->ss_v = ssrc->ss_v;
1465     s->ss_h = ssrc->ss_h;
1466     s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1467     s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1468     s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1469     s->bytesperpixel = ssrc->bytesperpixel;
1470     s->gf_fmt = ssrc->gf_fmt;
1471     s->w = ssrc->w;
1472     s->h = ssrc->h;
1473     s->s.h.bpp = ssrc->s.h.bpp;
1474     s->bpp_index = ssrc->bpp_index;
1475     s->pix_fmt = ssrc->pix_fmt;
1476     memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
1477     memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
1478     memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1479            sizeof(s->s.h.segmentation.feat));
1480
1481     return 0;
1482 }
1483 #endif
1484
1485 AVCodec ff_vp9_decoder = {
1486     .name                  = "vp9",
1487     .long_name             = NULL_IF_CONFIG_SMALL("Google VP9"),
1488     .type                  = AVMEDIA_TYPE_VIDEO,
1489     .id                    = AV_CODEC_ID_VP9,
1490     .priv_data_size        = sizeof(VP9Context),
1491     .init                  = vp9_decode_init,
1492     .close                 = vp9_decode_free,
1493     .decode                = vp9_decode_frame,
1494     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
1495     .flush                 = vp9_decode_flush,
1496     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
1497     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
1498     .profiles              = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
1499 };