git.sesse.net Git - ffmpeg/blob - libavcodec/vp9.c

   1 /*
   2  * VP9 compatible video decoder
   3  *
   4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
   5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
   6  *
   7  * This file is part of FFmpeg.
   8  *
   9  * FFmpeg is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * FFmpeg is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with FFmpeg; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
  24 #include "avcodec.h"
  25 #include "get_bits.h"
  26 #include "hwaccel.h"
  27 #include "internal.h"
  28 #include "profiles.h"
  29 #include "thread.h"
  30 #include "videodsp.h"
  31 #include "vp56.h"
  32 #include "vp9.h"
  33 #include "vp9data.h"
  34 #include "vp9dec.h"
  35 #include "libavutil/avassert.h"
  36 #include "libavutil/pixdesc.h"
  37
  38 #define VP9_SYNCCODE 0x498342
  39
  40 #if HAVE_THREADS
  41 static void vp9_free_entries(AVCodecContext *avctx) {
  42     VP9Context *s = avctx->priv_data;
  43
  44     if (avctx->active_thread_type & FF_THREAD_SLICE)  {
  45         pthread_mutex_destroy(&s->progress_mutex);
  46         pthread_cond_destroy(&s->progress_cond);
  47         av_freep(&s->entries);
  48     }
  49 }
  50
  51 static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
  52     VP9Context *s = avctx->priv_data;
  53     int i;
  54
  55     if (avctx->active_thread_type & FF_THREAD_SLICE)  {
  56         if (s->entries)
  57             av_freep(&s->entries);
  58
  59         s->entries = av_malloc_array(n, sizeof(atomic_int));
  60
  61         if (!s->entries) {
  62             av_freep(&s->entries);
  63             return AVERROR(ENOMEM);
  64         }
  65
  66         for (i  = 0; i < n; i++)
  67             atomic_init(&s->entries[i], 0);
  68
  69         pthread_mutex_init(&s->progress_mutex, NULL);
  70         pthread_cond_init(&s->progress_cond, NULL);
  71     }
  72     return 0;
  73 }
  74
  75 static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
  76     pthread_mutex_lock(&s->progress_mutex);
  77     atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
  78     pthread_cond_signal(&s->progress_cond);
  79     pthread_mutex_unlock(&s->progress_mutex);
  80 }
  81
  82 static void vp9_await_tile_progress(VP9Context *s, int field, int n) {
  83     if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
  84         return;
  85
  86     pthread_mutex_lock(&s->progress_mutex);
  87     while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) != n)
  88         pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
  89     pthread_mutex_unlock(&s->progress_mutex);
  90 }
  91 #else
  92 static void vp9_free_entries(AVCodecContext *avctx) {}
  93 static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
  94 #endif
  95
  96 static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
  97 {
  98     ff_thread_release_buffer(avctx, &f->tf);
  99     av_buffer_unref(&f->extradata);
 100     av_buffer_unref(&f->hwaccel_priv_buf);
 101     f->segmentation_map = NULL;
 102     f->hwaccel_picture_private = NULL;
 103 }
 104
 105 static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
 106 {
 107     VP9Context *s = avctx->priv_data;
 108     int ret, sz;
 109
 110     ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
 111     if (ret < 0)
 112         return ret;
 113
 114     sz = 64 * s->sb_cols * s->sb_rows;
 115     f->extradata = av_buffer_allocz(sz * (1 + sizeof(VP9mvrefPair)));
 116     if (!f->extradata) {
 117         goto fail;
 118     }
 119
 120     f->segmentation_map = f->extradata->data;
 121     f->mv = (VP9mvrefPair *) (f->extradata->data + sz);
 122
 123     if (avctx->hwaccel) {
 124         const AVHWAccel *hwaccel = avctx->hwaccel;
 125         av_assert0(!f->hwaccel_picture_private);
 126         if (hwaccel->frame_priv_data_size) {
 127             f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
 128             if (!f->hwaccel_priv_buf)
 129                 goto fail;
 130             f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
 131         }
 132     }
 133
 134     return 0;
 135
 136 fail:
 137     vp9_frame_unref(avctx, f);
 138     return AVERROR(ENOMEM);
 139 }
 140
 141 static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
 142 {
 143     int ret;
 144
 145     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
 146     if (ret < 0)
 147         return ret;
 148
 149     dst->extradata = av_buffer_ref(src->extradata);
 150     if (!dst->extradata)
 151         goto fail;
 152
 153     dst->segmentation_map = src->segmentation_map;
 154     dst->mv = src->mv;
 155     dst->uses_2pass = src->uses_2pass;
 156
 157     if (src->hwaccel_picture_private) {
 158         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
 159         if (!dst->hwaccel_priv_buf)
 160             goto fail;
 161         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
 162     }
 163
 164     return 0;
 165
 166 fail:
 167     vp9_frame_unref(avctx, dst);
 168     return AVERROR(ENOMEM);
 169 }
 170
 171 static int update_size(AVCodecContext *avctx, int w, int h)
 172 {
 173 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
 174                      CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
 175                      CONFIG_VP9_NVDEC_HWACCEL + \
 176                      CONFIG_VP9_VAAPI_HWACCEL)
 177     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
 178     VP9Context *s = avctx->priv_data;
 179     uint8_t *p;
 180     int bytesperpixel = s->bytesperpixel, ret, cols, rows;
 181     int lflvl_len, i;
 182
 183     av_assert0(w > 0 && h > 0);
 184
 185     if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
 186         if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
 187             return ret;
 188
 189         switch (s->pix_fmt) {
 190         case AV_PIX_FMT_YUV420P:
 191         case AV_PIX_FMT_YUV420P10:
 192 #if CONFIG_VP9_DXVA2_HWACCEL
 193             *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
 194 #endif
 195 #if CONFIG_VP9_D3D11VA_HWACCEL
 196             *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
 197             *fmtp++ = AV_PIX_FMT_D3D11;
 198 #endif
 199 #if CONFIG_VP9_NVDEC_HWACCEL
 200             *fmtp++ = AV_PIX_FMT_CUDA;
 201 #endif
 202 #if CONFIG_VP9_VAAPI_HWACCEL
 203             *fmtp++ = AV_PIX_FMT_VAAPI;
 204 #endif
 205             break;
 206         case AV_PIX_FMT_YUV420P12:
 207 #if CONFIG_VP9_NVDEC_HWACCEL
 208             *fmtp++ = AV_PIX_FMT_CUDA;
 209 #endif
 210 #if CONFIG_VP9_VAAPI_HWACCEL
 211             *fmtp++ = AV_PIX_FMT_VAAPI;
 212 #endif
 213             break;
 214         }
 215
 216         *fmtp++ = s->pix_fmt;
 217         *fmtp = AV_PIX_FMT_NONE;
 218
 219         ret = ff_thread_get_format(avctx, pix_fmts);
 220         if (ret < 0)
 221             return ret;
 222
 223         avctx->pix_fmt = ret;
 224         s->gf_fmt  = s->pix_fmt;
 225         s->w = w;
 226         s->h = h;
 227     }
 228
 229     cols = (w + 7) >> 3;
 230     rows = (h + 7) >> 3;
 231
 232     if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
 233         return 0;
 234
 235     s->last_fmt  = s->pix_fmt;
 236     s->sb_cols   = (w + 63) >> 6;
 237     s->sb_rows   = (h + 63) >> 6;
 238     s->cols      = (w + 7) >> 3;
 239     s->rows      = (h + 7) >> 3;
 240     lflvl_len    = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;
 241
 242 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
 243     av_freep(&s->intra_pred_data[0]);
 244     // FIXME we slightly over-allocate here for subsampled chroma, but a little
 245     // bit of padding shouldn't affect performance...
 246     p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
 247                                 lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
 248     if (!p)
 249         return AVERROR(ENOMEM);
 250     assign(s->intra_pred_data[0],  uint8_t *,             64 * bytesperpixel);
 251     assign(s->intra_pred_data[1],  uint8_t *,             64 * bytesperpixel);
 252     assign(s->intra_pred_data[2],  uint8_t *,             64 * bytesperpixel);
 253     assign(s->above_y_nnz_ctx,     uint8_t *,             16);
 254     assign(s->above_mode_ctx,      uint8_t *,             16);
 255     assign(s->above_mv_ctx,        VP56mv(*)[2],          16);
 256     assign(s->above_uv_nnz_ctx[0], uint8_t *,             16);
 257     assign(s->above_uv_nnz_ctx[1], uint8_t *,             16);
 258     assign(s->above_partition_ctx, uint8_t *,              8);
 259     assign(s->above_skip_ctx,      uint8_t *,              8);
 260     assign(s->above_txfm_ctx,      uint8_t *,              8);
 261     assign(s->above_segpred_ctx,   uint8_t *,              8);
 262     assign(s->above_intra_ctx,     uint8_t *,              8);
 263     assign(s->above_comp_ctx,      uint8_t *,              8);
 264     assign(s->above_ref_ctx,       uint8_t *,              8);
 265     assign(s->above_filter_ctx,    uint8_t *,              8);
 266     assign(s->lflvl,               VP9Filter *,            lflvl_len);
 267 #undef assign
 268
 269     if (s->td) {
 270         for (i = 0; i < s->active_tile_cols; i++) {
 271             av_freep(&s->td[i].b_base);
 272             av_freep(&s->td[i].block_base);
 273         }
 274     }
 275
 276     if (s->s.h.bpp != s->last_bpp) {
 277         ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
 278         ff_videodsp_init(&s->vdsp, s->s.h.bpp);
 279         s->last_bpp = s->s.h.bpp;
 280     }
 281
 282     return 0;
 283 }
 284
 285 static int update_block_buffers(AVCodecContext *avctx)
 286 {
 287     int i;
 288     VP9Context *s = avctx->priv_data;
 289     int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
 290     VP9TileData *td = &s->td[0];
 291
 292     if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
 293         return 0;
 294
 295     av_free(td->b_base);
 296     av_free(td->block_base);
 297     chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
 298     chroma_eobs   = 16 * 16 >> (s->ss_h + s->ss_v);
 299     if (s->s.frames[CUR_FRAME].uses_2pass) {
 300         int sbs = s->sb_cols * s->sb_rows;
 301
 302         td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
 303         td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
 304                                     16 * 16 + 2 * chroma_eobs) * sbs);
 305         if (!td->b_base || !td->block_base)
 306             return AVERROR(ENOMEM);
 307         td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
 308         td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
 309         td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
 310         td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
 311         td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;
 312     } else {
 313         for (i = 1; i < s->active_tile_cols; i++) {
 314             if (s->td[i].b_base && s->td[i].block_base) {
 315                 av_free(s->td[i].b_base);
 316                 av_free(s->td[i].block_base);
 317             }
 318         }
 319         for (i = 0; i < s->active_tile_cols; i++) {
 320             s->td[i].b_base = av_malloc(sizeof(VP9Block));
 321             s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
 322                                        16 * 16 + 2 * chroma_eobs);
 323             if (!s->td[i].b_base || !s->td[i].block_base)
 324                 return AVERROR(ENOMEM);
 325             s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
 326             s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
 327             s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
 328             s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
 329             s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;
 330         }
 331     }
 332     s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;
 333
 334     return 0;
 335 }
 336
 337 // The sign bit is at the end, not the start, of a bit sequence
 338 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
 339 {
 340     int v = get_bits(gb, n);
 341     return get_bits1(gb) ? -v : v;
 342 }
 343
 344 static av_always_inline int inv_recenter_nonneg(int v, int m)
 345 {
 346     if (v > 2 * m)
 347         return v;
 348     if (v & 1)
 349         return m - ((v + 1) >> 1);
 350     return m + (v >> 1);
 351 }
 352
 353 // differential forward probability updates
 354 static int update_prob(VP56RangeCoder *c, int p)
 355 {
 356     static const int inv_map_table[255] = {
 357           7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
 358         189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
 359          10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
 360          25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
 361          40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
 362          55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
 363          70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
 364          86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
 365         101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
 366         116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
 367         131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
 368         146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
 369         161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
 370         177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
 371         192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
 372         207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
 373         222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
 374         237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
 375         252, 253, 253,
 376     };
 377     int d;
 378
 379     /* This code is trying to do a differential probability update. For a
 380      * current probability A in the range [1, 255], the difference to a new
 381      * probability of any value can be expressed differentially as 1-A, 255-A
 382      * where some part of this (absolute range) exists both in positive as
 383      * well as the negative part, whereas another part only exists in one
 384      * half. We're trying to code this shared part differentially, i.e.
 385      * times two where the value of the lowest bit specifies the sign, and
 386      * the single part is then coded on top of this. This absolute difference
 387      * then again has a value of [0, 254], but a bigger value in this range
 388      * indicates that we're further away from the original value A, so we
 389      * can code this as a VLC code, since higher values are increasingly
 390      * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
 391      * updates vs. the 'fine, exact' updates further down the range, which
 392      * adds one extra dimension to this differential update model. */
 393
 394     if (!vp8_rac_get(c)) {
 395         d = vp8_rac_get_uint(c, 4) + 0;
 396     } else if (!vp8_rac_get(c)) {
 397         d = vp8_rac_get_uint(c, 4) + 16;
 398     } else if (!vp8_rac_get(c)) {
 399         d = vp8_rac_get_uint(c, 5) + 32;
 400     } else {
 401         d = vp8_rac_get_uint(c, 7);
 402         if (d >= 65)
 403             d = (d << 1) - 65 + vp8_rac_get(c);
 404         d += 64;
 405         av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
 406     }
 407
 408     return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
 409                     255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
 410 }
 411
 412 static int read_colorspace_details(AVCodecContext *avctx)
 413 {
 414     static const enum AVColorSpace colorspaces[8] = {
 415         AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
 416         AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
 417     };
 418     VP9Context *s = avctx->priv_data;
 419     int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
 420
 421     s->bpp_index = bits;
 422     s->s.h.bpp = 8 + bits * 2;
 423     s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
 424     avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
 425     if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
 426         static const enum AVPixelFormat pix_fmt_rgb[3] = {
 427             AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
 428         };
 429         s->ss_h = s->ss_v = 0;
 430         avctx->color_range = AVCOL_RANGE_JPEG;
 431         s->pix_fmt = pix_fmt_rgb[bits];
 432         if (avctx->profile & 1) {
 433             if (get_bits1(&s->gb)) {
 434                 av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
 435                 return AVERROR_INVALIDDATA;
 436             }
 437         } else {
 438             av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
 439                    avctx->profile);
 440             return AVERROR_INVALIDDATA;
 441         }
 442     } else {
 443         static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
 444             { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
 445               { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
 446             { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
 447               { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
 448             { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
 449               { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
 450         };
 451         avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
 452         if (avctx->profile & 1) {
 453             s->ss_h = get_bits1(&s->gb);
 454             s->ss_v = get_bits1(&s->gb);
 455             s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
 456             if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
 457                 av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
 458                        avctx->profile);
 459                 return AVERROR_INVALIDDATA;
 460             } else if (get_bits1(&s->gb)) {
 461                 av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
 462                        avctx->profile);
 463                 return AVERROR_INVALIDDATA;
 464             }
 465         } else {
 466             s->ss_h = s->ss_v = 1;
 467             s->pix_fmt = pix_fmt_for_ss[bits][1][1];
 468         }
 469     }
 470
 471     return 0;
 472 }
 473
 474 static int decode_frame_header(AVCodecContext *avctx,
 475                                const uint8_t *data, int size, int *ref)
 476 {
 477     VP9Context *s = avctx->priv_data;
 478     int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
 479     int last_invisible;
 480     const uint8_t *data2;
 481
 482     /* general header */
 483     if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
 484         av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
 485         return ret;
 486     }
 487     if (get_bits(&s->gb, 2) != 0x2) { // frame marker
 488         av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
 489         return AVERROR_INVALIDDATA;
 490     }
 491     avctx->profile  = get_bits1(&s->gb);
 492     avctx->profile |= get_bits1(&s->gb) << 1;
 493     if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
 494     if (avctx->profile > 3) {
 495         av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
 496         return AVERROR_INVALIDDATA;
 497     }
 498     s->s.h.profile = avctx->profile;
 499     if (get_bits1(&s->gb)) {
 500         *ref = get_bits(&s->gb, 3);
 501         return 0;
 502     }
 503
 504     s->last_keyframe  = s->s.h.keyframe;
 505     s->s.h.keyframe   = !get_bits1(&s->gb);
 506
 507     last_invisible   = s->s.h.invisible;
 508     s->s.h.invisible = !get_bits1(&s->gb);
 509     s->s.h.errorres  = get_bits1(&s->gb);
 510     s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
 511
 512     if (s->s.h.keyframe) {
 513         if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
 514             av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
 515             return AVERROR_INVALIDDATA;
 516         }
 517         if ((ret = read_colorspace_details(avctx)) < 0)
 518             return ret;
 519         // for profile 1, here follows the subsampling bits
 520         s->s.h.refreshrefmask = 0xff;
 521         w = get_bits(&s->gb, 16) + 1;
 522         h = get_bits(&s->gb, 16) + 1;
 523         if (get_bits1(&s->gb)) // display size
 524             skip_bits(&s->gb, 32);
 525     } else {
 526         s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
 527         s->s.h.resetctx  = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
 528         if (s->s.h.intraonly) {
 529             if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
 530                 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
 531                 return AVERROR_INVALIDDATA;
 532             }
 533             if (avctx->profile >= 1) {
 534                 if ((ret = read_colorspace_details(avctx)) < 0)
 535                     return ret;
 536             } else {
 537                 s->ss_h = s->ss_v = 1;
 538                 s->s.h.bpp = 8;
 539                 s->bpp_index = 0;
 540                 s->bytesperpixel = 1;
 541                 s->pix_fmt = AV_PIX_FMT_YUV420P;
 542                 avctx->colorspace = AVCOL_SPC_BT470BG;
 543                 avctx->color_range = AVCOL_RANGE_MPEG;
 544             }
 545             s->s.h.refreshrefmask = get_bits(&s->gb, 8);
 546             w = get_bits(&s->gb, 16) + 1;
 547             h = get_bits(&s->gb, 16) + 1;
 548             if (get_bits1(&s->gb)) // display size
 549                 skip_bits(&s->gb, 32);
 550         } else {
 551             s->s.h.refreshrefmask = get_bits(&s->gb, 8);
 552             s->s.h.refidx[0]      = get_bits(&s->gb, 3);
 553             s->s.h.signbias[0]    = get_bits1(&s->gb) && !s->s.h.errorres;
 554             s->s.h.refidx[1]      = get_bits(&s->gb, 3);
 555             s->s.h.signbias[1]    = get_bits1(&s->gb) && !s->s.h.errorres;
 556             s->s.h.refidx[2]      = get_bits(&s->gb, 3);
 557             s->s.h.signbias[2]    = get_bits1(&s->gb) && !s->s.h.errorres;
 558             if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
 559                 !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
 560                 !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
 561                 av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
 562                 return AVERROR_INVALIDDATA;
 563             }
 564             if (get_bits1(&s->gb)) {
 565                 w = s->s.refs[s->s.h.refidx[0]].f->width;
 566                 h = s->s.refs[s->s.h.refidx[0]].f->height;
 567             } else if (get_bits1(&s->gb)) {
 568                 w = s->s.refs[s->s.h.refidx[1]].f->width;
 569                 h = s->s.refs[s->s.h.refidx[1]].f->height;
 570             } else if (get_bits1(&s->gb)) {
 571                 w = s->s.refs[s->s.h.refidx[2]].f->width;
 572                 h = s->s.refs[s->s.h.refidx[2]].f->height;
 573             } else {
 574                 w = get_bits(&s->gb, 16) + 1;
 575                 h = get_bits(&s->gb, 16) + 1;
 576             }
 577             // Note that in this code, "CUR_FRAME" is actually before we
 578             // have formally allocated a frame, and thus actually represents
 579             // the _last_ frame
 580             s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
 581                                        s->s.frames[CUR_FRAME].tf.f->height == h;
 582             if (get_bits1(&s->gb)) // display size
 583                 skip_bits(&s->gb, 32);
 584             s->s.h.highprecisionmvs = get_bits1(&s->gb);
 585             s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
 586                                                   get_bits(&s->gb, 2);
 587             s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
 588                                   s->s.h.signbias[0] != s->s.h.signbias[2];
 589             if (s->s.h.allowcompinter) {
 590                 if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
 591                     s->s.h.fixcompref    = 2;
 592                     s->s.h.varcompref[0] = 0;
 593                     s->s.h.varcompref[1] = 1;
 594                 } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
 595                     s->s.h.fixcompref    = 1;
 596                     s->s.h.varcompref[0] = 0;
 597                     s->s.h.varcompref[1] = 2;
 598                 } else {
 599                     s->s.h.fixcompref    = 0;
 600                     s->s.h.varcompref[0] = 1;
 601                     s->s.h.varcompref[1] = 2;
 602                 }
 603             }
 604         }
 605     }
 606     s->s.h.refreshctx   = s->s.h.errorres ? 0 : get_bits1(&s->gb);
 607     s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
 608     s->s.h.framectxid   = c = get_bits(&s->gb, 2);
 609     if (s->s.h.keyframe || s->s.h.intraonly)
 610         s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
 611
 612     /* loopfilter header data */
 613     if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
 614         // reset loopfilter defaults
 615         s->s.h.lf_delta.ref[0] = 1;
 616         s->s.h.lf_delta.ref[1] = 0;
 617         s->s.h.lf_delta.ref[2] = -1;
 618         s->s.h.lf_delta.ref[3] = -1;
 619         s->s.h.lf_delta.mode[0] = 0;
 620         s->s.h.lf_delta.mode[1] = 0;
 621         memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
 622     }
 623     s->s.h.filter.level = get_bits(&s->gb, 6);
 624     sharp = get_bits(&s->gb, 3);
 625     // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
 626     // the old cache values since they are still valid
 627     if (s->s.h.filter.sharpness != sharp) {
 628         for (i = 1; i <= 63; i++) {
 629             int limit = i;
 630
 631             if (sharp > 0) {
 632                 limit >>= (sharp + 3) >> 2;
 633                 limit = FFMIN(limit, 9 - sharp);
 634             }
 635             limit = FFMAX(limit, 1);
 636
 637             s->filter_lut.lim_lut[i] = limit;
 638             s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
 639         }
 640     }
 641     s->s.h.filter.sharpness = sharp;
 642     if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
 643         if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
 644             for (i = 0; i < 4; i++)
 645                 if (get_bits1(&s->gb))
 646                     s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
 647             for (i = 0; i < 2; i++)
 648                 if (get_bits1(&s->gb))
 649                     s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
 650         }
 651     }
 652
 653     /* quantization header data */
 654     s->s.h.yac_qi      = get_bits(&s->gb, 8);
 655     s->s.h.ydc_qdelta  = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
 656     s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
 657     s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
 658     s->s.h.lossless    = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
 659                        s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
 660     if (s->s.h.lossless)
 661         avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
 662
 663     /* segmentation header info */
 664     if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
 665         if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
 666             for (i = 0; i < 7; i++)
 667                 s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
 668                                  get_bits(&s->gb, 8) : 255;
 669             if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
 670                 for (i = 0; i < 3; i++)
 671                     s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
 672                                          get_bits(&s->gb, 8) : 255;
 673         }
 674
 675         if (get_bits1(&s->gb)) {
 676             s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
 677             for (i = 0; i < 8; i++) {
 678                 if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
 679                     s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
 680                 if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
 681                     s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
 682                 if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
 683                     s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
 684                 s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
 685             }
 686         }
 687     }
 688
 689     // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
 690     for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
 691         int qyac, qydc, quvac, quvdc, lflvl, sh;
 692
 693         if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
 694             if (s->s.h.segmentation.absolute_vals)
 695                 qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
 696             else
 697                 qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
 698         } else {
 699             qyac  = s->s.h.yac_qi;
 700         }
 701         qydc  = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
 702         quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
 703         quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
 704         qyac  = av_clip_uintp2(qyac, 8);
 705
 706         s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
 707         s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
 708         s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
 709         s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];
 710
 711         sh = s->s.h.filter.level >= 32;
 712         if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
 713             if (s->s.h.segmentation.absolute_vals)
 714                 lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
 715             else
 716                 lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
 717         } else {
 718             lflvl  = s->s.h.filter.level;
 719         }
 720         if (s->s.h.lf_delta.enabled) {
 721             s->s.h.segmentation.feat[i].lflvl[0][0] =
 722             s->s.h.segmentation.feat[i].lflvl[0][1] =
 723                 av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
 724             for (j = 1; j < 4; j++) {
 725                 s->s.h.segmentation.feat[i].lflvl[j][0] =
 726                     av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
 727                                              s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
 728                 s->s.h.segmentation.feat[i].lflvl[j][1] =
 729                     av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
 730                                              s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
 731             }
 732         } else {
 733             memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
 734                    sizeof(s->s.h.segmentation.feat[i].lflvl));
 735         }
 736     }
 737
 738     /* tiling info */
 739     if ((ret = update_size(avctx, w, h)) < 0) {
 740         av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
 741                w, h, s->pix_fmt);
 742         return ret;
 743     }
 744     for (s->s.h.tiling.log2_tile_cols = 0;
 745          s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
 746          s->s.h.tiling.log2_tile_cols++) ;
 747     for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
 748     max = FFMAX(0, max - 1);
 749     while (max > s->s.h.tiling.log2_tile_cols) {
 750         if (get_bits1(&s->gb))
 751             s->s.h.tiling.log2_tile_cols++;
 752         else
 753             break;
 754     }
 755     s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
 756     s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
 757     if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
 758         int n_range_coders;
 759         VP56RangeCoder *rc;
 760
 761         if (s->td) {
 762             for (i = 0; i < s->active_tile_cols; i++) {
 763                 av_free(s->td[i].b_base);
 764                 av_free(s->td[i].block_base);
 765             }
 766             av_free(s->td);
 767         }
 768
 769         s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
 770         vp9_free_entries(avctx);
 771         s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
 772                               s->s.h.tiling.tile_cols : 1;
 773         vp9_alloc_entries(avctx, s->sb_rows);
 774         if (avctx->active_thread_type == FF_THREAD_SLICE) {
 775             n_range_coders = 4; // max_tile_rows
 776         } else {
 777             n_range_coders = s->s.h.tiling.tile_cols;
 778         }
 779         s->td = av_mallocz_array(s->active_tile_cols, sizeof(VP9TileData) +
 780                                  n_range_coders * sizeof(VP56RangeCoder));
 781         if (!s->td)
 782             return AVERROR(ENOMEM);
 783         rc = (VP56RangeCoder *) &s->td[s->active_tile_cols];
 784         for (i = 0; i < s->active_tile_cols; i++) {
 785             s->td[i].s = s;
 786             s->td[i].c_b = rc;
 787             rc += n_range_coders;
 788         }
 789     }
 790
 791     /* check reference frames */
 792     if (!s->s.h.keyframe && !s->s.h.intraonly) {
 793         for (i = 0; i < 3; i++) {
 794             AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
 795             int refw = ref->width, refh = ref->height;
 796
 797             if (ref->format != avctx->pix_fmt) {
 798                 av_log(avctx, AV_LOG_ERROR,
 799                        "Ref pixfmt (%s) did not match current frame (%s)",
 800                        av_get_pix_fmt_name(ref->format),
 801                        av_get_pix_fmt_name(avctx->pix_fmt));
 802                 return AVERROR_INVALIDDATA;
 803             } else if (refw == w && refh == h) {
 804                 s->mvscale[i][0] = s->mvscale[i][1] = 0;
 805             } else {
 806                 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
 807                     av_log(avctx, AV_LOG_ERROR,
 808                            "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
 809                            refw, refh, w, h);
 810                     return AVERROR_INVALIDDATA;
 811                 }
 812                 s->mvscale[i][0] = (refw << 14) / w;
 813                 s->mvscale[i][1] = (refh << 14) / h;
 814                 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
 815                 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
 816             }
 817         }
 818     }
 819
 820     if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
 821         s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
 822                            s->prob_ctx[3].p = ff_vp9_default_probs;
 823         memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
 824                sizeof(ff_vp9_default_coef_probs));
 825         memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
 826                sizeof(ff_vp9_default_coef_probs));
 827         memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
 828                sizeof(ff_vp9_default_coef_probs));
 829         memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
 830                sizeof(ff_vp9_default_coef_probs));
 831     } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
 832         s->prob_ctx[c].p = ff_vp9_default_probs;
 833         memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
 834                sizeof(ff_vp9_default_coef_probs));
 835     }
 836
 837     // next 16 bits is size of the rest of the header (arith-coded)
 838     s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
 839     s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
 840
 841     data2 = align_get_bits(&s->gb);
 842     if (size2 > size - (data2 - data)) {
 843         av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
 844         return AVERROR_INVALIDDATA;
 845     }
 846     ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
 847     if (ret < 0)
 848         return ret;
 849
 850     if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
 851         av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
 852         return AVERROR_INVALIDDATA;
 853     }
 854
 855     for (i = 0; i < s->active_tile_cols; i++) {
 856         if (s->s.h.keyframe || s->s.h.intraonly) {
 857             memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
 858             memset(s->td[i].counts.eob,  0, sizeof(s->td[0].counts.eob));
 859         } else {
 860             memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));
 861         }
 862     }
 863
 864     /* FIXME is it faster to not copy here, but do it down in the fw updates
 865      * as explicit copies if the fw update is missing (and skip the copy upon
 866      * fw update)? */
 867     s->prob.p = s->prob_ctx[c].p;
 868
 869     // txfm updates
 870     if (s->s.h.lossless) {
 871         s->s.h.txfmmode = TX_4X4;
 872     } else {
 873         s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
 874         if (s->s.h.txfmmode == 3)
 875             s->s.h.txfmmode += vp8_rac_get(&s->c);
 876
 877         if (s->s.h.txfmmode == TX_SWITCHABLE) {
 878             for (i = 0; i < 2; i++)
 879                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 880                     s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
 881             for (i = 0; i < 2; i++)
 882                 for (j = 0; j < 2; j++)
 883                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 884                         s->prob.p.tx16p[i][j] =
 885                             update_prob(&s->c, s->prob.p.tx16p[i][j]);
 886             for (i = 0; i < 2; i++)
 887                 for (j = 0; j < 3; j++)
 888                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 889                         s->prob.p.tx32p[i][j] =
 890                             update_prob(&s->c, s->prob.p.tx32p[i][j]);
 891         }
 892     }
 893
 894     // coef updates
 895     for (i = 0; i < 4; i++) {
 896         uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
 897         if (vp8_rac_get(&s->c)) {
 898             for (j = 0; j < 2; j++)
 899                 for (k = 0; k < 2; k++)
 900                     for (l = 0; l < 6; l++)
 901                         for (m = 0; m < 6; m++) {
 902                             uint8_t *p = s->prob.coef[i][j][k][l][m];
 903                             uint8_t *r = ref[j][k][l][m];
 904                             if (m >= 3 && l == 0) // dc only has 3 pt
 905                                 break;
 906                             for (n = 0; n < 3; n++) {
 907                                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 908                                     p[n] = update_prob(&s->c, r[n]);
 909                                 else
 910                                     p[n] = r[n];
 911                             }
 912                             memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
 913                         }
 914         } else {
 915             for (j = 0; j < 2; j++)
 916                 for (k = 0; k < 2; k++)
 917                     for (l = 0; l < 6; l++)
 918                         for (m = 0; m < 6; m++) {
 919                             uint8_t *p = s->prob.coef[i][j][k][l][m];
 920                             uint8_t *r = ref[j][k][l][m];
 921                             if (m > 3 && l == 0) // dc only has 3 pt
 922                                 break;
 923                             memcpy(p, r, 3);
 924                             memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
 925                         }
 926         }
 927         if (s->s.h.txfmmode == i)
 928             break;
 929     }
 930
 931     // mode updates
 932     for (i = 0; i < 3; i++)
 933         if (vp56_rac_get_prob_branchy(&s->c, 252))
 934             s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
 935     if (!s->s.h.keyframe && !s->s.h.intraonly) {
 936         for (i = 0; i < 7; i++)
 937             for (j = 0; j < 3; j++)
 938                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 939                     s->prob.p.mv_mode[i][j] =
 940                         update_prob(&s->c, s->prob.p.mv_mode[i][j]);
 941
 942         if (s->s.h.filtermode == FILTER_SWITCHABLE)
 943             for (i = 0; i < 4; i++)
 944                 for (j = 0; j < 2; j++)
 945                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 946                         s->prob.p.filter[i][j] =
 947                             update_prob(&s->c, s->prob.p.filter[i][j]);
 948
 949         for (i = 0; i < 4; i++)
 950             if (vp56_rac_get_prob_branchy(&s->c, 252))
 951                 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
 952
 953         if (s->s.h.allowcompinter) {
 954             s->s.h.comppredmode = vp8_rac_get(&s->c);
 955             if (s->s.h.comppredmode)
 956                 s->s.h.comppredmode += vp8_rac_get(&s->c);
 957             if (s->s.h.comppredmode == PRED_SWITCHABLE)
 958                 for (i = 0; i < 5; i++)
 959                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 960                         s->prob.p.comp[i] =
 961                             update_prob(&s->c, s->prob.p.comp[i]);
 962         } else {
 963             s->s.h.comppredmode = PRED_SINGLEREF;
 964         }
 965
 966         if (s->s.h.comppredmode != PRED_COMPREF) {
 967             for (i = 0; i < 5; i++) {
 968                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 969                     s->prob.p.single_ref[i][0] =
 970                         update_prob(&s->c, s->prob.p.single_ref[i][0]);
 971                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 972                     s->prob.p.single_ref[i][1] =
 973                         update_prob(&s->c, s->prob.p.single_ref[i][1]);
 974             }
 975         }
 976
 977         if (s->s.h.comppredmode != PRED_SINGLEREF) {
 978             for (i = 0; i < 5; i++)
 979                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 980                     s->prob.p.comp_ref[i] =
 981                         update_prob(&s->c, s->prob.p.comp_ref[i]);
 982         }
 983
 984         for (i = 0; i < 4; i++)
 985             for (j = 0; j < 9; j++)
 986                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 987                     s->prob.p.y_mode[i][j] =
 988                         update_prob(&s->c, s->prob.p.y_mode[i][j]);
 989
 990         for (i = 0; i < 4; i++)
 991             for (j = 0; j < 4; j++)
 992                 for (k = 0; k < 3; k++)
 993                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 994                         s->prob.p.partition[3 - i][j][k] =
 995                             update_prob(&s->c,
 996                                         s->prob.p.partition[3 - i][j][k]);
 997
 998         // mv fields don't use the update_prob subexp model for some reason
 999         for (i = 0; i < 3; i++)
1000             if (vp56_rac_get_prob_branchy(&s->c, 252))
1001                 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1002
1003         for (i = 0; i < 2; i++) {
1004             if (vp56_rac_get_prob_branchy(&s->c, 252))
1005                 s->prob.p.mv_comp[i].sign =
1006                     (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1007
1008             for (j = 0; j < 10; j++)
1009                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1010                     s->prob.p.mv_comp[i].classes[j] =
1011                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1012
1013             if (vp56_rac_get_prob_branchy(&s->c, 252))
1014                 s->prob.p.mv_comp[i].class0 =
1015                     (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1016
1017             for (j = 0; j < 10; j++)
1018                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1019                     s->prob.p.mv_comp[i].bits[j] =
1020                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1021         }
1022
1023         for (i = 0; i < 2; i++) {
1024             for (j = 0; j < 2; j++)
1025                 for (k = 0; k < 3; k++)
1026                     if (vp56_rac_get_prob_branchy(&s->c, 252))
1027                         s->prob.p.mv_comp[i].class0_fp[j][k] =
1028                             (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1029
1030             for (j = 0; j < 3; j++)
1031                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1032                     s->prob.p.mv_comp[i].fp[j] =
1033                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1034         }
1035
1036         if (s->s.h.highprecisionmvs) {
1037             for (i = 0; i < 2; i++) {
1038                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1039                     s->prob.p.mv_comp[i].class0_hp =
1040                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1041
1042                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1043                     s->prob.p.mv_comp[i].hp =
1044                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1045             }
1046         }
1047     }
1048
1049     return (data2 - data) + size2;
1050 }
1051
1052 static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1053                       ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1054 {
1055     const VP9Context *s = td->s;
1056     int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1057             (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1058     const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
1059                                                      s->prob.p.partition[bl][c];
1060     enum BlockPartition bp;
1061     ptrdiff_t hbs = 4 >> bl;
1062     AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1063     ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1064     int bytesperpixel = s->bytesperpixel;
1065
1066     if (bl == BL_8X8) {
1067         bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1068         ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1069     } else if (col + hbs < s->cols) { // FIXME why not <=?
1070         if (row + hbs < s->rows) { // FIXME why not <=?
1071             bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1072             switch (bp) {
1073             case PARTITION_NONE:
1074                 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1075                 break;
1076             case PARTITION_H:
1077                 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1078                 yoff  += hbs * 8 * y_stride;
1079                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1080                 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
1081                 break;
1082             case PARTITION_V:
1083                 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1084                 yoff  += hbs * 8 * bytesperpixel;
1085                 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1086                 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
1087                 break;
1088             case PARTITION_SPLIT:
1089                 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1090                 decode_sb(td, row, col + hbs, lflvl,
1091                           yoff + 8 * hbs * bytesperpixel,
1092                           uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1093                 yoff  += hbs * 8 * y_stride;
1094                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1095                 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1096                 decode_sb(td, row + hbs, col + hbs, lflvl,
1097                           yoff + 8 * hbs * bytesperpixel,
1098                           uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1099                 break;
1100             default:
1101                 av_assert0(0);
1102             }
1103         } else if (vp56_rac_get_prob_branchy(td->c, p[1])) {
1104             bp = PARTITION_SPLIT;
1105             decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1106             decode_sb(td, row, col + hbs, lflvl,
1107                       yoff + 8 * hbs * bytesperpixel,
1108                       uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1109         } else {
1110             bp = PARTITION_H;
1111             ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1112         }
1113     } else if (row + hbs < s->rows) { // FIXME why not <=?
1114         if (vp56_rac_get_prob_branchy(td->c, p[2])) {
1115             bp = PARTITION_SPLIT;
1116             decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1117             yoff  += hbs * 8 * y_stride;
1118             uvoff += hbs * 8 * uv_stride >> s->ss_v;
1119             decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1120         } else {
1121             bp = PARTITION_V;
1122             ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1123         }
1124     } else {
1125         bp = PARTITION_SPLIT;
1126         decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1127     }
1128     td->counts.partition[bl][c][bp]++;
1129 }
1130
1131 static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1132                           ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1133 {
1134     const VP9Context *s = td->s;
1135     VP9Block *b = td->b;
1136     ptrdiff_t hbs = 4 >> bl;
1137     AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1138     ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1139     int bytesperpixel = s->bytesperpixel;
1140
1141     if (bl == BL_8X8) {
1142         av_assert2(b->bl == BL_8X8);
1143         ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1144     } else if (td->b->bl == bl) {
1145         ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1146         if (b->bp == PARTITION_H && row + hbs < s->rows) {
1147             yoff  += hbs * 8 * y_stride;
1148             uvoff += hbs * 8 * uv_stride >> s->ss_v;
1149             ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
1150         } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
1151             yoff  += hbs * 8 * bytesperpixel;
1152             uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1153             ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
1154         }
1155     } else {
1156         decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
1157         if (col + hbs < s->cols) { // FIXME why not <=?
1158             if (row + hbs < s->rows) {
1159                 decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1160                               uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1161                 yoff  += hbs * 8 * y_stride;
1162                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1163                 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1164                 decode_sb_mem(td, row + hbs, col + hbs, lflvl,
1165                               yoff + 8 * hbs * bytesperpixel,
1166                               uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1167             } else {
1168                 yoff  += hbs * 8 * bytesperpixel;
1169                 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1170                 decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
1171             }
1172         } else if (row + hbs < s->rows) {
1173             yoff  += hbs * 8 * y_stride;
1174             uvoff += hbs * 8 * uv_stride >> s->ss_v;
1175             decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1176         }
1177     }
1178 }
1179
/**
 * Compute the half-open range [*start, *end) covered by tile number idx
 * when n units are divided into (1 << log2_n) tiles.
 *
 * Both bounds are clamped to n and then multiplied by 8 (shifted left by 3)
 * before being stored.
 *
 * @param start   receives the first position of the tile
 * @param end     receives the position one past the end of the tile
 * @param idx     tile index
 * @param log2_n  log2 of the number of tiles
 * @param n       total number of units being divided
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first = (idx * n) >> log2_n;
    int limit = ((idx + 1) * n) >> log2_n;

    /* never run past the total */
    if (first > n)
        first = n;
    if (limit > n)
        limit = n;

    *start = first << 3;
    *end   = limit << 3;
}
1187
1188 static void free_buffers(VP9Context *s)
1189 {
1190     int i;
1191
1192     av_freep(&s->intra_pred_data[0]);
1193     for (i = 0; i < s->active_tile_cols; i++) {
1194         av_freep(&s->td[i].b_base);
1195         av_freep(&s->td[i].block_base);
1196     }
1197 }
1198
1199 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1200 {
1201     VP9Context *s = avctx->priv_data;
1202     int i;
1203
1204     for (i = 0; i < 3; i++) {
1205         if (s->s.frames[i].tf.f->buf[0])
1206             vp9_frame_unref(avctx, &s->s.frames[i]);
1207         av_frame_free(&s->s.frames[i].tf.f);
1208     }
1209     for (i = 0; i < 8; i++) {
1210         if (s->s.refs[i].f->buf[0])
1211             ff_thread_release_buffer(avctx, &s->s.refs[i]);
1212         av_frame_free(&s->s.refs[i].f);
1213         if (s->next_refs[i].f->buf[0])
1214             ff_thread_release_buffer(avctx, &s->next_refs[i]);
1215         av_frame_free(&s->next_refs[i].f);
1216     }
1217
1218     free_buffers(s);
1219     vp9_free_entries(avctx);
1220     av_freep(&s->td);
1221     return 0;
1222 }
1223
1224 static int decode_tiles(AVCodecContext *avctx,
1225                         const uint8_t *data, int size)
1226 {
1227     VP9Context *s = avctx->priv_data;
1228     VP9TileData *td = &s->td[0];
1229     int row, col, tile_row, tile_col, ret;
1230     int bytesperpixel;
1231     int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1232     AVFrame *f;
1233     ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1234
1235     f = s->s.frames[CUR_FRAME].tf.f;
1236     ls_y = f->linesize[0];
1237     ls_uv =f->linesize[1];
1238     bytesperpixel = s->bytesperpixel;
1239
1240     yoff = uvoff = 0;
1241     for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1242         set_tile_offset(&tile_row_start, &tile_row_end,
1243                         tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1244
1245         for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1246             int64_t tile_size;
1247
1248             if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1249                 tile_row == s->s.h.tiling.tile_rows - 1) {
1250                 tile_size = size;
1251             } else {
1252                 tile_size = AV_RB32(data);
1253                 data += 4;
1254                 size -= 4;
1255             }
1256             if (tile_size > size) {
1257                 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1258                 return AVERROR_INVALIDDATA;
1259             }
1260             ret = ff_vp56_init_range_decoder(&td->c_b[tile_col], data, tile_size);
1261             if (ret < 0)
1262                 return ret;
1263             if (vp56_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker bit
1264                 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1265                 return AVERROR_INVALIDDATA;
1266             }
1267             data += tile_size;
1268             size -= tile_size;
1269         }
1270
1271         for (row = tile_row_start; row < tile_row_end;
1272              row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1273             VP9Filter *lflvl_ptr = s->lflvl;
1274             ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1275
1276             for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1277                 set_tile_offset(&tile_col_start, &tile_col_end,
1278                                 tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1279                 td->tile_col_start = tile_col_start;
1280                 if (s->pass != 2) {
1281                     memset(td->left_partition_ctx, 0, 8);
1282                     memset(td->left_skip_ctx, 0, 8);
1283                     if (s->s.h.keyframe || s->s.h.intraonly) {
1284                         memset(td->left_mode_ctx, DC_PRED, 16);
1285                     } else {
1286                         memset(td->left_mode_ctx, NEARESTMV, 8);
1287                     }
1288                     memset(td->left_y_nnz_ctx, 0, 16);
1289                     memset(td->left_uv_nnz_ctx, 0, 32);
1290                     memset(td->left_segpred_ctx, 0, 8);
1291
1292                     td->c = &td->c_b[tile_col];
1293                 }
1294
1295                 for (col = tile_col_start;
1296                      col < tile_col_end;
1297                      col += 8, yoff2 += 64 * bytesperpixel,
1298                      uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1299                     // FIXME integrate with lf code (i.e. zero after each
1300                     // use, similar to invtxfm coefficients, or similar)
1301                     if (s->pass != 1) {
1302                         memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1303                     }
1304
1305                     if (s->pass == 2) {
1306                         decode_sb_mem(td, row, col, lflvl_ptr,
1307                                       yoff2, uvoff2, BL_64X64);
1308                     } else {
1309                         decode_sb(td, row, col, lflvl_ptr,
1310                                   yoff2, uvoff2, BL_64X64);
1311                     }
1312                 }
1313             }
1314
1315             if (s->pass == 1)
1316                 continue;
1317
1318             // backup pre-loopfilter reconstruction data for intra
1319             // prediction of next row of sb64s
1320             if (row + 8 < s->rows) {
1321                 memcpy(s->intra_pred_data[0],
1322                        f->data[0] + yoff + 63 * ls_y,
1323                        8 * s->cols * bytesperpixel);
1324                 memcpy(s->intra_pred_data[1],
1325                        f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1326                        8 * s->cols * bytesperpixel >> s->ss_h);
1327                 memcpy(s->intra_pred_data[2],
1328                        f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1329                        8 * s->cols * bytesperpixel >> s->ss_h);
1330             }
1331
1332             // loopfilter one row
1333             if (s->s.h.filter.level) {
1334                 yoff2 = yoff;
1335                 uvoff2 = uvoff;
1336                 lflvl_ptr = s->lflvl;
1337                 for (col = 0; col < s->cols;
1338                      col += 8, yoff2 += 64 * bytesperpixel,
1339                      uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1340                     ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
1341                                          yoff2, uvoff2);
1342                 }
1343             }
1344
1345             // FIXME maybe we can make this more finegrained by running the
1346             // loopfilter per-block instead of after each sbrow
1347             // In fact that would also make intra pred left preparation easier?
1348             ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
1349         }
1350     }
1351     return 0;
1352 }
1353
1354 #if HAVE_THREADS
1355 static av_always_inline
1356 int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
1357                               int threadnr)
1358 {
1359     VP9Context *s = avctx->priv_data;
1360     VP9TileData *td = &s->td[jobnr];
1361     ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1362     int bytesperpixel = s->bytesperpixel, row, col, tile_row;
1363     unsigned tile_cols_len;
1364     int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1365     VP9Filter *lflvl_ptr_base;
1366     AVFrame *f;
1367
1368     f = s->s.frames[CUR_FRAME].tf.f;
1369     ls_y = f->linesize[0];
1370     ls_uv =f->linesize[1];
1371
1372     set_tile_offset(&tile_col_start, &tile_col_end,
1373                     jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1374     td->tile_col_start  = tile_col_start;
1375     uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3);
1376     yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1377     lflvl_ptr_base = s->lflvl+(tile_col_start >> 3);
1378
1379     for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1380         set_tile_offset(&tile_row_start, &tile_row_end,
1381                         tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1382
1383         td->c = &td->c_b[tile_row];
1384         for (row = tile_row_start; row < tile_row_end;
1385              row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1386             ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1387             VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3);
1388
1389             memset(td->left_partition_ctx, 0, 8);
1390             memset(td->left_skip_ctx, 0, 8);
1391             if (s->s.h.keyframe || s->s.h.intraonly) {
1392                 memset(td->left_mode_ctx, DC_PRED, 16);
1393             } else {
1394                 memset(td->left_mode_ctx, NEARESTMV, 8);
1395             }
1396             memset(td->left_y_nnz_ctx, 0, 16);
1397             memset(td->left_uv_nnz_ctx, 0, 32);
1398             memset(td->left_segpred_ctx, 0, 8);
1399
1400             for (col = tile_col_start;
1401                  col < tile_col_end;
1402                  col += 8, yoff2 += 64 * bytesperpixel,
1403                  uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1404                 // FIXME integrate with lf code (i.e. zero after each
1405                 // use, similar to invtxfm coefficients, or similar)
1406                 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1407                 decode_sb(td, row, col, lflvl_ptr,
1408                             yoff2, uvoff2, BL_64X64);
1409             }
1410
1411             // backup pre-loopfilter reconstruction data for intra
1412             // prediction of next row of sb64s
1413             tile_cols_len = tile_col_end - tile_col_start;
1414             if (row + 8 < s->rows) {
1415                 memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1416                        f->data[0] + yoff + 63 * ls_y,
1417                        8 * tile_cols_len * bytesperpixel);
1418                 memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1419                        f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1420                        8 * tile_cols_len * bytesperpixel >> s->ss_h);
1421                 memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1422                        f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1423                        8 * tile_cols_len * bytesperpixel >> s->ss_h);
1424             }
1425
1426             vp9_report_tile_progress(s, row >> 3, 1);
1427         }
1428     }
1429     return 0;
1430 }
1431
1432 static av_always_inline
1433 int loopfilter_proc(AVCodecContext *avctx)
1434 {
1435     VP9Context *s = avctx->priv_data;
1436     ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1437     VP9Filter *lflvl_ptr;
1438     int bytesperpixel = s->bytesperpixel, col, i;
1439     AVFrame *f;
1440
1441     f = s->s.frames[CUR_FRAME].tf.f;
1442     ls_y = f->linesize[0];
1443     ls_uv =f->linesize[1];
1444
1445     for (i = 0; i < s->sb_rows; i++) {
1446         vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);
1447
1448         if (s->s.h.filter.level) {
1449             yoff = (ls_y * 64)*i;
1450             uvoff =  (ls_uv * 64 >> s->ss_v)*i;
1451             lflvl_ptr = s->lflvl+s->sb_cols*i;
1452             for (col = 0; col < s->cols;
1453                  col += 8, yoff += 64 * bytesperpixel,
1454                  uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1455                 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col,
1456                                      yoff, uvoff);
1457             }
1458         }
1459     }
1460     return 0;
1461 }
1462 #endif
1463
/**
 * Decode one VP9 packet.
 *
 * The frame header is parsed with decode_frame_header(). When that returns 0
 * the packet only asks for an existing reference to be output: the reference
 * selected through ref is re-referenced into frame and no tile data is
 * decoded. Otherwise the internal frames (segmentation-map reference,
 * MV-pair reference, current frame) are rotated, a new current frame is
 * allocated, the reference slots for the following frame are prepared in
 * next_refs, and the picture is decoded by a hwaccel, by the slice-threaded
 * path, or by decode_tiles().
 *
 * @param avctx     codec context; priv_data is the VP9Context
 * @param frame     output frame, used as an AVFrame *
 * @param got_frame set to 1 when frame has been filled in; left untouched
 *                  otherwise
 * @param pkt       input packet
 * @return pkt->size on success, a negative error code on failure
 */
static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
                            int *got_frame, AVPacket *pkt)
{
    const uint8_t *data = pkt->data;
    int size = pkt->size;
    VP9Context *s = avctx->priv_data;
    int ret, i, j, ref;
    // Evaluated before decode_frame_header() runs, i.e. from whatever
    // segmentation state s->s.h held on entry to this call.
    int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
                            (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
    AVFrame *f;

    if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
        return ret;
    } else if (ret == 0) {
        // Header asked to output reference slot `ref` directly: hand out a
        // new reference to it with this packet's timestamps.
        if (!s->s.refs[ref].f->buf[0]) {
            av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        }
        if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
            return ret;
        ((AVFrame *)frame)->pts = pkt->pts;
#if FF_API_PKT_PTS
FF_DISABLE_DEPRECATION_WARNINGS
        ((AVFrame *)frame)->pkt_pts = pkt->pts;
FF_ENABLE_DEPRECATION_WARNINGS
#endif
        ((AVFrame *)frame)->pkt_dts = pkt->dts;
        // No reference is refreshed on this path: next_refs becomes a plain
        // copy of the current reference set.
        for (i = 0; i < 8; i++) {
            if (s->next_refs[i].f->buf[0])
                ff_thread_release_buffer(avctx, &s->next_refs[i]);
            if (s->s.refs[i].f->buf[0] &&
                (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
                return ret;
        }
        *got_frame = 1;
        return pkt->size;
    }
    // A positive return from the header parser is used as the number of
    // bytes to skip to reach the tile data.
    data += ret;
    size -= ret;

    // Rotate the internal frames. CUR_FRAME has not been replaced yet at this
    // point, so the vp9_frame_ref() calls below reference the frame it held
    // on entry.
    if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
        if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
            vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
        if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
            (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
            return ret;
    }
    if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
        vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
    if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
        (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
        return ret;
    // Release the old current frame and allocate the one decoded by this call.
    if (s->s.frames[CUR_FRAME].tf.f->buf[0])
        vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME]);
    if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
        return ret;
    f = s->s.frames[CUR_FRAME].tf.f;
    f->key_frame = s->s.h.keyframe;
    f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;

    // Drop the segmentation-map reference when the frame size changed.
    // NOTE(review): the guard tests REF_FRAME_SEGMAP's buffer but the
    // dimensions compared are REF_FRAME_MVPAIR's -- confirm this is intended.
    if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
        (s->s.frames[REF_FRAME_MVPAIR].tf.f->width  != s->s.frames[CUR_FRAME].tf.f->width ||
         s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
        vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
    }

    // ref frame setup
    // Build next_refs: slots selected by refreshrefmask point at the new
    // current frame, the others carry over the existing reference (if any).
    // When neither branch runs, ret keeps its previous non-negative value.
    for (i = 0; i < 8; i++) {
        if (s->next_refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->next_refs[i]);
        if (s->s.h.refreshrefmask & (1 << i)) {
            ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
        } else if (s->s.refs[i].f->buf[0]) {
            ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
        }
        if (ret < 0)
            return ret;
    }

    // Hardware decoding: the whole packet (pkt->data / pkt->size, not the
    // header-advanced data / size) is passed as a single slice, then the
    // software tile decoding below is skipped entirely.
    if (avctx->hwaccel) {
        ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
        if (ret < 0)
            return ret;
        ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
        if (ret < 0)
            return ret;
        ret = avctx->hwaccel->end_frame(avctx);
        if (ret < 0)
            return ret;
        goto finish;
    }

    // main tile decode loop
    // Reset the "above" context arrays before decoding the first row.
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->s.h.keyframe || s->s.h.intraonly) {
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
    } else {
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    }
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
    memset(s->above_segpred_ctx, 0, s->cols);
    // pass is 1 only when frame threading is the sole active thread type,
    // the context is refreshed and parallel mode is off; otherwise it is 0.
    s->pass = s->s.frames[CUR_FRAME].uses_2pass =
        avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
    if ((ret = update_block_buffers(avctx)) < 0) {
        av_log(avctx, AV_LOG_ERROR,
               "Failed to allocate block buffers\n");
        return ret;
    }
    if (s->s.h.refreshctx && s->s.h.parallelmode) {
        int j, k, l, m;

        // Parallel mode: store the probabilities into the selected context
        // now, before any tile is decoded. Coefficient probabilities are
        // copied 3 bytes at a time for i = 0 up to and including txfmmode.
        for (i = 0; i < 4; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++)
                            memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
                                   s->prob.coef[i][j][k][l][m], 3);
            if (s->s.h.txfmmode == i)
                break;
        }
        s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
        ff_thread_finish_setup(avctx);
    } else if (!s->s.h.refreshctx) {
        ff_thread_finish_setup(avctx);
    }

#if HAVE_THREADS
    // Clear the per-superblock-row entries before slice-threaded decoding.
    if (avctx->active_thread_type & FF_THREAD_SLICE) {
        for (i = 0; i < s->sb_rows; i++)
            atomic_store(&s->entries[i], 0);
    }
#endif

    // The body runs once when pass starts at 0 and twice when it starts at 1
    // (see the post-incrementing loop condition).
    do {
        // Rewind every active tile context's block/coefficient cursors to
        // the start of their buffers.
        for (i = 0; i < s->active_tile_cols; i++) {
            s->td[i].b = s->td[i].b_base;
            s->td[i].block = s->td[i].block_base;
            s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
            s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
            s->td[i].eob = s->td[i].eob_base;
            s->td[i].uveob[0] = s->td[i].uveob_base[0];
            s->td[i].uveob[1] = s->td[i].uveob_base[1];
        }

#if HAVE_THREADS
        if (avctx->active_thread_type == FF_THREAD_SLICE) {
            int tile_row, tile_col;

            av_assert1(!s->pass);

            // Walk all tiles up front and set up one range decoder per
            // (tile column, tile row). Every tile except the very last is
            // preceded by a 4-byte size read with AV_RB32; the last tile
            // takes whatever data remains.
            for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
                for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
                    int64_t tile_size;

                    if (tile_col == s->s.h.tiling.tile_cols - 1 &&
                        tile_row == s->s.h.tiling.tile_rows - 1) {
                        tile_size = size;
                    } else {
                        tile_size = AV_RB32(data);
                        data += 4;
                        size -= 4;
                    }
                    if (tile_size > size)
                        return AVERROR_INVALIDDATA;
                    ret = ff_vp56_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
                    if (ret < 0)
                        return ret;
                    if (vp56_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
                        return AVERROR_INVALIDDATA;
                    data += tile_size;
                    size -= tile_size;
                }
            }

            // One job per tile column runs decode_tiles_mt; loopfilter_proc
            // is passed as the main function. The return value is not
            // checked here.
            ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
        } else
#endif
        {
            ret = decode_tiles(avctx, data, size);
            if (ret < 0) {
                // Report full progress on the current frame before
                // returning the error.
                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                return ret;
            }
        }

        // Sum all counts fields into td[0].counts for tile threading
        // (the counts struct is walked as a flat array of unsigned).
        if (avctx->active_thread_type == FF_THREAD_SLICE)
            for (i = 1; i < s->s.h.tiling.tile_cols; i++)
                for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
                    ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];

        // Non-parallel context refresh: adapt the probabilities after the
        // first (or only) pass, then signal that setup is finished.
        if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
            ff_vp9_adapt_probs(s);
            ff_thread_finish_setup(avctx);
        }
    } while (s->pass++ == 1);
    ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);

finish:
    // ref frame setup
    // Promote next_refs to the active reference set.
    for (i = 0; i < 8; i++) {
        if (s->s.refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->s.refs[i]);
        if (s->next_refs[i].f->buf[0] &&
            (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
            return ret;
    }

    // Frames flagged invisible are decoded but not returned to the caller.
    if (!s->s.h.invisible) {
        if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return pkt->size;
}
1684
1685 static void vp9_decode_flush(AVCodecContext *avctx)
1686 {
1687     VP9Context *s = avctx->priv_data;
1688     int i;
1689
1690     for (i = 0; i < 3; i++)
1691         vp9_frame_unref(avctx, &s->s.frames[i]);
1692     for (i = 0; i < 8; i++)
1693         ff_thread_release_buffer(avctx, &s->s.refs[i]);
1694 }
1695
1696 static int init_frames(AVCodecContext *avctx)
1697 {
1698     VP9Context *s = avctx->priv_data;
1699     int i;
1700
1701     for (i = 0; i < 3; i++) {
1702         s->s.frames[i].tf.f = av_frame_alloc();
1703         if (!s->s.frames[i].tf.f) {
1704             vp9_decode_free(avctx);
1705             av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1706             return AVERROR(ENOMEM);
1707         }
1708     }
1709     for (i = 0; i < 8; i++) {
1710         s->s.refs[i].f = av_frame_alloc();
1711         s->next_refs[i].f = av_frame_alloc();
1712         if (!s->s.refs[i].f || !s->next_refs[i].f) {
1713             vp9_decode_free(avctx);
1714             av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1715             return AVERROR(ENOMEM);
1716         }
1717     }
1718
1719     return 0;
1720 }
1721
1722 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1723 {
1724     VP9Context *s = avctx->priv_data;
1725
1726     avctx->internal->allocate_progress = 1;
1727     s->last_bpp = 0;
1728     s->s.h.filter.sharpness = -1;
1729
1730     return init_frames(avctx);
1731 }
1732
1733 #if HAVE_THREADS
1734 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
1735 {
1736     return init_frames(avctx);
1737 }
1738
1739 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1740 {
1741     int i, ret;
1742     VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
1743
1744     for (i = 0; i < 3; i++) {
1745         if (s->s.frames[i].tf.f->buf[0])
1746             vp9_frame_unref(dst, &s->s.frames[i]);
1747         if (ssrc->s.frames[i].tf.f->buf[0]) {
1748             if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
1749                 return ret;
1750         }
1751     }
1752     for (i = 0; i < 8; i++) {
1753         if (s->s.refs[i].f->buf[0])
1754             ff_thread_release_buffer(dst, &s->s.refs[i]);
1755         if (ssrc->next_refs[i].f->buf[0]) {
1756             if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
1757                 return ret;
1758         }
1759     }
1760
1761     s->s.h.invisible = ssrc->s.h.invisible;
1762     s->s.h.keyframe = ssrc->s.h.keyframe;
1763     s->s.h.intraonly = ssrc->s.h.intraonly;
1764     s->ss_v = ssrc->ss_v;
1765     s->ss_h = ssrc->ss_h;
1766     s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1767     s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1768     s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1769     s->bytesperpixel = ssrc->bytesperpixel;
1770     s->gf_fmt = ssrc->gf_fmt;
1771     s->w = ssrc->w;
1772     s->h = ssrc->h;
1773     s->s.h.bpp = ssrc->s.h.bpp;
1774     s->bpp_index = ssrc->bpp_index;
1775     s->pix_fmt = ssrc->pix_fmt;
1776     memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
1777     memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
1778     memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1779            sizeof(s->s.h.segmentation.feat));
1780
1781     return 0;
1782 }
1783 #endif
1784
1785 AVCodec ff_vp9_decoder = {
1786     .name                  = "vp9",
1787     .long_name             = NULL_IF_CONFIG_SMALL("Google VP9"),
1788     .type                  = AVMEDIA_TYPE_VIDEO,
1789     .id                    = AV_CODEC_ID_VP9,
1790     .priv_data_size        = sizeof(VP9Context),
1791     .init                  = vp9_decode_init,
1792     .close                 = vp9_decode_free,
1793     .decode                = vp9_decode_frame,
1794     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
1795     .caps_internal         = FF_CODEC_CAP_SLICE_THREAD_HAS_MF,
1796     .flush                 = vp9_decode_flush,
1797     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
1798     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
1799     .profiles              = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
1800     .bsfs                  = "vp9_superframe_split",
1801     .hw_configs            = (const AVCodecHWConfigInternal*[]) {
1802 #if CONFIG_VP9_DXVA2_HWACCEL
1803                                HWACCEL_DXVA2(vp9),
1804 #endif
1805 #if CONFIG_VP9_D3D11VA_HWACCEL
1806                                HWACCEL_D3D11VA(vp9),
1807 #endif
1808 #if CONFIG_VP9_D3D11VA2_HWACCEL
1809                                HWACCEL_D3D11VA2(vp9),
1810 #endif
1811 #if CONFIG_VP9_NVDEC_HWACCEL
1812                                HWACCEL_NVDEC(vp9),
1813 #endif
1814 #if CONFIG_VP9_VAAPI_HWACCEL
1815                                HWACCEL_VAAPI(vp9),
1816 #endif
1817                                NULL
1818                            },
1819 };