git.sesse.net Git - ffmpeg/blob - libavcodec/vp9.c

   1 /*
   2  * VP9 compatible video decoder
   3  *
   4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
   5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
   6  *
   7  * This file is part of FFmpeg.
   8  *
   9  * FFmpeg is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * FFmpeg is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with FFmpeg; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
  24 #include "avcodec.h"
  25 #include "get_bits.h"
  26 #include "hwaccel.h"
  27 #include "internal.h"
  28 #include "profiles.h"
  29 #include "thread.h"
  30 #include "videodsp.h"
  31 #include "vp56.h"
  32 #include "vp9.h"
  33 #include "vp9data.h"
  34 #include "vp9dec.h"
  35 #include "libavutil/avassert.h"
  36 #include "libavutil/pixdesc.h"
  37
  38 #define VP9_SYNCCODE 0x498342
  39
  40 #if HAVE_THREADS
  41 static void vp9_free_entries(AVCodecContext *avctx) {
  42     VP9Context *s = avctx->priv_data;
  43
  44     if (avctx->active_thread_type & FF_THREAD_SLICE)  {
  45         pthread_mutex_destroy(&s->progress_mutex);
  46         pthread_cond_destroy(&s->progress_cond);
  47         av_freep(&s->entries);
  48     }
  49 }
  50
  51 static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
  52     VP9Context *s = avctx->priv_data;
  53     int i;
  54
  55     if (avctx->active_thread_type & FF_THREAD_SLICE)  {
  56         if (s->entries)
  57             av_freep(&s->entries);
  58
  59         s->entries = av_malloc_array(n, sizeof(atomic_int));
  60
  61         if (!s->entries) {
  62             av_freep(&s->entries);
  63             return AVERROR(ENOMEM);
  64         }
  65
  66         for (i  = 0; i < n; i++)
  67             atomic_init(&s->entries[i], 0);
  68
  69         pthread_mutex_init(&s->progress_mutex, NULL);
  70         pthread_cond_init(&s->progress_cond, NULL);
  71     }
  72     return 0;
  73 }
  74
  75 static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
  76     pthread_mutex_lock(&s->progress_mutex);
  77     atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
  78     pthread_cond_signal(&s->progress_cond);
  79     pthread_mutex_unlock(&s->progress_mutex);
  80 }
  81
  82 static void vp9_await_tile_progress(VP9Context *s, int field, int n) {
  83     if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
  84         return;
  85
  86     pthread_mutex_lock(&s->progress_mutex);
  87     while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) != n)
  88         pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
  89     pthread_mutex_unlock(&s->progress_mutex);
  90 }
  91 #else
  92 static void vp9_free_entries(AVCodecContext *avctx) {}
  93 static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
  94 #endif
  95
  96 static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
  97 {
  98     ff_thread_release_buffer(avctx, &f->tf);
  99     av_buffer_unref(&f->extradata);
 100     av_buffer_unref(&f->hwaccel_priv_buf);
 101     f->segmentation_map = NULL;
 102     f->hwaccel_picture_private = NULL;
 103 }
 104
 105 static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
 106 {
 107     VP9Context *s = avctx->priv_data;
 108     int ret, sz;
 109
 110     ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
 111     if (ret < 0)
 112         return ret;
 113
 114     sz = 64 * s->sb_cols * s->sb_rows;
 115     f->extradata = av_buffer_allocz(sz * (1 + sizeof(VP9mvrefPair)));
 116     if (!f->extradata) {
 117         goto fail;
 118     }
 119
 120     f->segmentation_map = f->extradata->data;
 121     f->mv = (VP9mvrefPair *) (f->extradata->data + sz);
 122
 123     if (avctx->hwaccel) {
 124         const AVHWAccel *hwaccel = avctx->hwaccel;
 125         av_assert0(!f->hwaccel_picture_private);
 126         if (hwaccel->frame_priv_data_size) {
 127             f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
 128             if (!f->hwaccel_priv_buf)
 129                 goto fail;
 130             f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
 131         }
 132     }
 133
 134     return 0;
 135
 136 fail:
 137     vp9_frame_unref(avctx, f);
 138     return AVERROR(ENOMEM);
 139 }
 140
 141 static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
 142 {
 143     int ret;
 144
 145     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
 146     if (ret < 0)
 147         return ret;
 148
 149     dst->extradata = av_buffer_ref(src->extradata);
 150     if (!dst->extradata)
 151         goto fail;
 152
 153     dst->segmentation_map = src->segmentation_map;
 154     dst->mv = src->mv;
 155     dst->uses_2pass = src->uses_2pass;
 156
 157     if (src->hwaccel_picture_private) {
 158         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
 159         if (!dst->hwaccel_priv_buf)
 160             goto fail;
 161         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
 162     }
 163
 164     return 0;
 165
 166 fail:
 167     vp9_frame_unref(avctx, dst);
 168     return AVERROR(ENOMEM);
 169 }
 170
 171 static int update_size(AVCodecContext *avctx, int w, int h)
 172 {
 173 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
 174                      CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
 175                      CONFIG_VP9_NVDEC_HWACCEL + \
 176                      CONFIG_VP9_VAAPI_HWACCEL)
 177     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
 178     VP9Context *s = avctx->priv_data;
 179     uint8_t *p;
 180     int bytesperpixel = s->bytesperpixel, ret, cols, rows;
 181     int lflvl_len, i;
 182
 183     av_assert0(w > 0 && h > 0);
 184
 185     if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
 186         if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
 187             return ret;
 188
 189         switch (s->pix_fmt) {
 190         case AV_PIX_FMT_YUV420P:
 191         case AV_PIX_FMT_YUV420P10:
 192 #if CONFIG_VP9_DXVA2_HWACCEL
 193             *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
 194 #endif
 195 #if CONFIG_VP9_D3D11VA_HWACCEL
 196             *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
 197             *fmtp++ = AV_PIX_FMT_D3D11;
 198 #endif
 199 #if CONFIG_VP9_NVDEC_HWACCEL
 200             *fmtp++ = AV_PIX_FMT_CUDA;
 201 #endif
 202 #if CONFIG_VP9_VAAPI_HWACCEL
 203             *fmtp++ = AV_PIX_FMT_VAAPI;
 204 #endif
 205             break;
 206         case AV_PIX_FMT_YUV420P12:
 207 #if CONFIG_VP9_NVDEC_HWACCEL
 208             *fmtp++ = AV_PIX_FMT_CUDA;
 209 #endif
 210 #if CONFIG_VP9_VAAPI_HWACCEL
 211             *fmtp++ = AV_PIX_FMT_VAAPI;
 212 #endif
 213             break;
 214         }
 215
 216         *fmtp++ = s->pix_fmt;
 217         *fmtp = AV_PIX_FMT_NONE;
 218
 219         ret = ff_thread_get_format(avctx, pix_fmts);
 220         if (ret < 0)
 221             return ret;
 222
 223         avctx->pix_fmt = ret;
 224         s->gf_fmt  = s->pix_fmt;
 225         s->w = w;
 226         s->h = h;
 227     }
 228
 229     cols = (w + 7) >> 3;
 230     rows = (h + 7) >> 3;
 231
 232     if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
 233         return 0;
 234
 235     s->last_fmt  = s->pix_fmt;
 236     s->sb_cols   = (w + 63) >> 6;
 237     s->sb_rows   = (h + 63) >> 6;
 238     s->cols      = (w + 7) >> 3;
 239     s->rows      = (h + 7) >> 3;
 240     lflvl_len    = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;
 241
 242 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
 243     av_freep(&s->intra_pred_data[0]);
 244     // FIXME we slightly over-allocate here for subsampled chroma, but a little
 245     // bit of padding shouldn't affect performance...
 246     p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
 247                                 lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
 248     if (!p)
 249         return AVERROR(ENOMEM);
 250     assign(s->intra_pred_data[0],  uint8_t *,             64 * bytesperpixel);
 251     assign(s->intra_pred_data[1],  uint8_t *,             64 * bytesperpixel);
 252     assign(s->intra_pred_data[2],  uint8_t *,             64 * bytesperpixel);
 253     assign(s->above_y_nnz_ctx,     uint8_t *,             16);
 254     assign(s->above_mode_ctx,      uint8_t *,             16);
 255     assign(s->above_mv_ctx,        VP56mv(*)[2],          16);
 256     assign(s->above_uv_nnz_ctx[0], uint8_t *,             16);
 257     assign(s->above_uv_nnz_ctx[1], uint8_t *,             16);
 258     assign(s->above_partition_ctx, uint8_t *,              8);
 259     assign(s->above_skip_ctx,      uint8_t *,              8);
 260     assign(s->above_txfm_ctx,      uint8_t *,              8);
 261     assign(s->above_segpred_ctx,   uint8_t *,              8);
 262     assign(s->above_intra_ctx,     uint8_t *,              8);
 263     assign(s->above_comp_ctx,      uint8_t *,              8);
 264     assign(s->above_ref_ctx,       uint8_t *,              8);
 265     assign(s->above_filter_ctx,    uint8_t *,              8);
 266     assign(s->lflvl,               VP9Filter *,            lflvl_len);
 267 #undef assign
 268
 269     if (s->td) {
 270         for (i = 0; i < s->active_tile_cols; i++) {
 271             av_freep(&s->td[i].b_base);
 272             av_freep(&s->td[i].block_base);
 273         }
 274     }
 275
 276     if (s->s.h.bpp != s->last_bpp) {
 277         ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
 278         ff_videodsp_init(&s->vdsp, s->s.h.bpp);
 279         s->last_bpp = s->s.h.bpp;
 280     }
 281
 282     return 0;
 283 }
 284
 285 static int update_block_buffers(AVCodecContext *avctx)
 286 {
 287     int i;
 288     VP9Context *s = avctx->priv_data;
 289     int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
 290     VP9TileData *td = &s->td[0];
 291
 292     if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
 293         return 0;
 294
 295     av_free(td->b_base);
 296     av_free(td->block_base);
 297     chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
 298     chroma_eobs   = 16 * 16 >> (s->ss_h + s->ss_v);
 299     if (s->s.frames[CUR_FRAME].uses_2pass) {
 300         int sbs = s->sb_cols * s->sb_rows;
 301
 302         td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
 303         td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
 304                                     16 * 16 + 2 * chroma_eobs) * sbs);
 305         if (!td->b_base || !td->block_base)
 306             return AVERROR(ENOMEM);
 307         td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
 308         td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
 309         td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
 310         td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
 311         td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;
 312     } else {
 313         for (i = 1; i < s->active_tile_cols; i++) {
 314             if (s->td[i].b_base && s->td[i].block_base) {
 315                 av_free(s->td[i].b_base);
 316                 av_free(s->td[i].block_base);
 317             }
 318         }
 319         for (i = 0; i < s->active_tile_cols; i++) {
 320             s->td[i].b_base = av_malloc(sizeof(VP9Block));
 321             s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
 322                                        16 * 16 + 2 * chroma_eobs);
 323             if (!s->td[i].b_base || !s->td[i].block_base)
 324                 return AVERROR(ENOMEM);
 325             s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
 326             s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
 327             s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
 328             s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
 329             s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;
 330         }
 331     }
 332     s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;
 333
 334     return 0;
 335 }
 336
 337 // The sign bit is at the end, not the start, of a bit sequence
 338 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
 339 {
 340     int v = get_bits(gb, n);
 341     return get_bits1(gb) ? -v : v;
 342 }
 343
 344 static av_always_inline int inv_recenter_nonneg(int v, int m)
 345 {
 346     if (v > 2 * m)
 347         return v;
 348     if (v & 1)
 349         return m - ((v + 1) >> 1);
 350     return m + (v >> 1);
 351 }
 352
 353 // differential forward probability updates
 354 static int update_prob(VP56RangeCoder *c, int p)
 355 {
 356     static const int inv_map_table[255] = {
 357           7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
 358         189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
 359          10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
 360          25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
 361          40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
 362          55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
 363          70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
 364          86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
 365         101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
 366         116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
 367         131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
 368         146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
 369         161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
 370         177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
 371         192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
 372         207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
 373         222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
 374         237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
 375         252, 253, 253,
 376     };
 377     int d;
 378
 379     /* This code is trying to do a differential probability update. For a
 380      * current probability A in the range [1, 255], the difference to a new
 381      * probability of any value can be expressed differentially as 1-A, 255-A
 382      * where some part of this (absolute range) exists both in positive as
 383      * well as the negative part, whereas another part only exists in one
 384      * half. We're trying to code this shared part differentially, i.e.
 385      * times two where the value of the lowest bit specifies the sign, and
 386      * the single part is then coded on top of this. This absolute difference
 387      * then again has a value of [0, 254], but a bigger value in this range
 388      * indicates that we're further away from the original value A, so we
 389      * can code this as a VLC code, since higher values are increasingly
 390      * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
 391      * updates vs. the 'fine, exact' updates further down the range, which
 392      * adds one extra dimension to this differential update model. */
 393
 394     if (!vp8_rac_get(c)) {
 395         d = vp8_rac_get_uint(c, 4) + 0;
 396     } else if (!vp8_rac_get(c)) {
 397         d = vp8_rac_get_uint(c, 4) + 16;
 398     } else if (!vp8_rac_get(c)) {
 399         d = vp8_rac_get_uint(c, 5) + 32;
 400     } else {
 401         d = vp8_rac_get_uint(c, 7);
 402         if (d >= 65)
 403             d = (d << 1) - 65 + vp8_rac_get(c);
 404         d += 64;
 405         av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
 406     }
 407
 408     return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
 409                     255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
 410 }
 411
 412 static int read_colorspace_details(AVCodecContext *avctx)
 413 {
 414     static const enum AVColorSpace colorspaces[8] = {
 415         AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
 416         AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
 417     };
 418     VP9Context *s = avctx->priv_data;
 419     int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
 420
 421     s->bpp_index = bits;
 422     s->s.h.bpp = 8 + bits * 2;
 423     s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
 424     avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
 425     if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
 426         static const enum AVPixelFormat pix_fmt_rgb[3] = {
 427             AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
 428         };
 429         s->ss_h = s->ss_v = 0;
 430         avctx->color_range = AVCOL_RANGE_JPEG;
 431         s->pix_fmt = pix_fmt_rgb[bits];
 432         if (avctx->profile & 1) {
 433             if (get_bits1(&s->gb)) {
 434                 av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
 435                 return AVERROR_INVALIDDATA;
 436             }
 437         } else {
 438             av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
 439                    avctx->profile);
 440             return AVERROR_INVALIDDATA;
 441         }
 442     } else {
 443         static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
 444             { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
 445               { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
 446             { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
 447               { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
 448             { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
 449               { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
 450         };
 451         avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
 452         if (avctx->profile & 1) {
 453             s->ss_h = get_bits1(&s->gb);
 454             s->ss_v = get_bits1(&s->gb);
 455             s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
 456             if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
 457                 av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
 458                        avctx->profile);
 459                 return AVERROR_INVALIDDATA;
 460             } else if (get_bits1(&s->gb)) {
 461                 av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
 462                        avctx->profile);
 463                 return AVERROR_INVALIDDATA;
 464             }
 465         } else {
 466             s->ss_h = s->ss_v = 1;
 467             s->pix_fmt = pix_fmt_for_ss[bits][1][1];
 468         }
 469     }
 470
 471     return 0;
 472 }
 473
 474 static int decode_frame_header(AVCodecContext *avctx,
 475                                const uint8_t *data, int size, int *ref)
 476 {
 477     VP9Context *s = avctx->priv_data;
 478     int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
 479     int last_invisible;
 480     const uint8_t *data2;
 481
 482     /* general header */
 483     if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
 484         av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
 485         return ret;
 486     }
 487     if (get_bits(&s->gb, 2) != 0x2) { // frame marker
 488         av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
 489         return AVERROR_INVALIDDATA;
 490     }
 491     avctx->profile  = get_bits1(&s->gb);
 492     avctx->profile |= get_bits1(&s->gb) << 1;
 493     if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
 494     if (avctx->profile > 3) {
 495         av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
 496         return AVERROR_INVALIDDATA;
 497     }
 498     s->s.h.profile = avctx->profile;
 499     if (get_bits1(&s->gb)) {
 500         *ref = get_bits(&s->gb, 3);
 501         return 0;
 502     }
 503
 504     s->last_keyframe  = s->s.h.keyframe;
 505     s->s.h.keyframe   = !get_bits1(&s->gb);
 506
 507     last_invisible   = s->s.h.invisible;
 508     s->s.h.invisible = !get_bits1(&s->gb);
 509     s->s.h.errorres  = get_bits1(&s->gb);
 510     s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
 511
 512     if (s->s.h.keyframe) {
 513         if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
 514             av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
 515             return AVERROR_INVALIDDATA;
 516         }
 517         if ((ret = read_colorspace_details(avctx)) < 0)
 518             return ret;
 519         // for profile 1, here follows the subsampling bits
 520         s->s.h.refreshrefmask = 0xff;
 521         w = get_bits(&s->gb, 16) + 1;
 522         h = get_bits(&s->gb, 16) + 1;
 523         if (get_bits1(&s->gb)) // display size
 524             skip_bits(&s->gb, 32);
 525     } else {
 526         s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
 527         s->s.h.resetctx  = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
 528         if (s->s.h.intraonly) {
 529             if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
 530                 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
 531                 return AVERROR_INVALIDDATA;
 532             }
 533             if (avctx->profile >= 1) {
 534                 if ((ret = read_colorspace_details(avctx)) < 0)
 535                     return ret;
 536             } else {
 537                 s->ss_h = s->ss_v = 1;
 538                 s->s.h.bpp = 8;
 539                 s->bpp_index = 0;
 540                 s->bytesperpixel = 1;
 541                 s->pix_fmt = AV_PIX_FMT_YUV420P;
 542                 avctx->colorspace = AVCOL_SPC_BT470BG;
 543                 avctx->color_range = AVCOL_RANGE_MPEG;
 544             }
 545             s->s.h.refreshrefmask = get_bits(&s->gb, 8);
 546             w = get_bits(&s->gb, 16) + 1;
 547             h = get_bits(&s->gb, 16) + 1;
 548             if (get_bits1(&s->gb)) // display size
 549                 skip_bits(&s->gb, 32);
 550         } else {
 551             s->s.h.refreshrefmask = get_bits(&s->gb, 8);
 552             s->s.h.refidx[0]      = get_bits(&s->gb, 3);
 553             s->s.h.signbias[0]    = get_bits1(&s->gb) && !s->s.h.errorres;
 554             s->s.h.refidx[1]      = get_bits(&s->gb, 3);
 555             s->s.h.signbias[1]    = get_bits1(&s->gb) && !s->s.h.errorres;
 556             s->s.h.refidx[2]      = get_bits(&s->gb, 3);
 557             s->s.h.signbias[2]    = get_bits1(&s->gb) && !s->s.h.errorres;
 558             if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
 559                 !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
 560                 !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
 561                 av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
 562                 return AVERROR_INVALIDDATA;
 563             }
 564             if (get_bits1(&s->gb)) {
 565                 w = s->s.refs[s->s.h.refidx[0]].f->width;
 566                 h = s->s.refs[s->s.h.refidx[0]].f->height;
 567             } else if (get_bits1(&s->gb)) {
 568                 w = s->s.refs[s->s.h.refidx[1]].f->width;
 569                 h = s->s.refs[s->s.h.refidx[1]].f->height;
 570             } else if (get_bits1(&s->gb)) {
 571                 w = s->s.refs[s->s.h.refidx[2]].f->width;
 572                 h = s->s.refs[s->s.h.refidx[2]].f->height;
 573             } else {
 574                 w = get_bits(&s->gb, 16) + 1;
 575                 h = get_bits(&s->gb, 16) + 1;
 576             }
 577             // Note that in this code, "CUR_FRAME" is actually before we
 578             // have formally allocated a frame, and thus actually represents
 579             // the _last_ frame
 580             s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
 581                                        s->s.frames[CUR_FRAME].tf.f->height == h;
 582             if (get_bits1(&s->gb)) // display size
 583                 skip_bits(&s->gb, 32);
 584             s->s.h.highprecisionmvs = get_bits1(&s->gb);
 585             s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
 586                                                   get_bits(&s->gb, 2);
 587             s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
 588                                   s->s.h.signbias[0] != s->s.h.signbias[2];
 589             if (s->s.h.allowcompinter) {
 590                 if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
 591                     s->s.h.fixcompref    = 2;
 592                     s->s.h.varcompref[0] = 0;
 593                     s->s.h.varcompref[1] = 1;
 594                 } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
 595                     s->s.h.fixcompref    = 1;
 596                     s->s.h.varcompref[0] = 0;
 597                     s->s.h.varcompref[1] = 2;
 598                 } else {
 599                     s->s.h.fixcompref    = 0;
 600                     s->s.h.varcompref[0] = 1;
 601                     s->s.h.varcompref[1] = 2;
 602                 }
 603             }
 604         }
 605     }
 606     s->s.h.refreshctx   = s->s.h.errorres ? 0 : get_bits1(&s->gb);
 607     s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
 608     s->s.h.framectxid   = c = get_bits(&s->gb, 2);
 609     if (s->s.h.keyframe || s->s.h.intraonly)
 610         s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
 611
 612     /* loopfilter header data */
 613     if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
 614         // reset loopfilter defaults
 615         s->s.h.lf_delta.ref[0] = 1;
 616         s->s.h.lf_delta.ref[1] = 0;
 617         s->s.h.lf_delta.ref[2] = -1;
 618         s->s.h.lf_delta.ref[3] = -1;
 619         s->s.h.lf_delta.mode[0] = 0;
 620         s->s.h.lf_delta.mode[1] = 0;
 621         memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
 622     }
 623     s->s.h.filter.level = get_bits(&s->gb, 6);
 624     sharp = get_bits(&s->gb, 3);
 625     // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
 626     // the old cache values since they are still valid
 627     if (s->s.h.filter.sharpness != sharp) {
 628         for (i = 1; i <= 63; i++) {
 629             int limit = i;
 630
 631             if (sharp > 0) {
 632                 limit >>= (sharp + 3) >> 2;
 633                 limit = FFMIN(limit, 9 - sharp);
 634             }
 635             limit = FFMAX(limit, 1);
 636
 637             s->filter_lut.lim_lut[i] = limit;
 638             s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
 639         }
 640     }
 641     s->s.h.filter.sharpness = sharp;
 642     if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
 643         if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
 644             for (i = 0; i < 4; i++)
 645                 if (get_bits1(&s->gb))
 646                     s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
 647             for (i = 0; i < 2; i++)
 648                 if (get_bits1(&s->gb))
 649                     s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
 650         }
 651     }
 652
 653     /* quantization header data */
 654     s->s.h.yac_qi      = get_bits(&s->gb, 8);
 655     s->s.h.ydc_qdelta  = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
 656     s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
 657     s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
 658     s->s.h.lossless    = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
 659                        s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
 660     if (s->s.h.lossless)
 661         avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
 662
 663     /* segmentation header info */
 664     if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
 665         if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
 666             for (i = 0; i < 7; i++)
 667                 s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
 668                                  get_bits(&s->gb, 8) : 255;
 669             if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
 670                 for (i = 0; i < 3; i++)
 671                     s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
 672                                          get_bits(&s->gb, 8) : 255;
 673         }
 674
 675         if (get_bits1(&s->gb)) {
 676             s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
 677             for (i = 0; i < 8; i++) {
 678                 if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
 679                     s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
 680                 if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
 681                     s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
 682                 if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
 683                     s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
 684                 s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
 685             }
 686         }
 687     }
 688
 689     // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
 690     for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
 691         int qyac, qydc, quvac, quvdc, lflvl, sh;
 692
 693         if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
 694             if (s->s.h.segmentation.absolute_vals)
 695                 qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
 696             else
 697                 qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
 698         } else {
 699             qyac  = s->s.h.yac_qi;
 700         }
 701         qydc  = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
 702         quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
 703         quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
 704         qyac  = av_clip_uintp2(qyac, 8);
 705
 706         s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
 707         s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
 708         s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
 709         s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];
 710
 711         sh = s->s.h.filter.level >= 32;
 712         if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
 713             if (s->s.h.segmentation.absolute_vals)
 714                 lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
 715             else
 716                 lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
 717         } else {
 718             lflvl  = s->s.h.filter.level;
 719         }
 720         if (s->s.h.lf_delta.enabled) {
 721             s->s.h.segmentation.feat[i].lflvl[0][0] =
 722             s->s.h.segmentation.feat[i].lflvl[0][1] =
 723                 av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
 724             for (j = 1; j < 4; j++) {
 725                 s->s.h.segmentation.feat[i].lflvl[j][0] =
 726                     av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
 727                                              s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
 728                 s->s.h.segmentation.feat[i].lflvl[j][1] =
 729                     av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
 730                                              s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
 731             }
 732         } else {
 733             memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
 734                    sizeof(s->s.h.segmentation.feat[i].lflvl));
 735         }
 736     }
 737
 738     /* tiling info */
 739     if ((ret = update_size(avctx, w, h)) < 0) {
 740         av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
 741                w, h, s->pix_fmt);
 742         return ret;
 743     }
 744     for (s->s.h.tiling.log2_tile_cols = 0;
 745          s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
 746          s->s.h.tiling.log2_tile_cols++) ;
 747     for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
 748     max = FFMAX(0, max - 1);
 749     while (max > s->s.h.tiling.log2_tile_cols) {
 750         if (get_bits1(&s->gb))
 751             s->s.h.tiling.log2_tile_cols++;
 752         else
 753             break;
 754     }
 755     s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
 756     s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
 757     if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
 758         int n_range_coders;
 759         VP56RangeCoder *rc;
 760
 761         if (s->td) {
 762             for (i = 0; i < s->active_tile_cols; i++) {
 763                 av_free(s->td[i].b_base);
 764                 av_free(s->td[i].block_base);
 765             }
 766             av_free(s->td);
 767         }
 768
 769         s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
 770         vp9_free_entries(avctx);
 771         s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
 772                               s->s.h.tiling.tile_cols : 1;
 773         vp9_alloc_entries(avctx, s->sb_rows);
 774         if (avctx->active_thread_type == FF_THREAD_SLICE) {
 775             n_range_coders = 4; // max_tile_rows
 776         } else {
 777             n_range_coders = s->s.h.tiling.tile_cols;
 778         }
 779         s->td = av_mallocz_array(s->active_tile_cols, sizeof(VP9TileData) +
 780                                  n_range_coders * sizeof(VP56RangeCoder));
 781         if (!s->td)
 782             return AVERROR(ENOMEM);
 783         rc = (VP56RangeCoder *) &s->td[s->active_tile_cols];
 784         for (i = 0; i < s->active_tile_cols; i++) {
 785             s->td[i].s = s;
 786             s->td[i].c_b = rc;
 787             rc += n_range_coders;
 788         }
 789     }
 790
 791     /* check reference frames */
 792     if (!s->s.h.keyframe && !s->s.h.intraonly) {
 793         for (i = 0; i < 3; i++) {
 794             AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
 795             int refw = ref->width, refh = ref->height;
 796
 797             if (ref->format != avctx->pix_fmt) {
 798                 av_log(avctx, AV_LOG_ERROR,
 799                        "Ref pixfmt (%s) did not match current frame (%s)",
 800                        av_get_pix_fmt_name(ref->format),
 801                        av_get_pix_fmt_name(avctx->pix_fmt));
 802                 return AVERROR_INVALIDDATA;
 803             } else if (refw == w && refh == h) {
 804                 s->mvscale[i][0] = s->mvscale[i][1] = 0;
 805             } else {
 806                 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
 807                     av_log(avctx, AV_LOG_ERROR,
 808                            "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
 809                            refw, refh, w, h);
 810                     return AVERROR_INVALIDDATA;
 811                 }
 812                 s->mvscale[i][0] = (refw << 14) / w;
 813                 s->mvscale[i][1] = (refh << 14) / h;
 814                 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
 815                 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
 816             }
 817         }
 818     }
 819
 820     if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
 821         s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
 822                            s->prob_ctx[3].p = ff_vp9_default_probs;
 823         memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
 824                sizeof(ff_vp9_default_coef_probs));
 825         memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
 826                sizeof(ff_vp9_default_coef_probs));
 827         memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
 828                sizeof(ff_vp9_default_coef_probs));
 829         memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
 830                sizeof(ff_vp9_default_coef_probs));
 831     } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
 832         s->prob_ctx[c].p = ff_vp9_default_probs;
 833         memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
 834                sizeof(ff_vp9_default_coef_probs));
 835     }
 836
 837     // next 16 bits is size of the rest of the header (arith-coded)
 838     s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
 839     s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
 840
 841     data2 = align_get_bits(&s->gb);
 842     if (size2 > size - (data2 - data)) {
 843         av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
 844         return AVERROR_INVALIDDATA;
 845     }
 846     ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
 847     if (ret < 0)
 848         return ret;
 849
 850     if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
 851         av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
 852         return AVERROR_INVALIDDATA;
 853     }
 854
 855     for (i = 0; i < s->active_tile_cols; i++) {
 856         if (s->s.h.keyframe || s->s.h.intraonly) {
 857             memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
 858             memset(s->td[i].counts.eob,  0, sizeof(s->td[0].counts.eob));
 859         } else {
 860             memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));
 861         }
 862     }
 863
 864     /* FIXME is it faster to not copy here, but do it down in the fw updates
 865      * as explicit copies if the fw update is missing (and skip the copy upon
 866      * fw update)? */
 867     s->prob.p = s->prob_ctx[c].p;
 868
 869     // txfm updates
 870     if (s->s.h.lossless) {
 871         s->s.h.txfmmode = TX_4X4;
 872     } else {
 873         s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
 874         if (s->s.h.txfmmode == 3)
 875             s->s.h.txfmmode += vp8_rac_get(&s->c);
 876
 877         if (s->s.h.txfmmode == TX_SWITCHABLE) {
 878             for (i = 0; i < 2; i++)
 879                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 880                     s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
 881             for (i = 0; i < 2; i++)
 882                 for (j = 0; j < 2; j++)
 883                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 884                         s->prob.p.tx16p[i][j] =
 885                             update_prob(&s->c, s->prob.p.tx16p[i][j]);
 886             for (i = 0; i < 2; i++)
 887                 for (j = 0; j < 3; j++)
 888                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 889                         s->prob.p.tx32p[i][j] =
 890                             update_prob(&s->c, s->prob.p.tx32p[i][j]);
 891         }
 892     }
 893
 894     // coef updates
 895     for (i = 0; i < 4; i++) {
 896         uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
 897         if (vp8_rac_get(&s->c)) {
 898             for (j = 0; j < 2; j++)
 899                 for (k = 0; k < 2; k++)
 900                     for (l = 0; l < 6; l++)
 901                         for (m = 0; m < 6; m++) {
 902                             uint8_t *p = s->prob.coef[i][j][k][l][m];
 903                             uint8_t *r = ref[j][k][l][m];
 904                             if (m >= 3 && l == 0) // dc only has 3 pt
 905                                 break;
 906                             for (n = 0; n < 3; n++) {
 907                                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 908                                     p[n] = update_prob(&s->c, r[n]);
 909                                 else
 910                                     p[n] = r[n];
 911                             }
 912                             memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
 913                         }
 914         } else {
 915             for (j = 0; j < 2; j++)
 916                 for (k = 0; k < 2; k++)
 917                     for (l = 0; l < 6; l++)
 918                         for (m = 0; m < 6; m++) {
 919                             uint8_t *p = s->prob.coef[i][j][k][l][m];
 920                             uint8_t *r = ref[j][k][l][m];
 921                             if (m > 3 && l == 0) // dc only has 3 pt
 922                                 break;
 923                             memcpy(p, r, 3);
 924                             memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
 925                         }
 926         }
 927         if (s->s.h.txfmmode == i)
 928             break;
 929     }
 930
 931     // mode updates
 932     for (i = 0; i < 3; i++)
 933         if (vp56_rac_get_prob_branchy(&s->c, 252))
 934             s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
 935     if (!s->s.h.keyframe && !s->s.h.intraonly) {
 936         for (i = 0; i < 7; i++)
 937             for (j = 0; j < 3; j++)
 938                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 939                     s->prob.p.mv_mode[i][j] =
 940                         update_prob(&s->c, s->prob.p.mv_mode[i][j]);
 941
 942         if (s->s.h.filtermode == FILTER_SWITCHABLE)
 943             for (i = 0; i < 4; i++)
 944                 for (j = 0; j < 2; j++)
 945                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 946                         s->prob.p.filter[i][j] =
 947                             update_prob(&s->c, s->prob.p.filter[i][j]);
 948
 949         for (i = 0; i < 4; i++)
 950             if (vp56_rac_get_prob_branchy(&s->c, 252))
 951                 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
 952
 953         if (s->s.h.allowcompinter) {
 954             s->s.h.comppredmode = vp8_rac_get(&s->c);
 955             if (s->s.h.comppredmode)
 956                 s->s.h.comppredmode += vp8_rac_get(&s->c);
 957             if (s->s.h.comppredmode == PRED_SWITCHABLE)
 958                 for (i = 0; i < 5; i++)
 959                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 960                         s->prob.p.comp[i] =
 961                             update_prob(&s->c, s->prob.p.comp[i]);
 962         } else {
 963             s->s.h.comppredmode = PRED_SINGLEREF;
 964         }
 965
 966         if (s->s.h.comppredmode != PRED_COMPREF) {
 967             for (i = 0; i < 5; i++) {
 968                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 969                     s->prob.p.single_ref[i][0] =
 970                         update_prob(&s->c, s->prob.p.single_ref[i][0]);
 971                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 972                     s->prob.p.single_ref[i][1] =
 973                         update_prob(&s->c, s->prob.p.single_ref[i][1]);
 974             }
 975         }
 976
 977         if (s->s.h.comppredmode != PRED_SINGLEREF) {
 978             for (i = 0; i < 5; i++)
 979                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 980                     s->prob.p.comp_ref[i] =
 981                         update_prob(&s->c, s->prob.p.comp_ref[i]);
 982         }
 983
 984         for (i = 0; i < 4; i++)
 985             for (j = 0; j < 9; j++)
 986                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 987                     s->prob.p.y_mode[i][j] =
 988                         update_prob(&s->c, s->prob.p.y_mode[i][j]);
 989
 990         for (i = 0; i < 4; i++)
 991             for (j = 0; j < 4; j++)
 992                 for (k = 0; k < 3; k++)
 993                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 994                         s->prob.p.partition[3 - i][j][k] =
 995                             update_prob(&s->c,
 996                                         s->prob.p.partition[3 - i][j][k]);
 997
 998         // mv fields don't use the update_prob subexp model for some reason
 999         for (i = 0; i < 3; i++)
1000             if (vp56_rac_get_prob_branchy(&s->c, 252))
1001                 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1002
1003         for (i = 0; i < 2; i++) {
1004             if (vp56_rac_get_prob_branchy(&s->c, 252))
1005                 s->prob.p.mv_comp[i].sign =
1006                     (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1007
1008             for (j = 0; j < 10; j++)
1009                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1010                     s->prob.p.mv_comp[i].classes[j] =
1011                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1012
1013             if (vp56_rac_get_prob_branchy(&s->c, 252))
1014                 s->prob.p.mv_comp[i].class0 =
1015                     (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1016
1017             for (j = 0; j < 10; j++)
1018                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1019                     s->prob.p.mv_comp[i].bits[j] =
1020                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1021         }
1022
1023         for (i = 0; i < 2; i++) {
1024             for (j = 0; j < 2; j++)
1025                 for (k = 0; k < 3; k++)
1026                     if (vp56_rac_get_prob_branchy(&s->c, 252))
1027                         s->prob.p.mv_comp[i].class0_fp[j][k] =
1028                             (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1029
1030             for (j = 0; j < 3; j++)
1031                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1032                     s->prob.p.mv_comp[i].fp[j] =
1033                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1034         }
1035
1036         if (s->s.h.highprecisionmvs) {
1037             for (i = 0; i < 2; i++) {
1038                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1039                     s->prob.p.mv_comp[i].class0_hp =
1040                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1041
1042                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1043                     s->prob.p.mv_comp[i].hp =
1044                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1045             }
1046         }
1047     }
1048
1049     return (data2 - data) + size2;
1050 }
1051
1052 static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1053                       ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1054 {
1055     const VP9Context *s = td->s;
1056     int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1057             (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1058     const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
1059                                                      s->prob.p.partition[bl][c];
1060     enum BlockPartition bp;
1061     ptrdiff_t hbs = 4 >> bl;
1062     AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1063     ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1064     int bytesperpixel = s->bytesperpixel;
1065
1066     if (bl == BL_8X8) {
1067         bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1068         ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1069     } else if (col + hbs < s->cols) { // FIXME why not <=?
1070         if (row + hbs < s->rows) { // FIXME why not <=?
1071             bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1072             switch (bp) {
1073             case PARTITION_NONE:
1074                 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1075                 break;
1076             case PARTITION_H:
1077                 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1078                 yoff  += hbs * 8 * y_stride;
1079                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1080                 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
1081                 break;
1082             case PARTITION_V:
1083                 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1084                 yoff  += hbs * 8 * bytesperpixel;
1085                 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1086                 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
1087                 break;
1088             case PARTITION_SPLIT:
1089                 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1090                 decode_sb(td, row, col + hbs, lflvl,
1091                           yoff + 8 * hbs * bytesperpixel,
1092                           uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1093                 yoff  += hbs * 8 * y_stride;
1094                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1095                 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1096                 decode_sb(td, row + hbs, col + hbs, lflvl,
1097                           yoff + 8 * hbs * bytesperpixel,
1098                           uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1099                 break;
1100             default:
1101                 av_assert0(0);
1102             }
1103         } else if (vp56_rac_get_prob_branchy(td->c, p[1])) {
1104             bp = PARTITION_SPLIT;
1105             decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1106             decode_sb(td, row, col + hbs, lflvl,
1107                       yoff + 8 * hbs * bytesperpixel,
1108                       uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1109         } else {
1110             bp = PARTITION_H;
1111             ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1112         }
1113     } else if (row + hbs < s->rows) { // FIXME why not <=?
1114         if (vp56_rac_get_prob_branchy(td->c, p[2])) {
1115             bp = PARTITION_SPLIT;
1116             decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1117             yoff  += hbs * 8 * y_stride;
1118             uvoff += hbs * 8 * uv_stride >> s->ss_v;
1119             decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1120         } else {
1121             bp = PARTITION_V;
1122             ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1123         }
1124     } else {
1125         bp = PARTITION_SPLIT;
1126         decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1127     }
1128     td->counts.partition[bl][c][bp]++;
1129 }
1130
1131 static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1132                           ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1133 {
1134     const VP9Context *s = td->s;
1135     VP9Block *b = td->b;
1136     ptrdiff_t hbs = 4 >> bl;
1137     AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1138     ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1139     int bytesperpixel = s->bytesperpixel;
1140
1141     if (bl == BL_8X8) {
1142         av_assert2(b->bl == BL_8X8);
1143         ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1144     } else if (td->b->bl == bl) {
1145         ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1146         if (b->bp == PARTITION_H && row + hbs < s->rows) {
1147             yoff  += hbs * 8 * y_stride;
1148             uvoff += hbs * 8 * uv_stride >> s->ss_v;
1149             ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
1150         } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
1151             yoff  += hbs * 8 * bytesperpixel;
1152             uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1153             ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
1154         }
1155     } else {
1156         decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
1157         if (col + hbs < s->cols) { // FIXME why not <=?
1158             if (row + hbs < s->rows) {
1159                 decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1160                               uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1161                 yoff  += hbs * 8 * y_stride;
1162                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1163                 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1164                 decode_sb_mem(td, row + hbs, col + hbs, lflvl,
1165                               yoff + 8 * hbs * bytesperpixel,
1166                               uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1167             } else {
1168                 yoff  += hbs * 8 * bytesperpixel;
1169                 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1170                 decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
1171             }
1172         } else if (row + hbs < s->rows) {
1173             yoff  += hbs * 8 * y_stride;
1174             uvoff += hbs * 8 * uv_stride >> s->ss_v;
1175             decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1176         }
1177     }
1178 }
1179
/**
 * Compute the half-open range [*start, *end) covered by tile number idx
 * when n units are divided into (1 << log2_n) tiles.
 *
 * Both boundaries are clamped to n and then scaled by 8 (<< 3) before being
 * stored.
 *
 * @param start  receives the scaled first position of the tile
 * @param end    receives the scaled position one past the tile
 * @param idx    tile index
 * @param log2_n log2 of the number of tiles
 * @param n      total number of units being divided
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    const int first = (n * idx) >> log2_n;
    const int limit = (n * (idx + 1)) >> log2_n;
    const int first_clamped = FFMIN(first, n);
    const int limit_clamped = FFMIN(limit, n);

    *start = first_clamped << 3;
    *end   = limit_clamped << 3;
}
1187
1188 static void free_buffers(VP9Context *s)
1189 {
1190     int i;
1191
1192     av_freep(&s->intra_pred_data[0]);
1193     for (i = 0; i < s->active_tile_cols; i++) {
1194         av_freep(&s->td[i].b_base);
1195         av_freep(&s->td[i].block_base);
1196     }
1197 }
1198
1199 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1200 {
1201     VP9Context *s = avctx->priv_data;
1202     int i;
1203
1204     for (i = 0; i < 3; i++) {
1205         if (s->s.frames[i].tf.f->buf[0])
1206             vp9_frame_unref(avctx, &s->s.frames[i]);
1207         av_frame_free(&s->s.frames[i].tf.f);
1208     }
1209     for (i = 0; i < 8; i++) {
1210         if (s->s.refs[i].f->buf[0])
1211             ff_thread_release_buffer(avctx, &s->s.refs[i]);
1212         av_frame_free(&s->s.refs[i].f);
1213         if (s->next_refs[i].f->buf[0])
1214             ff_thread_release_buffer(avctx, &s->next_refs[i]);
1215         av_frame_free(&s->next_refs[i].f);
1216     }
1217
1218     free_buffers(s);
1219     vp9_free_entries(avctx);
1220     av_freep(&s->td);
1221     return 0;
1222 }
1223
1224 static int decode_tiles(AVCodecContext *avctx,
1225                         const uint8_t *data, int size)
1226 {
1227     VP9Context *s = avctx->priv_data;
1228     VP9TileData *td = &s->td[0];
1229     int row, col, tile_row, tile_col, ret;
1230     int bytesperpixel;
1231     int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1232     AVFrame *f;
1233     ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1234
1235     f = s->s.frames[CUR_FRAME].tf.f;
1236     ls_y = f->linesize[0];
1237     ls_uv =f->linesize[1];
1238     bytesperpixel = s->bytesperpixel;
1239
1240     yoff = uvoff = 0;
1241     for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1242         set_tile_offset(&tile_row_start, &tile_row_end,
1243                         tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1244
1245         for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1246             int64_t tile_size;
1247
1248             if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1249                 tile_row == s->s.h.tiling.tile_rows - 1) {
1250                 tile_size = size;
1251             } else {
1252                 tile_size = AV_RB32(data);
1253                 data += 4;
1254                 size -= 4;
1255             }
1256             if (tile_size > size) {
1257                 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1258                 return AVERROR_INVALIDDATA;
1259             }
1260             ret = ff_vp56_init_range_decoder(&td->c_b[tile_col], data, tile_size);
1261             if (ret < 0)
1262                 return ret;
1263             if (vp56_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker bit
1264                 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1265                 return AVERROR_INVALIDDATA;
1266             }
1267             data += tile_size;
1268             size -= tile_size;
1269         }
1270
1271         for (row = tile_row_start; row < tile_row_end;
1272              row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1273             VP9Filter *lflvl_ptr = s->lflvl;
1274             ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1275
1276             for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1277                 set_tile_offset(&tile_col_start, &tile_col_end,
1278                                 tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1279                 td->tile_col_start = tile_col_start;
1280                 if (s->pass != 2) {
1281                     memset(td->left_partition_ctx, 0, 8);
1282                     memset(td->left_skip_ctx, 0, 8);
1283                     if (s->s.h.keyframe || s->s.h.intraonly) {
1284                         memset(td->left_mode_ctx, DC_PRED, 16);
1285                     } else {
1286                         memset(td->left_mode_ctx, NEARESTMV, 8);
1287                     }
1288                     memset(td->left_y_nnz_ctx, 0, 16);
1289                     memset(td->left_uv_nnz_ctx, 0, 32);
1290                     memset(td->left_segpred_ctx, 0, 8);
1291
1292                     td->c = &td->c_b[tile_col];
1293                 }
1294
1295                 for (col = tile_col_start;
1296                      col < tile_col_end;
1297                      col += 8, yoff2 += 64 * bytesperpixel,
1298                      uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1299                     // FIXME integrate with lf code (i.e. zero after each
1300                     // use, similar to invtxfm coefficients, or similar)
1301                     if (s->pass != 1) {
1302                         memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1303                     }
1304
1305                     if (s->pass == 2) {
1306                         decode_sb_mem(td, row, col, lflvl_ptr,
1307                                       yoff2, uvoff2, BL_64X64);
1308                     } else {
1309                         if (vpX_rac_is_end(td->c)) {
1310                             return AVERROR_INVALIDDATA;
1311                         }
1312                         decode_sb(td, row, col, lflvl_ptr,
1313                                   yoff2, uvoff2, BL_64X64);
1314                     }
1315                 }
1316             }
1317
1318             if (s->pass == 1)
1319                 continue;
1320
1321             // backup pre-loopfilter reconstruction data for intra
1322             // prediction of next row of sb64s
1323             if (row + 8 < s->rows) {
1324                 memcpy(s->intra_pred_data[0],
1325                        f->data[0] + yoff + 63 * ls_y,
1326                        8 * s->cols * bytesperpixel);
1327                 memcpy(s->intra_pred_data[1],
1328                        f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1329                        8 * s->cols * bytesperpixel >> s->ss_h);
1330                 memcpy(s->intra_pred_data[2],
1331                        f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1332                        8 * s->cols * bytesperpixel >> s->ss_h);
1333             }
1334
1335             // loopfilter one row
1336             if (s->s.h.filter.level) {
1337                 yoff2 = yoff;
1338                 uvoff2 = uvoff;
1339                 lflvl_ptr = s->lflvl;
1340                 for (col = 0; col < s->cols;
1341                      col += 8, yoff2 += 64 * bytesperpixel,
1342                      uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1343                     ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
1344                                          yoff2, uvoff2);
1345                 }
1346             }
1347
1348             // FIXME maybe we can make this more finegrained by running the
1349             // loopfilter per-block instead of after each sbrow
1350             // In fact that would also make intra pred left preparation easier?
1351             ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
1352         }
1353     }
1354     return 0;
1355 }
1356
1357 #if HAVE_THREADS
1358 static av_always_inline
1359 int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
1360                               int threadnr)
1361 {
1362     VP9Context *s = avctx->priv_data;
1363     VP9TileData *td = &s->td[jobnr];
1364     ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1365     int bytesperpixel = s->bytesperpixel, row, col, tile_row;
1366     unsigned tile_cols_len;
1367     int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1368     VP9Filter *lflvl_ptr_base;
1369     AVFrame *f;
1370
1371     f = s->s.frames[CUR_FRAME].tf.f;
1372     ls_y = f->linesize[0];
1373     ls_uv =f->linesize[1];
1374
1375     set_tile_offset(&tile_col_start, &tile_col_end,
1376                     jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1377     td->tile_col_start  = tile_col_start;
1378     uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3);
1379     yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1380     lflvl_ptr_base = s->lflvl+(tile_col_start >> 3);
1381
1382     for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1383         set_tile_offset(&tile_row_start, &tile_row_end,
1384                         tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1385
1386         td->c = &td->c_b[tile_row];
1387         for (row = tile_row_start; row < tile_row_end;
1388              row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1389             ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1390             VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3);
1391
1392             memset(td->left_partition_ctx, 0, 8);
1393             memset(td->left_skip_ctx, 0, 8);
1394             if (s->s.h.keyframe || s->s.h.intraonly) {
1395                 memset(td->left_mode_ctx, DC_PRED, 16);
1396             } else {
1397                 memset(td->left_mode_ctx, NEARESTMV, 8);
1398             }
1399             memset(td->left_y_nnz_ctx, 0, 16);
1400             memset(td->left_uv_nnz_ctx, 0, 32);
1401             memset(td->left_segpred_ctx, 0, 8);
1402
1403             for (col = tile_col_start;
1404                  col < tile_col_end;
1405                  col += 8, yoff2 += 64 * bytesperpixel,
1406                  uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1407                 // FIXME integrate with lf code (i.e. zero after each
1408                 // use, similar to invtxfm coefficients, or similar)
1409                 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1410                 decode_sb(td, row, col, lflvl_ptr,
1411                             yoff2, uvoff2, BL_64X64);
1412             }
1413
1414             // backup pre-loopfilter reconstruction data for intra
1415             // prediction of next row of sb64s
1416             tile_cols_len = tile_col_end - tile_col_start;
1417             if (row + 8 < s->rows) {
1418                 memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1419                        f->data[0] + yoff + 63 * ls_y,
1420                        8 * tile_cols_len * bytesperpixel);
1421                 memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1422                        f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1423                        8 * tile_cols_len * bytesperpixel >> s->ss_h);
1424                 memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1425                        f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1426                        8 * tile_cols_len * bytesperpixel >> s->ss_h);
1427             }
1428
1429             vp9_report_tile_progress(s, row >> 3, 1);
1430         }
1431     }
1432     return 0;
1433 }
1434
1435 static av_always_inline
1436 int loopfilter_proc(AVCodecContext *avctx)
1437 {
1438     VP9Context *s = avctx->priv_data;
1439     ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1440     VP9Filter *lflvl_ptr;
1441     int bytesperpixel = s->bytesperpixel, col, i;
1442     AVFrame *f;
1443
1444     f = s->s.frames[CUR_FRAME].tf.f;
1445     ls_y = f->linesize[0];
1446     ls_uv =f->linesize[1];
1447
1448     for (i = 0; i < s->sb_rows; i++) {
1449         vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);
1450
1451         if (s->s.h.filter.level) {
1452             yoff = (ls_y * 64)*i;
1453             uvoff =  (ls_uv * 64 >> s->ss_v)*i;
1454             lflvl_ptr = s->lflvl+s->sb_cols*i;
1455             for (col = 0; col < s->cols;
1456                  col += 8, yoff += 64 * bytesperpixel,
1457                  uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1458                 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col,
1459                                      yoff, uvoff);
1460             }
1461         }
1462     }
1463     return 0;
1464 }
1465 #endif
1466
1467 static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
1468                             int *got_frame, AVPacket *pkt)
1469 {
1470     const uint8_t *data = pkt->data;
1471     int size = pkt->size;
1472     VP9Context *s = avctx->priv_data;
1473     int ret, i, j, ref;
1474     int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
1475                             (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
1476     AVFrame *f;
1477
1478     if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
1479         return ret;
1480     } else if (ret == 0) {
1481         if (!s->s.refs[ref].f->buf[0]) {
1482             av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
1483             return AVERROR_INVALIDDATA;
1484         }
1485         if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
1486             return ret;
1487         ((AVFrame *)frame)->pts = pkt->pts;
1488 #if FF_API_PKT_PTS
1489 FF_DISABLE_DEPRECATION_WARNINGS
1490         ((AVFrame *)frame)->pkt_pts = pkt->pts;
1491 FF_ENABLE_DEPRECATION_WARNINGS
1492 #endif
1493         ((AVFrame *)frame)->pkt_dts = pkt->dts;
1494         for (i = 0; i < 8; i++) {
1495             if (s->next_refs[i].f->buf[0])
1496                 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1497             if (s->s.refs[i].f->buf[0] &&
1498                 (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
1499                 return ret;
1500         }
1501         *got_frame = 1;
1502         return pkt->size;
1503     }
1504     data += ret;
1505     size -= ret;
1506
1507     if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
1508         if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
1509             vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
1510         if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1511             (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
1512             return ret;
1513     }
1514     if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
1515         vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
1516     if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1517         (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
1518         return ret;
1519     if (s->s.frames[CUR_FRAME].tf.f->buf[0])
1520         vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME]);
1521     if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
1522         return ret;
1523     f = s->s.frames[CUR_FRAME].tf.f;
1524     f->key_frame = s->s.h.keyframe;
1525     f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1526
1527     if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
1528         (s->s.frames[REF_FRAME_MVPAIR].tf.f->width  != s->s.frames[CUR_FRAME].tf.f->width ||
1529          s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
1530         vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
1531     }
1532
1533     // ref frame setup
1534     for (i = 0; i < 8; i++) {
1535         if (s->next_refs[i].f->buf[0])
1536             ff_thread_release_buffer(avctx, &s->next_refs[i]);
1537         if (s->s.h.refreshrefmask & (1 << i)) {
1538             ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
1539         } else if (s->s.refs[i].f->buf[0]) {
1540             ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
1541         }
1542         if (ret < 0)
1543             return ret;
1544     }
1545
1546     if (avctx->hwaccel) {
1547         ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
1548         if (ret < 0)
1549             return ret;
1550         ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
1551         if (ret < 0)
1552             return ret;
1553         ret = avctx->hwaccel->end_frame(avctx);
1554         if (ret < 0)
1555             return ret;
1556         goto finish;
1557     }
1558
1559     // main tile decode loop
1560     memset(s->above_partition_ctx, 0, s->cols);
1561     memset(s->above_skip_ctx, 0, s->cols);
1562     if (s->s.h.keyframe || s->s.h.intraonly) {
1563         memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
1564     } else {
1565         memset(s->above_mode_ctx, NEARESTMV, s->cols);
1566     }
1567     memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
1568     memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
1569     memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
1570     memset(s->above_segpred_ctx, 0, s->cols);
1571     s->pass = s->s.frames[CUR_FRAME].uses_2pass =
1572         avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
1573     if ((ret = update_block_buffers(avctx)) < 0) {
1574         av_log(avctx, AV_LOG_ERROR,
1575                "Failed to allocate block buffers\n");
1576         return ret;
1577     }
1578     if (s->s.h.refreshctx && s->s.h.parallelmode) {
1579         int j, k, l, m;
1580
1581         for (i = 0; i < 4; i++) {
1582             for (j = 0; j < 2; j++)
1583                 for (k = 0; k < 2; k++)
1584                     for (l = 0; l < 6; l++)
1585                         for (m = 0; m < 6; m++)
1586                             memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
1587                                    s->prob.coef[i][j][k][l][m], 3);
1588             if (s->s.h.txfmmode == i)
1589                 break;
1590         }
1591         s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
1592         ff_thread_finish_setup(avctx);
1593     } else if (!s->s.h.refreshctx) {
1594         ff_thread_finish_setup(avctx);
1595     }
1596
1597 #if HAVE_THREADS
1598     if (avctx->active_thread_type & FF_THREAD_SLICE) {
1599         for (i = 0; i < s->sb_rows; i++)
1600             atomic_store(&s->entries[i], 0);
1601     }
1602 #endif
1603
1604     do {
1605         for (i = 0; i < s->active_tile_cols; i++) {
1606             s->td[i].b = s->td[i].b_base;
1607             s->td[i].block = s->td[i].block_base;
1608             s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
1609             s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
1610             s->td[i].eob = s->td[i].eob_base;
1611             s->td[i].uveob[0] = s->td[i].uveob_base[0];
1612             s->td[i].uveob[1] = s->td[i].uveob_base[1];
1613         }
1614
1615 #if HAVE_THREADS
1616         if (avctx->active_thread_type == FF_THREAD_SLICE) {
1617             int tile_row, tile_col;
1618
1619             av_assert1(!s->pass);
1620
1621             for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1622                 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1623                     int64_t tile_size;
1624
1625                     if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1626                         tile_row == s->s.h.tiling.tile_rows - 1) {
1627                         tile_size = size;
1628                     } else {
1629                         tile_size = AV_RB32(data);
1630                         data += 4;
1631                         size -= 4;
1632                     }
1633                     if (tile_size > size)
1634                         return AVERROR_INVALIDDATA;
1635                     ret = ff_vp56_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
1636                     if (ret < 0)
1637                         return ret;
1638                     if (vp56_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
1639                         return AVERROR_INVALIDDATA;
1640                     data += tile_size;
1641                     size -= tile_size;
1642                 }
1643             }
1644
1645             ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
1646         } else
1647 #endif
1648         {
1649             ret = decode_tiles(avctx, data, size);
1650             if (ret < 0) {
1651                 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1652                 return ret;
1653             }
1654         }
1655
1656         // Sum all counts fields into td[0].counts for tile threading
1657         if (avctx->active_thread_type == FF_THREAD_SLICE)
1658             for (i = 1; i < s->s.h.tiling.tile_cols; i++)
1659                 for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
1660                     ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];
1661
1662         if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
1663             ff_vp9_adapt_probs(s);
1664             ff_thread_finish_setup(avctx);
1665         }
1666     } while (s->pass++ == 1);
1667     ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1668
1669 finish:
1670     // ref frame setup
1671     for (i = 0; i < 8; i++) {
1672         if (s->s.refs[i].f->buf[0])
1673             ff_thread_release_buffer(avctx, &s->s.refs[i]);
1674         if (s->next_refs[i].f->buf[0] &&
1675             (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
1676             return ret;
1677     }
1678
1679     if (!s->s.h.invisible) {
1680         if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
1681             return ret;
1682         *got_frame = 1;
1683     }
1684
1685     return pkt->size;
1686 }
1687
1688 static void vp9_decode_flush(AVCodecContext *avctx)
1689 {
1690     VP9Context *s = avctx->priv_data;
1691     int i;
1692
1693     for (i = 0; i < 3; i++)
1694         vp9_frame_unref(avctx, &s->s.frames[i]);
1695     for (i = 0; i < 8; i++)
1696         ff_thread_release_buffer(avctx, &s->s.refs[i]);
1697 }
1698
1699 static int init_frames(AVCodecContext *avctx)
1700 {
1701     VP9Context *s = avctx->priv_data;
1702     int i;
1703
1704     for (i = 0; i < 3; i++) {
1705         s->s.frames[i].tf.f = av_frame_alloc();
1706         if (!s->s.frames[i].tf.f) {
1707             vp9_decode_free(avctx);
1708             av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1709             return AVERROR(ENOMEM);
1710         }
1711     }
1712     for (i = 0; i < 8; i++) {
1713         s->s.refs[i].f = av_frame_alloc();
1714         s->next_refs[i].f = av_frame_alloc();
1715         if (!s->s.refs[i].f || !s->next_refs[i].f) {
1716             vp9_decode_free(avctx);
1717             av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1718             return AVERROR(ENOMEM);
1719         }
1720     }
1721
1722     return 0;
1723 }
1724
1725 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1726 {
1727     VP9Context *s = avctx->priv_data;
1728
1729     avctx->internal->allocate_progress = 1;
1730     s->last_bpp = 0;
1731     s->s.h.filter.sharpness = -1;
1732
1733     return init_frames(avctx);
1734 }
1735
1736 #if HAVE_THREADS
1737 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
1738 {
1739     return init_frames(avctx);
1740 }
1741
1742 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1743 {
1744     int i, ret;
1745     VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
1746
1747     for (i = 0; i < 3; i++) {
1748         if (s->s.frames[i].tf.f->buf[0])
1749             vp9_frame_unref(dst, &s->s.frames[i]);
1750         if (ssrc->s.frames[i].tf.f->buf[0]) {
1751             if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
1752                 return ret;
1753         }
1754     }
1755     for (i = 0; i < 8; i++) {
1756         if (s->s.refs[i].f->buf[0])
1757             ff_thread_release_buffer(dst, &s->s.refs[i]);
1758         if (ssrc->next_refs[i].f->buf[0]) {
1759             if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
1760                 return ret;
1761         }
1762     }
1763
1764     s->s.h.invisible = ssrc->s.h.invisible;
1765     s->s.h.keyframe = ssrc->s.h.keyframe;
1766     s->s.h.intraonly = ssrc->s.h.intraonly;
1767     s->ss_v = ssrc->ss_v;
1768     s->ss_h = ssrc->ss_h;
1769     s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1770     s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1771     s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1772     s->bytesperpixel = ssrc->bytesperpixel;
1773     s->gf_fmt = ssrc->gf_fmt;
1774     s->w = ssrc->w;
1775     s->h = ssrc->h;
1776     s->s.h.bpp = ssrc->s.h.bpp;
1777     s->bpp_index = ssrc->bpp_index;
1778     s->pix_fmt = ssrc->pix_fmt;
1779     memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
1780     memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
1781     memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1782            sizeof(s->s.h.segmentation.feat));
1783
1784     return 0;
1785 }
1786 #endif
1787
1788 AVCodec ff_vp9_decoder = {
1789     .name                  = "vp9",
1790     .long_name             = NULL_IF_CONFIG_SMALL("Google VP9"),
1791     .type                  = AVMEDIA_TYPE_VIDEO,
1792     .id                    = AV_CODEC_ID_VP9,
1793     .priv_data_size        = sizeof(VP9Context),
1794     .init                  = vp9_decode_init,
1795     .close                 = vp9_decode_free,
1796     .decode                = vp9_decode_frame,
1797     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
1798     .caps_internal         = FF_CODEC_CAP_SLICE_THREAD_HAS_MF,
1799     .flush                 = vp9_decode_flush,
1800     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
1801     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
1802     .profiles              = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
1803     .bsfs                  = "vp9_superframe_split",
1804     .hw_configs            = (const AVCodecHWConfigInternal*[]) {
1805 #if CONFIG_VP9_DXVA2_HWACCEL
1806                                HWACCEL_DXVA2(vp9),
1807 #endif
1808 #if CONFIG_VP9_D3D11VA_HWACCEL
1809                                HWACCEL_D3D11VA(vp9),
1810 #endif
1811 #if CONFIG_VP9_D3D11VA2_HWACCEL
1812                                HWACCEL_D3D11VA2(vp9),
1813 #endif
1814 #if CONFIG_VP9_NVDEC_HWACCEL
1815                                HWACCEL_NVDEC(vp9),
1816 #endif
1817 #if CONFIG_VP9_VAAPI_HWACCEL
1818                                HWACCEL_VAAPI(vp9),
1819 #endif
1820                                NULL
1821                            },
1822 };