git.sesse.net Git - ffmpeg/blob - libavcodec/vp9.c

   1 /*
   2  * VP9 compatible video decoder
   3  *
   4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
   5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
   6  *
   7  * This file is part of FFmpeg.
   8  *
   9  * FFmpeg is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * FFmpeg is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with FFmpeg; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
  24 #include "avcodec.h"
  25 #include "get_bits.h"
  26 #include "internal.h"
  27 #include "profiles.h"
  28 #include "thread.h"
  29 #include "videodsp.h"
  30 #include "vp56.h"
  31 #include "vp9.h"
  32 #include "vp9data.h"
  33 #include "vp9dec.h"
  34 #include "libavutil/avassert.h"
  35 #include "libavutil/pixdesc.h"
  36
  37 #define VP9_SYNCCODE 0x498342
  38
  39 #if HAVE_THREADS
  40 static void vp9_free_entries(AVCodecContext *avctx) {
  41     VP9Context *s = avctx->priv_data;
  42
  43     if (avctx->active_thread_type & FF_THREAD_SLICE)  {
  44         pthread_mutex_destroy(&s->progress_mutex);
  45         pthread_cond_destroy(&s->progress_cond);
  46         av_freep(&s->entries);
  47     }
  48 }
  49
  50 static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
  51     VP9Context *s = avctx->priv_data;
  52     int i;
  53
  54     if (avctx->active_thread_type & FF_THREAD_SLICE)  {
  55         if (s->entries)
  56             av_freep(&s->entries);
  57
  58         s->entries = av_malloc_array(n, sizeof(atomic_int));
  59
  60         if (!s->entries) {
  61             av_freep(&s->entries);
  62             return AVERROR(ENOMEM);
  63         }
  64
  65         for (i  = 0; i < n; i++)
  66             atomic_init(&s->entries[i], 0);
  67
  68         pthread_mutex_init(&s->progress_mutex, NULL);
  69         pthread_cond_init(&s->progress_cond, NULL);
  70     }
  71     return 0;
  72 }
  73
  74 static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
  75     pthread_mutex_lock(&s->progress_mutex);
  76     atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
  77     pthread_cond_signal(&s->progress_cond);
  78     pthread_mutex_unlock(&s->progress_mutex);
  79 }
  80
  81 static void vp9_await_tile_progress(VP9Context *s, int field, int n) {
  82     if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
  83         return;
  84
  85     pthread_mutex_lock(&s->progress_mutex);
  86     while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) != n)
  87         pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
  88     pthread_mutex_unlock(&s->progress_mutex);
  89 }
  90 #else
  91 static void vp9_free_entries(AVCodecContext *avctx) {}
  92 static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
  93 #endif
  94
  95 static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
  96 {
  97     ff_thread_release_buffer(avctx, &f->tf);
  98     av_buffer_unref(&f->extradata);
  99     av_buffer_unref(&f->hwaccel_priv_buf);
 100     f->segmentation_map = NULL;
 101     f->hwaccel_picture_private = NULL;
 102 }
 103
 104 static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
 105 {
 106     VP9Context *s = avctx->priv_data;
 107     int ret, sz;
 108
 109     ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
 110     if (ret < 0)
 111         return ret;
 112
 113     sz = 64 * s->sb_cols * s->sb_rows;
 114     f->extradata = av_buffer_allocz(sz * (1 + sizeof(VP9mvrefPair)));
 115     if (!f->extradata) {
 116         goto fail;
 117     }
 118
 119     f->segmentation_map = f->extradata->data;
 120     f->mv = (VP9mvrefPair *) (f->extradata->data + sz);
 121
 122     if (avctx->hwaccel) {
 123         const AVHWAccel *hwaccel = avctx->hwaccel;
 124         av_assert0(!f->hwaccel_picture_private);
 125         if (hwaccel->frame_priv_data_size) {
 126             f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
 127             if (!f->hwaccel_priv_buf)
 128                 goto fail;
 129             f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
 130         }
 131     }
 132
 133     return 0;
 134
 135 fail:
 136     vp9_frame_unref(avctx, f);
 137     return AVERROR(ENOMEM);
 138 }
 139
 140 static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
 141 {
 142     int ret;
 143
 144     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
 145     if (ret < 0)
 146         return ret;
 147
 148     dst->extradata = av_buffer_ref(src->extradata);
 149     if (!dst->extradata)
 150         goto fail;
 151
 152     dst->segmentation_map = src->segmentation_map;
 153     dst->mv = src->mv;
 154     dst->uses_2pass = src->uses_2pass;
 155
 156     if (src->hwaccel_picture_private) {
 157         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
 158         if (!dst->hwaccel_priv_buf)
 159             goto fail;
 160         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
 161     }
 162
 163     return 0;
 164
 165 fail:
 166     vp9_frame_unref(avctx, dst);
 167     return AVERROR(ENOMEM);
 168 }
 169
 170 static int update_size(AVCodecContext *avctx, int w, int h)
 171 {
 172 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
 173                      CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
 174                      CONFIG_VP9_NVDEC_HWACCEL + \
 175                      CONFIG_VP9_VAAPI_HWACCEL)
 176     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
 177     VP9Context *s = avctx->priv_data;
 178     uint8_t *p;
 179     int bytesperpixel = s->bytesperpixel, ret, cols, rows;
 180     int lflvl_len, i;
 181
 182     av_assert0(w > 0 && h > 0);
 183
 184     if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
 185         if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
 186             return ret;
 187
 188         switch (s->pix_fmt) {
 189         case AV_PIX_FMT_YUV420P:
 190 #if CONFIG_VP9_DXVA2_HWACCEL
 191             *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
 192 #endif
 193 #if CONFIG_VP9_D3D11VA_HWACCEL
 194             *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
 195             *fmtp++ = AV_PIX_FMT_D3D11;
 196 #endif
 197 #if CONFIG_VP9_NVDEC_HWACCEL
 198             *fmtp++ = AV_PIX_FMT_CUDA;
 199 #endif
 200 #if CONFIG_VP9_VAAPI_HWACCEL
 201             *fmtp++ = AV_PIX_FMT_VAAPI;
 202 #endif
 203             break;
 204         case AV_PIX_FMT_YUV420P10:
 205         case AV_PIX_FMT_YUV420P12:
 206 #if CONFIG_VP9_NVDEC_HWACCEL
 207             *fmtp++ = AV_PIX_FMT_CUDA;
 208 #endif
 209 #if CONFIG_VP9_VAAPI_HWACCEL
 210             *fmtp++ = AV_PIX_FMT_VAAPI;
 211 #endif
 212             break;
 213         }
 214
 215         *fmtp++ = s->pix_fmt;
 216         *fmtp = AV_PIX_FMT_NONE;
 217
 218         ret = ff_thread_get_format(avctx, pix_fmts);
 219         if (ret < 0)
 220             return ret;
 221
 222         avctx->pix_fmt = ret;
 223         s->gf_fmt  = s->pix_fmt;
 224         s->w = w;
 225         s->h = h;
 226     }
 227
 228     cols = (w + 7) >> 3;
 229     rows = (h + 7) >> 3;
 230
 231     if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
 232         return 0;
 233
 234     s->last_fmt  = s->pix_fmt;
 235     s->sb_cols   = (w + 63) >> 6;
 236     s->sb_rows   = (h + 63) >> 6;
 237     s->cols      = (w + 7) >> 3;
 238     s->rows      = (h + 7) >> 3;
 239     lflvl_len    = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;
 240
 241 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
 242     av_freep(&s->intra_pred_data[0]);
 243     // FIXME we slightly over-allocate here for subsampled chroma, but a little
 244     // bit of padding shouldn't affect performance...
 245     p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
 246                                 lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
 247     if (!p)
 248         return AVERROR(ENOMEM);
 249     assign(s->intra_pred_data[0],  uint8_t *,             64 * bytesperpixel);
 250     assign(s->intra_pred_data[1],  uint8_t *,             64 * bytesperpixel);
 251     assign(s->intra_pred_data[2],  uint8_t *,             64 * bytesperpixel);
 252     assign(s->above_y_nnz_ctx,     uint8_t *,             16);
 253     assign(s->above_mode_ctx,      uint8_t *,             16);
 254     assign(s->above_mv_ctx,        VP56mv(*)[2],          16);
 255     assign(s->above_uv_nnz_ctx[0], uint8_t *,             16);
 256     assign(s->above_uv_nnz_ctx[1], uint8_t *,             16);
 257     assign(s->above_partition_ctx, uint8_t *,              8);
 258     assign(s->above_skip_ctx,      uint8_t *,              8);
 259     assign(s->above_txfm_ctx,      uint8_t *,              8);
 260     assign(s->above_segpred_ctx,   uint8_t *,              8);
 261     assign(s->above_intra_ctx,     uint8_t *,              8);
 262     assign(s->above_comp_ctx,      uint8_t *,              8);
 263     assign(s->above_ref_ctx,       uint8_t *,              8);
 264     assign(s->above_filter_ctx,    uint8_t *,              8);
 265     assign(s->lflvl,               VP9Filter *,            lflvl_len);
 266 #undef assign
 267
 268     if (s->td) {
 269         for (i = 0; i < s->active_tile_cols; i++) {
 270             av_freep(&s->td[i].b_base);
 271             av_freep(&s->td[i].block_base);
 272         }
 273     }
 274
 275     if (s->s.h.bpp != s->last_bpp) {
 276         ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
 277         ff_videodsp_init(&s->vdsp, s->s.h.bpp);
 278         s->last_bpp = s->s.h.bpp;
 279     }
 280
 281     return 0;
 282 }
 283
 284 static int update_block_buffers(AVCodecContext *avctx)
 285 {
 286     int i;
 287     VP9Context *s = avctx->priv_data;
 288     int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
 289     VP9TileData *td = &s->td[0];
 290
 291     if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
 292         return 0;
 293
 294     av_free(td->b_base);
 295     av_free(td->block_base);
 296     chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
 297     chroma_eobs   = 16 * 16 >> (s->ss_h + s->ss_v);
 298     if (s->s.frames[CUR_FRAME].uses_2pass) {
 299         int sbs = s->sb_cols * s->sb_rows;
 300
 301         td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
 302         td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
 303                                     16 * 16 + 2 * chroma_eobs) * sbs);
 304         if (!td->b_base || !td->block_base)
 305             return AVERROR(ENOMEM);
 306         td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
 307         td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
 308         td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
 309         td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
 310         td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;
 311     } else {
 312         for (i = 1; i < s->active_tile_cols; i++) {
 313             if (s->td[i].b_base && s->td[i].block_base) {
 314                 av_free(s->td[i].b_base);
 315                 av_free(s->td[i].block_base);
 316             }
 317         }
 318         for (i = 0; i < s->active_tile_cols; i++) {
 319             s->td[i].b_base = av_malloc(sizeof(VP9Block));
 320             s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
 321                                        16 * 16 + 2 * chroma_eobs);
 322             if (!s->td[i].b_base || !s->td[i].block_base)
 323                 return AVERROR(ENOMEM);
 324             s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
 325             s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
 326             s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
 327             s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
 328             s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;
 329         }
 330     }
 331     s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;
 332
 333     return 0;
 334 }
 335
 336 // The sign bit is at the end, not the start, of a bit sequence
 337 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
 338 {
 339     int v = get_bits(gb, n);
 340     return get_bits1(gb) ? -v : v;
 341 }
 342
 343 static av_always_inline int inv_recenter_nonneg(int v, int m)
 344 {
 345     if (v > 2 * m)
 346         return v;
 347     if (v & 1)
 348         return m - ((v + 1) >> 1);
 349     return m + (v >> 1);
 350 }
 351
 352 // differential forward probability updates
 353 static int update_prob(VP56RangeCoder *c, int p)
 354 {
 355     static const int inv_map_table[255] = {
 356           7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
 357         189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
 358          10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
 359          25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
 360          40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
 361          55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
 362          70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
 363          86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
 364         101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
 365         116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
 366         131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
 367         146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
 368         161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
 369         177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
 370         192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
 371         207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
 372         222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
 373         237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
 374         252, 253, 253,
 375     };
 376     int d;
 377
 378     /* This code is trying to do a differential probability update. For a
 379      * current probability A in the range [1, 255], the difference to a new
 380      * probability of any value can be expressed differentially as 1-A, 255-A
 381      * where some part of this (absolute range) exists both in positive as
 382      * well as the negative part, whereas another part only exists in one
 383      * half. We're trying to code this shared part differentially, i.e.
 384      * times two where the value of the lowest bit specifies the sign, and
 385      * the single part is then coded on top of this. This absolute difference
 386      * then again has a value of [0, 254], but a bigger value in this range
 387      * indicates that we're further away from the original value A, so we
 388      * can code this as a VLC code, since higher values are increasingly
 389      * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
 390      * updates vs. the 'fine, exact' updates further down the range, which
 391      * adds one extra dimension to this differential update model. */
 392
 393     if (!vp8_rac_get(c)) {
 394         d = vp8_rac_get_uint(c, 4) + 0;
 395     } else if (!vp8_rac_get(c)) {
 396         d = vp8_rac_get_uint(c, 4) + 16;
 397     } else if (!vp8_rac_get(c)) {
 398         d = vp8_rac_get_uint(c, 5) + 32;
 399     } else {
 400         d = vp8_rac_get_uint(c, 7);
 401         if (d >= 65)
 402             d = (d << 1) - 65 + vp8_rac_get(c);
 403         d += 64;
 404         av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
 405     }
 406
 407     return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
 408                     255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
 409 }
 410
 411 static int read_colorspace_details(AVCodecContext *avctx)
 412 {
 413     static const enum AVColorSpace colorspaces[8] = {
 414         AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
 415         AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
 416     };
 417     VP9Context *s = avctx->priv_data;
 418     int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
 419
 420     s->bpp_index = bits;
 421     s->s.h.bpp = 8 + bits * 2;
 422     s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
 423     avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
 424     if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
 425         static const enum AVPixelFormat pix_fmt_rgb[3] = {
 426             AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
 427         };
 428         s->ss_h = s->ss_v = 0;
 429         avctx->color_range = AVCOL_RANGE_JPEG;
 430         s->pix_fmt = pix_fmt_rgb[bits];
 431         if (avctx->profile & 1) {
 432             if (get_bits1(&s->gb)) {
 433                 av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
 434                 return AVERROR_INVALIDDATA;
 435             }
 436         } else {
 437             av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
 438                    avctx->profile);
 439             return AVERROR_INVALIDDATA;
 440         }
 441     } else {
 442         static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
 443             { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
 444               { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
 445             { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
 446               { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
 447             { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
 448               { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
 449         };
 450         avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
 451         if (avctx->profile & 1) {
 452             s->ss_h = get_bits1(&s->gb);
 453             s->ss_v = get_bits1(&s->gb);
 454             s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
 455             if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
 456                 av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
 457                        avctx->profile);
 458                 return AVERROR_INVALIDDATA;
 459             } else if (get_bits1(&s->gb)) {
 460                 av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
 461                        avctx->profile);
 462                 return AVERROR_INVALIDDATA;
 463             }
 464         } else {
 465             s->ss_h = s->ss_v = 1;
 466             s->pix_fmt = pix_fmt_for_ss[bits][1][1];
 467         }
 468     }
 469
 470     return 0;
 471 }
 472
 473 static int decode_frame_header(AVCodecContext *avctx,
 474                                const uint8_t *data, int size, int *ref)
 475 {
 476     VP9Context *s = avctx->priv_data;
 477     int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
 478     int last_invisible;
 479     const uint8_t *data2;
 480
 481     /* general header */
 482     if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
 483         av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
 484         return ret;
 485     }
 486     if (get_bits(&s->gb, 2) != 0x2) { // frame marker
 487         av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
 488         return AVERROR_INVALIDDATA;
 489     }
 490     avctx->profile  = get_bits1(&s->gb);
 491     avctx->profile |= get_bits1(&s->gb) << 1;
 492     if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
 493     if (avctx->profile > 3) {
 494         av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
 495         return AVERROR_INVALIDDATA;
 496     }
 497     s->s.h.profile = avctx->profile;
 498     if (get_bits1(&s->gb)) {
 499         *ref = get_bits(&s->gb, 3);
 500         return 0;
 501     }
 502
 503     s->last_keyframe  = s->s.h.keyframe;
 504     s->s.h.keyframe   = !get_bits1(&s->gb);
 505
 506     last_invisible   = s->s.h.invisible;
 507     s->s.h.invisible = !get_bits1(&s->gb);
 508     s->s.h.errorres  = get_bits1(&s->gb);
 509     s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
 510
 511     if (s->s.h.keyframe) {
 512         if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
 513             av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
 514             return AVERROR_INVALIDDATA;
 515         }
 516         if ((ret = read_colorspace_details(avctx)) < 0)
 517             return ret;
 518         // for profile 1, here follows the subsampling bits
 519         s->s.h.refreshrefmask = 0xff;
 520         w = get_bits(&s->gb, 16) + 1;
 521         h = get_bits(&s->gb, 16) + 1;
 522         if (get_bits1(&s->gb)) // display size
 523             skip_bits(&s->gb, 32);
 524     } else {
 525         s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
 526         s->s.h.resetctx  = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
 527         if (s->s.h.intraonly) {
 528             if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
 529                 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
 530                 return AVERROR_INVALIDDATA;
 531             }
 532             if (avctx->profile >= 1) {
 533                 if ((ret = read_colorspace_details(avctx)) < 0)
 534                     return ret;
 535             } else {
 536                 s->ss_h = s->ss_v = 1;
 537                 s->s.h.bpp = 8;
 538                 s->bpp_index = 0;
 539                 s->bytesperpixel = 1;
 540                 s->pix_fmt = AV_PIX_FMT_YUV420P;
 541                 avctx->colorspace = AVCOL_SPC_BT470BG;
 542                 avctx->color_range = AVCOL_RANGE_MPEG;
 543             }
 544             s->s.h.refreshrefmask = get_bits(&s->gb, 8);
 545             w = get_bits(&s->gb, 16) + 1;
 546             h = get_bits(&s->gb, 16) + 1;
 547             if (get_bits1(&s->gb)) // display size
 548                 skip_bits(&s->gb, 32);
 549         } else {
 550             s->s.h.refreshrefmask = get_bits(&s->gb, 8);
 551             s->s.h.refidx[0]      = get_bits(&s->gb, 3);
 552             s->s.h.signbias[0]    = get_bits1(&s->gb) && !s->s.h.errorres;
 553             s->s.h.refidx[1]      = get_bits(&s->gb, 3);
 554             s->s.h.signbias[1]    = get_bits1(&s->gb) && !s->s.h.errorres;
 555             s->s.h.refidx[2]      = get_bits(&s->gb, 3);
 556             s->s.h.signbias[2]    = get_bits1(&s->gb) && !s->s.h.errorres;
 557             if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
 558                 !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
 559                 !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
 560                 av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
 561                 return AVERROR_INVALIDDATA;
 562             }
 563             if (get_bits1(&s->gb)) {
 564                 w = s->s.refs[s->s.h.refidx[0]].f->width;
 565                 h = s->s.refs[s->s.h.refidx[0]].f->height;
 566             } else if (get_bits1(&s->gb)) {
 567                 w = s->s.refs[s->s.h.refidx[1]].f->width;
 568                 h = s->s.refs[s->s.h.refidx[1]].f->height;
 569             } else if (get_bits1(&s->gb)) {
 570                 w = s->s.refs[s->s.h.refidx[2]].f->width;
 571                 h = s->s.refs[s->s.h.refidx[2]].f->height;
 572             } else {
 573                 w = get_bits(&s->gb, 16) + 1;
 574                 h = get_bits(&s->gb, 16) + 1;
 575             }
 576             // Note that in this code, "CUR_FRAME" is actually before we
 577             // have formally allocated a frame, and thus actually represents
 578             // the _last_ frame
 579             s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
 580                                        s->s.frames[CUR_FRAME].tf.f->height == h;
 581             if (get_bits1(&s->gb)) // display size
 582                 skip_bits(&s->gb, 32);
 583             s->s.h.highprecisionmvs = get_bits1(&s->gb);
 584             s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
 585                                                   get_bits(&s->gb, 2);
 586             s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
 587                                   s->s.h.signbias[0] != s->s.h.signbias[2];
 588             if (s->s.h.allowcompinter) {
 589                 if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
 590                     s->s.h.fixcompref    = 2;
 591                     s->s.h.varcompref[0] = 0;
 592                     s->s.h.varcompref[1] = 1;
 593                 } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
 594                     s->s.h.fixcompref    = 1;
 595                     s->s.h.varcompref[0] = 0;
 596                     s->s.h.varcompref[1] = 2;
 597                 } else {
 598                     s->s.h.fixcompref    = 0;
 599                     s->s.h.varcompref[0] = 1;
 600                     s->s.h.varcompref[1] = 2;
 601                 }
 602             }
 603         }
 604     }
 605     s->s.h.refreshctx   = s->s.h.errorres ? 0 : get_bits1(&s->gb);
 606     s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
 607     s->s.h.framectxid   = c = get_bits(&s->gb, 2);
 608     if (s->s.h.keyframe || s->s.h.intraonly)
 609         s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
 610
 611     /* loopfilter header data */
 612     if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
 613         // reset loopfilter defaults
 614         s->s.h.lf_delta.ref[0] = 1;
 615         s->s.h.lf_delta.ref[1] = 0;
 616         s->s.h.lf_delta.ref[2] = -1;
 617         s->s.h.lf_delta.ref[3] = -1;
 618         s->s.h.lf_delta.mode[0] = 0;
 619         s->s.h.lf_delta.mode[1] = 0;
 620         memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
 621     }
 622     s->s.h.filter.level = get_bits(&s->gb, 6);
 623     sharp = get_bits(&s->gb, 3);
 624     // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
 625     // the old cache values since they are still valid
 626     if (s->s.h.filter.sharpness != sharp) {
 627         for (i = 1; i <= 63; i++) {
 628             int limit = i;
 629
 630             if (sharp > 0) {
 631                 limit >>= (sharp + 3) >> 2;
 632                 limit = FFMIN(limit, 9 - sharp);
 633             }
 634             limit = FFMAX(limit, 1);
 635
 636             s->filter_lut.lim_lut[i] = limit;
 637             s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
 638         }
 639     }
 640     s->s.h.filter.sharpness = sharp;
 641     if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
 642         if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
 643             for (i = 0; i < 4; i++)
 644                 if (get_bits1(&s->gb))
 645                     s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
 646             for (i = 0; i < 2; i++)
 647                 if (get_bits1(&s->gb))
 648                     s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
 649         }
 650     }
 651
 652     /* quantization header data */
 653     s->s.h.yac_qi      = get_bits(&s->gb, 8);
 654     s->s.h.ydc_qdelta  = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
 655     s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
 656     s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
 657     s->s.h.lossless    = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
 658                        s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
 659     if (s->s.h.lossless)
 660         avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
 661
 662     /* segmentation header info */
 663     if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
 664         if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
 665             for (i = 0; i < 7; i++)
 666                 s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
 667                                  get_bits(&s->gb, 8) : 255;
 668             if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
 669                 for (i = 0; i < 3; i++)
 670                     s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
 671                                          get_bits(&s->gb, 8) : 255;
 672         }
 673
 674         if (get_bits1(&s->gb)) {
 675             s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
 676             for (i = 0; i < 8; i++) {
 677                 if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
 678                     s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
 679                 if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
 680                     s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
 681                 if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
 682                     s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
 683                 s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
 684             }
 685         }
 686     }
 687
 688     // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
 689     for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
 690         int qyac, qydc, quvac, quvdc, lflvl, sh;
 691
 692         if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
 693             if (s->s.h.segmentation.absolute_vals)
 694                 qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
 695             else
 696                 qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
 697         } else {
 698             qyac  = s->s.h.yac_qi;
 699         }
 700         qydc  = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
 701         quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
 702         quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
 703         qyac  = av_clip_uintp2(qyac, 8);
 704
 705         s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
 706         s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
 707         s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
 708         s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];
 709
 710         sh = s->s.h.filter.level >= 32;
 711         if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
 712             if (s->s.h.segmentation.absolute_vals)
 713                 lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
 714             else
 715                 lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
 716         } else {
 717             lflvl  = s->s.h.filter.level;
 718         }
 719         if (s->s.h.lf_delta.enabled) {
 720             s->s.h.segmentation.feat[i].lflvl[0][0] =
 721             s->s.h.segmentation.feat[i].lflvl[0][1] =
 722                 av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
 723             for (j = 1; j < 4; j++) {
 724                 s->s.h.segmentation.feat[i].lflvl[j][0] =
 725                     av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
 726                                              s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
 727                 s->s.h.segmentation.feat[i].lflvl[j][1] =
 728                     av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
 729                                              s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
 730             }
 731         } else {
 732             memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
 733                    sizeof(s->s.h.segmentation.feat[i].lflvl));
 734         }
 735     }
 736
 737     /* tiling info */
 738     if ((ret = update_size(avctx, w, h)) < 0) {
 739         av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
 740                w, h, s->pix_fmt);
 741         return ret;
 742     }
 743     for (s->s.h.tiling.log2_tile_cols = 0;
 744          s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
 745          s->s.h.tiling.log2_tile_cols++) ;
 746     for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
 747     max = FFMAX(0, max - 1);
 748     while (max > s->s.h.tiling.log2_tile_cols) {
 749         if (get_bits1(&s->gb))
 750             s->s.h.tiling.log2_tile_cols++;
 751         else
 752             break;
 753     }
 754     s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
 755     s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
 756     if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
 757         int n_range_coders;
 758         VP56RangeCoder *rc;
 759
 760         if (s->td) {
 761             for (i = 0; i < s->active_tile_cols; i++) {
 762                 av_free(s->td[i].b_base);
 763                 av_free(s->td[i].block_base);
 764             }
 765             av_free(s->td);
 766         }
 767
 768         s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
 769         vp9_free_entries(avctx);
 770         s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
 771                               s->s.h.tiling.tile_cols : 1;
 772         vp9_alloc_entries(avctx, s->sb_rows);
 773         if (avctx->active_thread_type == FF_THREAD_SLICE) {
 774             n_range_coders = 4; // max_tile_rows
 775         } else {
 776             n_range_coders = s->s.h.tiling.tile_cols;
 777         }
 778         s->td = av_mallocz_array(s->active_tile_cols, sizeof(VP9TileData) +
 779                                  n_range_coders * sizeof(VP56RangeCoder));
 780         if (!s->td)
 781             return AVERROR(ENOMEM);
 782         rc = (VP56RangeCoder *) &s->td[s->active_tile_cols];
 783         for (i = 0; i < s->active_tile_cols; i++) {
 784             s->td[i].s = s;
 785             s->td[i].c_b = rc;
 786             rc += n_range_coders;
 787         }
 788     }
 789
 790     /* check reference frames */
 791     if (!s->s.h.keyframe && !s->s.h.intraonly) {
 792         for (i = 0; i < 3; i++) {
 793             AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
 794             int refw = ref->width, refh = ref->height;
 795
 796             if (ref->format != avctx->pix_fmt) {
 797                 av_log(avctx, AV_LOG_ERROR,
 798                        "Ref pixfmt (%s) did not match current frame (%s)",
 799                        av_get_pix_fmt_name(ref->format),
 800                        av_get_pix_fmt_name(avctx->pix_fmt));
 801                 return AVERROR_INVALIDDATA;
 802             } else if (refw == w && refh == h) {
 803                 s->mvscale[i][0] = s->mvscale[i][1] = 0;
 804             } else {
 805                 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
 806                     av_log(avctx, AV_LOG_ERROR,
 807                            "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
 808                            refw, refh, w, h);
 809                     return AVERROR_INVALIDDATA;
 810                 }
 811                 s->mvscale[i][0] = (refw << 14) / w;
 812                 s->mvscale[i][1] = (refh << 14) / h;
 813                 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
 814                 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
 815             }
 816         }
 817     }
 818
 819     if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
 820         s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
 821                            s->prob_ctx[3].p = ff_vp9_default_probs;
 822         memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
 823                sizeof(ff_vp9_default_coef_probs));
 824         memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
 825                sizeof(ff_vp9_default_coef_probs));
 826         memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
 827                sizeof(ff_vp9_default_coef_probs));
 828         memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
 829                sizeof(ff_vp9_default_coef_probs));
 830     } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
 831         s->prob_ctx[c].p = ff_vp9_default_probs;
 832         memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
 833                sizeof(ff_vp9_default_coef_probs));
 834     }
 835
 836     // next 16 bits is size of the rest of the header (arith-coded)
 837     s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
 838     s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
 839
 840     data2 = align_get_bits(&s->gb);
 841     if (size2 > size - (data2 - data)) {
 842         av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
 843         return AVERROR_INVALIDDATA;
 844     }
 845     ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
 846     if (ret < 0)
 847         return ret;
 848
 849     if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
 850         av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
 851         return AVERROR_INVALIDDATA;
 852     }
 853
 854     for (i = 0; i < s->active_tile_cols; i++) {
 855         if (s->s.h.keyframe || s->s.h.intraonly) {
 856             memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
 857             memset(s->td[i].counts.eob,  0, sizeof(s->td[0].counts.eob));
 858         } else {
 859             memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));
 860         }
 861     }
 862
 863     /* FIXME is it faster to not copy here, but do it down in the fw updates
 864      * as explicit copies if the fw update is missing (and skip the copy upon
 865      * fw update)? */
 866     s->prob.p = s->prob_ctx[c].p;
 867
 868     // txfm updates
 869     if (s->s.h.lossless) {
 870         s->s.h.txfmmode = TX_4X4;
 871     } else {
 872         s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
 873         if (s->s.h.txfmmode == 3)
 874             s->s.h.txfmmode += vp8_rac_get(&s->c);
 875
 876         if (s->s.h.txfmmode == TX_SWITCHABLE) {
 877             for (i = 0; i < 2; i++)
 878                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 879                     s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
 880             for (i = 0; i < 2; i++)
 881                 for (j = 0; j < 2; j++)
 882                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 883                         s->prob.p.tx16p[i][j] =
 884                             update_prob(&s->c, s->prob.p.tx16p[i][j]);
 885             for (i = 0; i < 2; i++)
 886                 for (j = 0; j < 3; j++)
 887                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 888                         s->prob.p.tx32p[i][j] =
 889                             update_prob(&s->c, s->prob.p.tx32p[i][j]);
 890         }
 891     }
 892
 893     // coef updates
 894     for (i = 0; i < 4; i++) {
 895         uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
 896         if (vp8_rac_get(&s->c)) {
 897             for (j = 0; j < 2; j++)
 898                 for (k = 0; k < 2; k++)
 899                     for (l = 0; l < 6; l++)
 900                         for (m = 0; m < 6; m++) {
 901                             uint8_t *p = s->prob.coef[i][j][k][l][m];
 902                             uint8_t *r = ref[j][k][l][m];
 903                             if (m >= 3 && l == 0) // dc only has 3 pt
 904                                 break;
 905                             for (n = 0; n < 3; n++) {
 906                                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 907                                     p[n] = update_prob(&s->c, r[n]);
 908                                 else
 909                                     p[n] = r[n];
 910                             }
 911                             memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
 912                         }
 913         } else {
 914             for (j = 0; j < 2; j++)
 915                 for (k = 0; k < 2; k++)
 916                     for (l = 0; l < 6; l++)
 917                         for (m = 0; m < 6; m++) {
 918                             uint8_t *p = s->prob.coef[i][j][k][l][m];
 919                             uint8_t *r = ref[j][k][l][m];
 920                             if (m > 3 && l == 0) // dc only has 3 pt
 921                                 break;
 922                             memcpy(p, r, 3);
 923                             memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
 924                         }
 925         }
 926         if (s->s.h.txfmmode == i)
 927             break;
 928     }
 929
 930     // mode updates
 931     for (i = 0; i < 3; i++)
 932         if (vp56_rac_get_prob_branchy(&s->c, 252))
 933             s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
 934     if (!s->s.h.keyframe && !s->s.h.intraonly) {
 935         for (i = 0; i < 7; i++)
 936             for (j = 0; j < 3; j++)
 937                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 938                     s->prob.p.mv_mode[i][j] =
 939                         update_prob(&s->c, s->prob.p.mv_mode[i][j]);
 940
 941         if (s->s.h.filtermode == FILTER_SWITCHABLE)
 942             for (i = 0; i < 4; i++)
 943                 for (j = 0; j < 2; j++)
 944                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 945                         s->prob.p.filter[i][j] =
 946                             update_prob(&s->c, s->prob.p.filter[i][j]);
 947
 948         for (i = 0; i < 4; i++)
 949             if (vp56_rac_get_prob_branchy(&s->c, 252))
 950                 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
 951
 952         if (s->s.h.allowcompinter) {
 953             s->s.h.comppredmode = vp8_rac_get(&s->c);
 954             if (s->s.h.comppredmode)
 955                 s->s.h.comppredmode += vp8_rac_get(&s->c);
 956             if (s->s.h.comppredmode == PRED_SWITCHABLE)
 957                 for (i = 0; i < 5; i++)
 958                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 959                         s->prob.p.comp[i] =
 960                             update_prob(&s->c, s->prob.p.comp[i]);
 961         } else {
 962             s->s.h.comppredmode = PRED_SINGLEREF;
 963         }
 964
 965         if (s->s.h.comppredmode != PRED_COMPREF) {
 966             for (i = 0; i < 5; i++) {
 967                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 968                     s->prob.p.single_ref[i][0] =
 969                         update_prob(&s->c, s->prob.p.single_ref[i][0]);
 970                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 971                     s->prob.p.single_ref[i][1] =
 972                         update_prob(&s->c, s->prob.p.single_ref[i][1]);
 973             }
 974         }
 975
 976         if (s->s.h.comppredmode != PRED_SINGLEREF) {
 977             for (i = 0; i < 5; i++)
 978                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 979                     s->prob.p.comp_ref[i] =
 980                         update_prob(&s->c, s->prob.p.comp_ref[i]);
 981         }
 982
 983         for (i = 0; i < 4; i++)
 984             for (j = 0; j < 9; j++)
 985                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 986                     s->prob.p.y_mode[i][j] =
 987                         update_prob(&s->c, s->prob.p.y_mode[i][j]);
 988
 989         for (i = 0; i < 4; i++)
 990             for (j = 0; j < 4; j++)
 991                 for (k = 0; k < 3; k++)
 992                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 993                         s->prob.p.partition[3 - i][j][k] =
 994                             update_prob(&s->c,
 995                                         s->prob.p.partition[3 - i][j][k]);
 996
 997         // mv fields don't use the update_prob subexp model for some reason
 998         for (i = 0; i < 3; i++)
 999             if (vp56_rac_get_prob_branchy(&s->c, 252))
1000                 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1001
1002         for (i = 0; i < 2; i++) {
1003             if (vp56_rac_get_prob_branchy(&s->c, 252))
1004                 s->prob.p.mv_comp[i].sign =
1005                     (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1006
1007             for (j = 0; j < 10; j++)
1008                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1009                     s->prob.p.mv_comp[i].classes[j] =
1010                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1011
1012             if (vp56_rac_get_prob_branchy(&s->c, 252))
1013                 s->prob.p.mv_comp[i].class0 =
1014                     (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1015
1016             for (j = 0; j < 10; j++)
1017                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1018                     s->prob.p.mv_comp[i].bits[j] =
1019                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1020         }
1021
1022         for (i = 0; i < 2; i++) {
1023             for (j = 0; j < 2; j++)
1024                 for (k = 0; k < 3; k++)
1025                     if (vp56_rac_get_prob_branchy(&s->c, 252))
1026                         s->prob.p.mv_comp[i].class0_fp[j][k] =
1027                             (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1028
1029             for (j = 0; j < 3; j++)
1030                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1031                     s->prob.p.mv_comp[i].fp[j] =
1032                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1033         }
1034
1035         if (s->s.h.highprecisionmvs) {
1036             for (i = 0; i < 2; i++) {
1037                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1038                     s->prob.p.mv_comp[i].class0_hp =
1039                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1040
1041                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1042                     s->prob.p.mv_comp[i].hp =
1043                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1044             }
1045         }
1046     }
1047
1048     return (data2 - data) + size2;
1049 }
1050
1051 static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1052                       ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1053 {
1054     const VP9Context *s = td->s;
1055     int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1056             (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1057     const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
1058                                                      s->prob.p.partition[bl][c];
1059     enum BlockPartition bp;
1060     ptrdiff_t hbs = 4 >> bl;
1061     AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1062     ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1063     int bytesperpixel = s->bytesperpixel;
1064
1065     if (bl == BL_8X8) {
1066         bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1067         ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1068     } else if (col + hbs < s->cols) { // FIXME why not <=?
1069         if (row + hbs < s->rows) { // FIXME why not <=?
1070             bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1071             switch (bp) {
1072             case PARTITION_NONE:
1073                 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1074                 break;
1075             case PARTITION_H:
1076                 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1077                 yoff  += hbs * 8 * y_stride;
1078                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1079                 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
1080                 break;
1081             case PARTITION_V:
1082                 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1083                 yoff  += hbs * 8 * bytesperpixel;
1084                 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1085                 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
1086                 break;
1087             case PARTITION_SPLIT:
1088                 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1089                 decode_sb(td, row, col + hbs, lflvl,
1090                           yoff + 8 * hbs * bytesperpixel,
1091                           uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1092                 yoff  += hbs * 8 * y_stride;
1093                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1094                 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1095                 decode_sb(td, row + hbs, col + hbs, lflvl,
1096                           yoff + 8 * hbs * bytesperpixel,
1097                           uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1098                 break;
1099             default:
1100                 av_assert0(0);
1101             }
1102         } else if (vp56_rac_get_prob_branchy(td->c, p[1])) {
1103             bp = PARTITION_SPLIT;
1104             decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1105             decode_sb(td, row, col + hbs, lflvl,
1106                       yoff + 8 * hbs * bytesperpixel,
1107                       uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1108         } else {
1109             bp = PARTITION_H;
1110             ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1111         }
1112     } else if (row + hbs < s->rows) { // FIXME why not <=?
1113         if (vp56_rac_get_prob_branchy(td->c, p[2])) {
1114             bp = PARTITION_SPLIT;
1115             decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1116             yoff  += hbs * 8 * y_stride;
1117             uvoff += hbs * 8 * uv_stride >> s->ss_v;
1118             decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1119         } else {
1120             bp = PARTITION_V;
1121             ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1122         }
1123     } else {
1124         bp = PARTITION_SPLIT;
1125         decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1126     }
1127     td->counts.partition[bl][c][bp]++;
1128 }
1129
1130 static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1131                           ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1132 {
1133     const VP9Context *s = td->s;
1134     VP9Block *b = td->b;
1135     ptrdiff_t hbs = 4 >> bl;
1136     AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1137     ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1138     int bytesperpixel = s->bytesperpixel;
1139
1140     if (bl == BL_8X8) {
1141         av_assert2(b->bl == BL_8X8);
1142         ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1143     } else if (td->b->bl == bl) {
1144         ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1145         if (b->bp == PARTITION_H && row + hbs < s->rows) {
1146             yoff  += hbs * 8 * y_stride;
1147             uvoff += hbs * 8 * uv_stride >> s->ss_v;
1148             ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
1149         } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
1150             yoff  += hbs * 8 * bytesperpixel;
1151             uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1152             ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
1153         }
1154     } else {
1155         decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
1156         if (col + hbs < s->cols) { // FIXME why not <=?
1157             if (row + hbs < s->rows) {
1158                 decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1159                               uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1160                 yoff  += hbs * 8 * y_stride;
1161                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1162                 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1163                 decode_sb_mem(td, row + hbs, col + hbs, lflvl,
1164                               yoff + 8 * hbs * bytesperpixel,
1165                               uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1166             } else {
1167                 yoff  += hbs * 8 * bytesperpixel;
1168                 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1169                 decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
1170             }
1171         } else if (row + hbs < s->rows) {
1172             yoff  += hbs * 8 * y_stride;
1173             uvoff += hbs * 8 * uv_stride >> s->ss_v;
1174             decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1175         }
1176     }
1177 }
1178
/**
 * Compute the half-open range [*start, *end) covered by tile number idx when
 * n units are divided into (1 << log2_n) tiles.
 *
 * The tile boundaries are (idx * n) >> log2_n and ((idx + 1) * n) >> log2_n,
 * each clamped to n and then multiplied by 8 (the callers in this file pass a
 * superblock count for n and step their row/col counters by 8 per superblock).
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first = (idx * n) >> log2_n;
    int past  = ((idx + 1) * n) >> log2_n;

    /* clamp both boundaries to the total number of units */
    if (first > n)
        first = n;
    if (past > n)
        past = n;

    *start = first << 3;
    *end   = past << 3;
}
1186
1187 static void free_buffers(VP9Context *s)
1188 {
1189     int i;
1190
1191     av_freep(&s->intra_pred_data[0]);
1192     for (i = 0; i < s->active_tile_cols; i++) {
1193         av_freep(&s->td[i].b_base);
1194         av_freep(&s->td[i].block_base);
1195     }
1196 }
1197
1198 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1199 {
1200     VP9Context *s = avctx->priv_data;
1201     int i;
1202
1203     for (i = 0; i < 3; i++) {
1204         if (s->s.frames[i].tf.f->buf[0])
1205             vp9_frame_unref(avctx, &s->s.frames[i]);
1206         av_frame_free(&s->s.frames[i].tf.f);
1207     }
1208     for (i = 0; i < 8; i++) {
1209         if (s->s.refs[i].f->buf[0])
1210             ff_thread_release_buffer(avctx, &s->s.refs[i]);
1211         av_frame_free(&s->s.refs[i].f);
1212         if (s->next_refs[i].f->buf[0])
1213             ff_thread_release_buffer(avctx, &s->next_refs[i]);
1214         av_frame_free(&s->next_refs[i].f);
1215     }
1216
1217     free_buffers(s);
1218     vp9_free_entries(avctx);
1219     av_freep(&s->td);
1220     return 0;
1221 }
1222
1223 static int decode_tiles(AVCodecContext *avctx,
1224                         const uint8_t *data, int size)
1225 {
1226     VP9Context *s = avctx->priv_data;
1227     VP9TileData *td = &s->td[0];
1228     int row, col, tile_row, tile_col, ret;
1229     int bytesperpixel;
1230     int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1231     AVFrame *f;
1232     ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1233
1234     f = s->s.frames[CUR_FRAME].tf.f;
1235     ls_y = f->linesize[0];
1236     ls_uv =f->linesize[1];
1237     bytesperpixel = s->bytesperpixel;
1238
1239     yoff = uvoff = 0;
1240     for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1241         set_tile_offset(&tile_row_start, &tile_row_end,
1242                         tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1243
1244         for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1245             int64_t tile_size;
1246
1247             if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1248                 tile_row == s->s.h.tiling.tile_rows - 1) {
1249                 tile_size = size;
1250             } else {
1251                 tile_size = AV_RB32(data);
1252                 data += 4;
1253                 size -= 4;
1254             }
1255             if (tile_size > size) {
1256                 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1257                 return AVERROR_INVALIDDATA;
1258             }
1259             ret = ff_vp56_init_range_decoder(&td->c_b[tile_col], data, tile_size);
1260             if (ret < 0)
1261                 return ret;
1262             if (vp56_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker bit
1263                 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1264                 return AVERROR_INVALIDDATA;
1265             }
1266             data += tile_size;
1267             size -= tile_size;
1268         }
1269
1270         for (row = tile_row_start; row < tile_row_end;
1271              row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1272             VP9Filter *lflvl_ptr = s->lflvl;
1273             ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1274
1275             for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1276                 set_tile_offset(&tile_col_start, &tile_col_end,
1277                                 tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1278                 td->tile_col_start = tile_col_start;
1279                 if (s->pass != 2) {
1280                     memset(td->left_partition_ctx, 0, 8);
1281                     memset(td->left_skip_ctx, 0, 8);
1282                     if (s->s.h.keyframe || s->s.h.intraonly) {
1283                         memset(td->left_mode_ctx, DC_PRED, 16);
1284                     } else {
1285                         memset(td->left_mode_ctx, NEARESTMV, 8);
1286                     }
1287                     memset(td->left_y_nnz_ctx, 0, 16);
1288                     memset(td->left_uv_nnz_ctx, 0, 32);
1289                     memset(td->left_segpred_ctx, 0, 8);
1290
1291                     td->c = &td->c_b[tile_col];
1292                 }
1293
1294                 for (col = tile_col_start;
1295                      col < tile_col_end;
1296                      col += 8, yoff2 += 64 * bytesperpixel,
1297                      uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1298                     // FIXME integrate with lf code (i.e. zero after each
1299                     // use, similar to invtxfm coefficients, or similar)
1300                     if (s->pass != 1) {
1301                         memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1302                     }
1303
1304                     if (s->pass == 2) {
1305                         decode_sb_mem(td, row, col, lflvl_ptr,
1306                                       yoff2, uvoff2, BL_64X64);
1307                     } else {
1308                         decode_sb(td, row, col, lflvl_ptr,
1309                                   yoff2, uvoff2, BL_64X64);
1310                     }
1311                 }
1312             }
1313
1314             if (s->pass == 1)
1315                 continue;
1316
1317             // backup pre-loopfilter reconstruction data for intra
1318             // prediction of next row of sb64s
1319             if (row + 8 < s->rows) {
1320                 memcpy(s->intra_pred_data[0],
1321                        f->data[0] + yoff + 63 * ls_y,
1322                        8 * s->cols * bytesperpixel);
1323                 memcpy(s->intra_pred_data[1],
1324                        f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1325                        8 * s->cols * bytesperpixel >> s->ss_h);
1326                 memcpy(s->intra_pred_data[2],
1327                        f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1328                        8 * s->cols * bytesperpixel >> s->ss_h);
1329             }
1330
1331             // loopfilter one row
1332             if (s->s.h.filter.level) {
1333                 yoff2 = yoff;
1334                 uvoff2 = uvoff;
1335                 lflvl_ptr = s->lflvl;
1336                 for (col = 0; col < s->cols;
1337                      col += 8, yoff2 += 64 * bytesperpixel,
1338                      uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1339                     ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
1340                                          yoff2, uvoff2);
1341                 }
1342             }
1343
1344             // FIXME maybe we can make this more finegrained by running the
1345             // loopfilter per-block instead of after each sbrow
1346             // In fact that would also make intra pred left preparation easier?
1347             ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
1348         }
1349     }
1350     return 0;
1351 }
1352
1353 #if HAVE_THREADS
1354 static av_always_inline
1355 int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
1356                               int threadnr)
1357 {
1358     VP9Context *s = avctx->priv_data;
1359     VP9TileData *td = &s->td[jobnr];
1360     ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1361     int bytesperpixel = s->bytesperpixel, row, col, tile_row;
1362     unsigned tile_cols_len;
1363     int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1364     VP9Filter *lflvl_ptr_base;
1365     AVFrame *f;
1366
1367     f = s->s.frames[CUR_FRAME].tf.f;
1368     ls_y = f->linesize[0];
1369     ls_uv =f->linesize[1];
1370
1371     set_tile_offset(&tile_col_start, &tile_col_end,
1372                     jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1373     td->tile_col_start  = tile_col_start;
1374     uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3);
1375     yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1376     lflvl_ptr_base = s->lflvl+(tile_col_start >> 3);
1377
1378     for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1379         set_tile_offset(&tile_row_start, &tile_row_end,
1380                         tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1381
1382         td->c = &td->c_b[tile_row];
1383         for (row = tile_row_start; row < tile_row_end;
1384              row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1385             ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1386             VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3);
1387
1388             memset(td->left_partition_ctx, 0, 8);
1389             memset(td->left_skip_ctx, 0, 8);
1390             if (s->s.h.keyframe || s->s.h.intraonly) {
1391                 memset(td->left_mode_ctx, DC_PRED, 16);
1392             } else {
1393                 memset(td->left_mode_ctx, NEARESTMV, 8);
1394             }
1395             memset(td->left_y_nnz_ctx, 0, 16);
1396             memset(td->left_uv_nnz_ctx, 0, 32);
1397             memset(td->left_segpred_ctx, 0, 8);
1398
1399             for (col = tile_col_start;
1400                  col < tile_col_end;
1401                  col += 8, yoff2 += 64 * bytesperpixel,
1402                  uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1403                 // FIXME integrate with lf code (i.e. zero after each
1404                 // use, similar to invtxfm coefficients, or similar)
1405                 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1406                 decode_sb(td, row, col, lflvl_ptr,
1407                             yoff2, uvoff2, BL_64X64);
1408             }
1409
1410             // backup pre-loopfilter reconstruction data for intra
1411             // prediction of next row of sb64s
1412             tile_cols_len = tile_col_end - tile_col_start;
1413             if (row + 8 < s->rows) {
1414                 memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1415                        f->data[0] + yoff + 63 * ls_y,
1416                        8 * tile_cols_len * bytesperpixel);
1417                 memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1418                        f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1419                        8 * tile_cols_len * bytesperpixel >> s->ss_h);
1420                 memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1421                        f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1422                        8 * tile_cols_len * bytesperpixel >> s->ss_h);
1423             }
1424
1425             vp9_report_tile_progress(s, row >> 3, 1);
1426         }
1427     }
1428     return 0;
1429 }
1430
1431 static av_always_inline
1432 int loopfilter_proc(AVCodecContext *avctx)
1433 {
1434     VP9Context *s = avctx->priv_data;
1435     ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1436     VP9Filter *lflvl_ptr;
1437     int bytesperpixel = s->bytesperpixel, col, i;
1438     AVFrame *f;
1439
1440     f = s->s.frames[CUR_FRAME].tf.f;
1441     ls_y = f->linesize[0];
1442     ls_uv =f->linesize[1];
1443
1444     for (i = 0; i < s->sb_rows; i++) {
1445         vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);
1446
1447         if (s->s.h.filter.level) {
1448             yoff = (ls_y * 64)*i;
1449             uvoff =  (ls_uv * 64 >> s->ss_v)*i;
1450             lflvl_ptr = s->lflvl+s->sb_cols*i;
1451             for (col = 0; col < s->cols;
1452                  col += 8, yoff += 64 * bytesperpixel,
1453                  uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1454                 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col,
1455                                      yoff, uvoff);
1456             }
1457         }
1458     }
1459     return 0;
1460 }
1461 #endif
1462
1463 static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
1464                             int *got_frame, AVPacket *pkt)
1465 {
1466     const uint8_t *data = pkt->data;
1467     int size = pkt->size;
1468     VP9Context *s = avctx->priv_data;
1469     int ret, i, j, ref;
1470     int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
1471                             (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
1472     AVFrame *f;
1473
1474     if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
1475         return ret;
1476     } else if (ret == 0) {
1477         if (!s->s.refs[ref].f->buf[0]) {
1478             av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
1479             return AVERROR_INVALIDDATA;
1480         }
1481         if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
1482             return ret;
1483         ((AVFrame *)frame)->pts = pkt->pts;
1484 #if FF_API_PKT_PTS
1485 FF_DISABLE_DEPRECATION_WARNINGS
1486         ((AVFrame *)frame)->pkt_pts = pkt->pts;
1487 FF_ENABLE_DEPRECATION_WARNINGS
1488 #endif
1489         ((AVFrame *)frame)->pkt_dts = pkt->dts;
1490         for (i = 0; i < 8; i++) {
1491             if (s->next_refs[i].f->buf[0])
1492                 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1493             if (s->s.refs[i].f->buf[0] &&
1494                 (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
1495                 return ret;
1496         }
1497         *got_frame = 1;
1498         return pkt->size;
1499     }
1500     data += ret;
1501     size -= ret;
1502
1503     if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
1504         if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
1505             vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
1506         if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1507             (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
1508             return ret;
1509     }
1510     if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
1511         vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
1512     if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1513         (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
1514         return ret;
1515     if (s->s.frames[CUR_FRAME].tf.f->buf[0])
1516         vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME]);
1517     if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
1518         return ret;
1519     f = s->s.frames[CUR_FRAME].tf.f;
1520     f->key_frame = s->s.h.keyframe;
1521     f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1522
1523     if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
1524         (s->s.frames[REF_FRAME_MVPAIR].tf.f->width  != s->s.frames[CUR_FRAME].tf.f->width ||
1525          s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
1526         vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
1527     }
1528
1529     // ref frame setup
1530     for (i = 0; i < 8; i++) {
1531         if (s->next_refs[i].f->buf[0])
1532             ff_thread_release_buffer(avctx, &s->next_refs[i]);
1533         if (s->s.h.refreshrefmask & (1 << i)) {
1534             ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
1535         } else if (s->s.refs[i].f->buf[0]) {
1536             ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
1537         }
1538         if (ret < 0)
1539             return ret;
1540     }
1541
1542     if (avctx->hwaccel) {
1543         ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
1544         if (ret < 0)
1545             return ret;
1546         ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
1547         if (ret < 0)
1548             return ret;
1549         ret = avctx->hwaccel->end_frame(avctx);
1550         if (ret < 0)
1551             return ret;
1552         goto finish;
1553     }
1554
1555     // main tile decode loop
1556     memset(s->above_partition_ctx, 0, s->cols);
1557     memset(s->above_skip_ctx, 0, s->cols);
1558     if (s->s.h.keyframe || s->s.h.intraonly) {
1559         memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
1560     } else {
1561         memset(s->above_mode_ctx, NEARESTMV, s->cols);
1562     }
1563     memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
1564     memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
1565     memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
1566     memset(s->above_segpred_ctx, 0, s->cols);
1567     s->pass = s->s.frames[CUR_FRAME].uses_2pass =
1568         avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
1569     if ((ret = update_block_buffers(avctx)) < 0) {
1570         av_log(avctx, AV_LOG_ERROR,
1571                "Failed to allocate block buffers\n");
1572         return ret;
1573     }
1574     if (s->s.h.refreshctx && s->s.h.parallelmode) {
1575         int j, k, l, m;
1576
1577         for (i = 0; i < 4; i++) {
1578             for (j = 0; j < 2; j++)
1579                 for (k = 0; k < 2; k++)
1580                     for (l = 0; l < 6; l++)
1581                         for (m = 0; m < 6; m++)
1582                             memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
1583                                    s->prob.coef[i][j][k][l][m], 3);
1584             if (s->s.h.txfmmode == i)
1585                 break;
1586         }
1587         s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
1588         ff_thread_finish_setup(avctx);
1589     } else if (!s->s.h.refreshctx) {
1590         ff_thread_finish_setup(avctx);
1591     }
1592
1593 #if HAVE_THREADS
1594     if (avctx->active_thread_type & FF_THREAD_SLICE) {
1595         for (i = 0; i < s->sb_rows; i++)
1596             atomic_store(&s->entries[i], 0);
1597     }
1598 #endif
1599
1600     do {
1601         for (i = 0; i < s->active_tile_cols; i++) {
1602             s->td[i].b = s->td[i].b_base;
1603             s->td[i].block = s->td[i].block_base;
1604             s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
1605             s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
1606             s->td[i].eob = s->td[i].eob_base;
1607             s->td[i].uveob[0] = s->td[i].uveob_base[0];
1608             s->td[i].uveob[1] = s->td[i].uveob_base[1];
1609         }
1610
1611 #if HAVE_THREADS
1612         if (avctx->active_thread_type == FF_THREAD_SLICE) {
1613             int tile_row, tile_col;
1614
1615             av_assert1(!s->pass);
1616
1617             for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1618                 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1619                     int64_t tile_size;
1620
1621                     if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1622                         tile_row == s->s.h.tiling.tile_rows - 1) {
1623                         tile_size = size;
1624                     } else {
1625                         tile_size = AV_RB32(data);
1626                         data += 4;
1627                         size -= 4;
1628                     }
1629                     if (tile_size > size)
1630                         return AVERROR_INVALIDDATA;
1631                     ret = ff_vp56_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
1632                     if (ret < 0)
1633                         return ret;
1634                     if (vp56_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
1635                         return AVERROR_INVALIDDATA;
1636                     data += tile_size;
1637                     size -= tile_size;
1638                 }
1639             }
1640
1641             ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
1642         } else
1643 #endif
1644         {
1645             ret = decode_tiles(avctx, data, size);
1646             if (ret < 0)
1647                 return ret;
1648         }
1649
1650         // Sum all counts fields into td[0].counts for tile threading
1651         if (avctx->active_thread_type == FF_THREAD_SLICE)
1652             for (i = 1; i < s->s.h.tiling.tile_cols; i++)
1653                 for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
1654                     ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];
1655
1656         if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
1657             ff_vp9_adapt_probs(s);
1658             ff_thread_finish_setup(avctx);
1659         }
1660     } while (s->pass++ == 1);
1661     ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1662
1663 finish:
1664     // ref frame setup
1665     for (i = 0; i < 8; i++) {
1666         if (s->s.refs[i].f->buf[0])
1667             ff_thread_release_buffer(avctx, &s->s.refs[i]);
1668         if (s->next_refs[i].f->buf[0] &&
1669             (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
1670             return ret;
1671     }
1672
1673     if (!s->s.h.invisible) {
1674         if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
1675             return ret;
1676         *got_frame = 1;
1677     }
1678
1679     return pkt->size;
1680 }
1681
1682 static void vp9_decode_flush(AVCodecContext *avctx)
1683 {
1684     VP9Context *s = avctx->priv_data;
1685     int i;
1686
1687     for (i = 0; i < 3; i++)
1688         vp9_frame_unref(avctx, &s->s.frames[i]);
1689     for (i = 0; i < 8; i++)
1690         ff_thread_release_buffer(avctx, &s->s.refs[i]);
1691 }
1692
1693 static int init_frames(AVCodecContext *avctx)
1694 {
1695     VP9Context *s = avctx->priv_data;
1696     int i;
1697
1698     for (i = 0; i < 3; i++) {
1699         s->s.frames[i].tf.f = av_frame_alloc();
1700         if (!s->s.frames[i].tf.f) {
1701             vp9_decode_free(avctx);
1702             av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1703             return AVERROR(ENOMEM);
1704         }
1705     }
1706     for (i = 0; i < 8; i++) {
1707         s->s.refs[i].f = av_frame_alloc();
1708         s->next_refs[i].f = av_frame_alloc();
1709         if (!s->s.refs[i].f || !s->next_refs[i].f) {
1710             vp9_decode_free(avctx);
1711             av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1712             return AVERROR(ENOMEM);
1713         }
1714     }
1715
1716     return 0;
1717 }
1718
1719 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1720 {
1721     VP9Context *s = avctx->priv_data;
1722
1723     avctx->internal->allocate_progress = 1;
1724     s->last_bpp = 0;
1725     s->s.h.filter.sharpness = -1;
1726
1727     return init_frames(avctx);
1728 }
1729
1730 #if HAVE_THREADS
1731 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
1732 {
1733     return init_frames(avctx);
1734 }
1735
1736 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1737 {
1738     int i, ret;
1739     VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
1740
1741     for (i = 0; i < 3; i++) {
1742         if (s->s.frames[i].tf.f->buf[0])
1743             vp9_frame_unref(dst, &s->s.frames[i]);
1744         if (ssrc->s.frames[i].tf.f->buf[0]) {
1745             if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
1746                 return ret;
1747         }
1748     }
1749     for (i = 0; i < 8; i++) {
1750         if (s->s.refs[i].f->buf[0])
1751             ff_thread_release_buffer(dst, &s->s.refs[i]);
1752         if (ssrc->next_refs[i].f->buf[0]) {
1753             if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
1754                 return ret;
1755         }
1756     }
1757
1758     s->s.h.invisible = ssrc->s.h.invisible;
1759     s->s.h.keyframe = ssrc->s.h.keyframe;
1760     s->s.h.intraonly = ssrc->s.h.intraonly;
1761     s->ss_v = ssrc->ss_v;
1762     s->ss_h = ssrc->ss_h;
1763     s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1764     s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1765     s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1766     s->bytesperpixel = ssrc->bytesperpixel;
1767     s->gf_fmt = ssrc->gf_fmt;
1768     s->w = ssrc->w;
1769     s->h = ssrc->h;
1770     s->s.h.bpp = ssrc->s.h.bpp;
1771     s->bpp_index = ssrc->bpp_index;
1772     s->pix_fmt = ssrc->pix_fmt;
1773     memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
1774     memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
1775     memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1776            sizeof(s->s.h.segmentation.feat));
1777
1778     return 0;
1779 }
1780 #endif
1781
1782 AVCodec ff_vp9_decoder = {
1783     .name                  = "vp9",
1784     .long_name             = NULL_IF_CONFIG_SMALL("Google VP9"),
1785     .type                  = AVMEDIA_TYPE_VIDEO,
1786     .id                    = AV_CODEC_ID_VP9,
1787     .priv_data_size        = sizeof(VP9Context),
1788     .init                  = vp9_decode_init,
1789     .close                 = vp9_decode_free,
1790     .decode                = vp9_decode_frame,
1791     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
1792     .caps_internal         = FF_CODEC_CAP_SLICE_THREAD_HAS_MF,
1793     .flush                 = vp9_decode_flush,
1794     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
1795     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
1796     .profiles              = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
1797 };