git.sesse.net Git - ffmpeg/blob - libavcodec/vp9.c

   1 /*
   2  * VP9 compatible video decoder
   3  *
   4  * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
   5  * Copyright (C) 2013 Clément Bœsch <u pkh me>
   6  *
   7  * This file is part of FFmpeg.
   8  *
   9  * FFmpeg is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * FFmpeg is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with FFmpeg; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
  24 #include "avcodec.h"
  25 #include "get_bits.h"
  26 #include "hwconfig.h"
  27 #include "internal.h"
  28 #include "profiles.h"
  29 #include "thread.h"
  30 #include "videodsp.h"
  31 #include "vp56.h"
  32 #include "vp9.h"
  33 #include "vp9data.h"
  34 #include "vp9dec.h"
  35 #include "libavutil/avassert.h"
  36 #include "libavutil/pixdesc.h"
  37
  38 #define VP9_SYNCCODE 0x498342
  39
  40 #if HAVE_THREADS
  41 static void vp9_free_entries(AVCodecContext *avctx) {
  42     VP9Context *s = avctx->priv_data;
  43
  44     if (avctx->active_thread_type & FF_THREAD_SLICE)  {
  45         pthread_mutex_destroy(&s->progress_mutex);
  46         pthread_cond_destroy(&s->progress_cond);
  47         av_freep(&s->entries);
  48     }
  49 }
  50
  51 static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
  52     VP9Context *s = avctx->priv_data;
  53     int i;
  54
  55     if (avctx->active_thread_type & FF_THREAD_SLICE)  {
  56         if (s->entries)
  57             av_freep(&s->entries);
  58
  59         s->entries = av_malloc_array(n, sizeof(atomic_int));
  60
  61         if (!s->entries) {
  62             av_freep(&s->entries);
  63             return AVERROR(ENOMEM);
  64         }
  65
  66         for (i  = 0; i < n; i++)
  67             atomic_init(&s->entries[i], 0);
  68
  69         pthread_mutex_init(&s->progress_mutex, NULL);
  70         pthread_cond_init(&s->progress_cond, NULL);
  71     }
  72     return 0;
  73 }
  74
  75 static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
  76     pthread_mutex_lock(&s->progress_mutex);
  77     atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
  78     pthread_cond_signal(&s->progress_cond);
  79     pthread_mutex_unlock(&s->progress_mutex);
  80 }
  81
  82 static void vp9_await_tile_progress(VP9Context *s, int field, int n) {
  83     if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
  84         return;
  85
  86     pthread_mutex_lock(&s->progress_mutex);
  87     while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) != n)
  88         pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
  89     pthread_mutex_unlock(&s->progress_mutex);
  90 }
  91 #else
  92 static void vp9_free_entries(AVCodecContext *avctx) {}
  93 static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
  94 #endif
  95
  96 static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
  97 {
  98     ff_thread_release_buffer(avctx, &f->tf);
  99     av_buffer_unref(&f->extradata);
 100     av_buffer_unref(&f->hwaccel_priv_buf);
 101     f->segmentation_map = NULL;
 102     f->hwaccel_picture_private = NULL;
 103 }
 104
 105 static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
 106 {
 107     VP9Context *s = avctx->priv_data;
 108     int ret, sz;
 109
 110     ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
 111     if (ret < 0)
 112         return ret;
 113
 114     sz = 64 * s->sb_cols * s->sb_rows;
 115     if (sz != s->frame_extradata_pool_size) {
 116         av_buffer_pool_uninit(&s->frame_extradata_pool);
 117         s->frame_extradata_pool = av_buffer_pool_init(sz * (1 + sizeof(VP9mvrefPair)), NULL);
 118         if (!s->frame_extradata_pool) {
 119             s->frame_extradata_pool_size = 0;
 120             goto fail;
 121         }
 122         s->frame_extradata_pool_size = sz;
 123     }
 124     f->extradata = av_buffer_pool_get(s->frame_extradata_pool);
 125     if (!f->extradata) {
 126         goto fail;
 127     }
 128     memset(f->extradata->data, 0, f->extradata->size);
 129
 130     f->segmentation_map = f->extradata->data;
 131     f->mv = (VP9mvrefPair *) (f->extradata->data + sz);
 132
 133     if (avctx->hwaccel) {
 134         const AVHWAccel *hwaccel = avctx->hwaccel;
 135         av_assert0(!f->hwaccel_picture_private);
 136         if (hwaccel->frame_priv_data_size) {
 137             f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
 138             if (!f->hwaccel_priv_buf)
 139                 goto fail;
 140             f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
 141         }
 142     }
 143
 144     return 0;
 145
 146 fail:
 147     vp9_frame_unref(avctx, f);
 148     return AVERROR(ENOMEM);
 149 }
 150
 151 static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
 152 {
 153     int ret;
 154
 155     ret = ff_thread_ref_frame(&dst->tf, &src->tf);
 156     if (ret < 0)
 157         return ret;
 158
 159     dst->extradata = av_buffer_ref(src->extradata);
 160     if (!dst->extradata)
 161         goto fail;
 162
 163     dst->segmentation_map = src->segmentation_map;
 164     dst->mv = src->mv;
 165     dst->uses_2pass = src->uses_2pass;
 166
 167     if (src->hwaccel_picture_private) {
 168         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
 169         if (!dst->hwaccel_priv_buf)
 170             goto fail;
 171         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
 172     }
 173
 174     return 0;
 175
 176 fail:
 177     vp9_frame_unref(avctx, dst);
 178     return AVERROR(ENOMEM);
 179 }
 180
 181 static int update_size(AVCodecContext *avctx, int w, int h)
 182 {
 183 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
 184                      CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
 185                      CONFIG_VP9_NVDEC_HWACCEL + \
 186                      CONFIG_VP9_VAAPI_HWACCEL + \
 187                      CONFIG_VP9_VDPAU_HWACCEL)
 188     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
 189     VP9Context *s = avctx->priv_data;
 190     uint8_t *p;
 191     int bytesperpixel = s->bytesperpixel, ret, cols, rows;
 192     int lflvl_len, i;
 193
 194     av_assert0(w > 0 && h > 0);
 195
 196     if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
 197         if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
 198             return ret;
 199
 200         switch (s->pix_fmt) {
 201         case AV_PIX_FMT_YUV420P:
 202 #if CONFIG_VP9_VDPAU_HWACCEL
 203             *fmtp++ = AV_PIX_FMT_VDPAU;
 204 #endif
 205         case AV_PIX_FMT_YUV420P10:
 206 #if CONFIG_VP9_DXVA2_HWACCEL
 207             *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
 208 #endif
 209 #if CONFIG_VP9_D3D11VA_HWACCEL
 210             *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
 211             *fmtp++ = AV_PIX_FMT_D3D11;
 212 #endif
 213 #if CONFIG_VP9_NVDEC_HWACCEL
 214             *fmtp++ = AV_PIX_FMT_CUDA;
 215 #endif
 216 #if CONFIG_VP9_VAAPI_HWACCEL
 217             *fmtp++ = AV_PIX_FMT_VAAPI;
 218 #endif
 219             break;
 220         case AV_PIX_FMT_YUV420P12:
 221 #if CONFIG_VP9_NVDEC_HWACCEL
 222             *fmtp++ = AV_PIX_FMT_CUDA;
 223 #endif
 224 #if CONFIG_VP9_VAAPI_HWACCEL
 225             *fmtp++ = AV_PIX_FMT_VAAPI;
 226 #endif
 227             break;
 228         }
 229
 230         *fmtp++ = s->pix_fmt;
 231         *fmtp = AV_PIX_FMT_NONE;
 232
 233         ret = ff_thread_get_format(avctx, pix_fmts);
 234         if (ret < 0)
 235             return ret;
 236
 237         avctx->pix_fmt = ret;
 238         s->gf_fmt  = s->pix_fmt;
 239         s->w = w;
 240         s->h = h;
 241     }
 242
 243     cols = (w + 7) >> 3;
 244     rows = (h + 7) >> 3;
 245
 246     if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
 247         return 0;
 248
 249     s->last_fmt  = s->pix_fmt;
 250     s->sb_cols   = (w + 63) >> 6;
 251     s->sb_rows   = (h + 63) >> 6;
 252     s->cols      = (w + 7) >> 3;
 253     s->rows      = (h + 7) >> 3;
 254     lflvl_len    = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;
 255
 256 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
 257     av_freep(&s->intra_pred_data[0]);
 258     // FIXME we slightly over-allocate here for subsampled chroma, but a little
 259     // bit of padding shouldn't affect performance...
 260     p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
 261                                 lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
 262     if (!p)
 263         return AVERROR(ENOMEM);
 264     assign(s->intra_pred_data[0],  uint8_t *,             64 * bytesperpixel);
 265     assign(s->intra_pred_data[1],  uint8_t *,             64 * bytesperpixel);
 266     assign(s->intra_pred_data[2],  uint8_t *,             64 * bytesperpixel);
 267     assign(s->above_y_nnz_ctx,     uint8_t *,             16);
 268     assign(s->above_mode_ctx,      uint8_t *,             16);
 269     assign(s->above_mv_ctx,        VP56mv(*)[2],          16);
 270     assign(s->above_uv_nnz_ctx[0], uint8_t *,             16);
 271     assign(s->above_uv_nnz_ctx[1], uint8_t *,             16);
 272     assign(s->above_partition_ctx, uint8_t *,              8);
 273     assign(s->above_skip_ctx,      uint8_t *,              8);
 274     assign(s->above_txfm_ctx,      uint8_t *,              8);
 275     assign(s->above_segpred_ctx,   uint8_t *,              8);
 276     assign(s->above_intra_ctx,     uint8_t *,              8);
 277     assign(s->above_comp_ctx,      uint8_t *,              8);
 278     assign(s->above_ref_ctx,       uint8_t *,              8);
 279     assign(s->above_filter_ctx,    uint8_t *,              8);
 280     assign(s->lflvl,               VP9Filter *,            lflvl_len);
 281 #undef assign
 282
 283     if (s->td) {
 284         for (i = 0; i < s->active_tile_cols; i++) {
 285             av_freep(&s->td[i].b_base);
 286             av_freep(&s->td[i].block_base);
 287         }
 288     }
 289
 290     if (s->s.h.bpp != s->last_bpp) {
 291         ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
 292         ff_videodsp_init(&s->vdsp, s->s.h.bpp);
 293         s->last_bpp = s->s.h.bpp;
 294     }
 295
 296     return 0;
 297 }
 298
 299 static int update_block_buffers(AVCodecContext *avctx)
 300 {
 301     int i;
 302     VP9Context *s = avctx->priv_data;
 303     int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
 304     VP9TileData *td = &s->td[0];
 305
 306     if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
 307         return 0;
 308
 309     av_free(td->b_base);
 310     av_free(td->block_base);
 311     chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
 312     chroma_eobs   = 16 * 16 >> (s->ss_h + s->ss_v);
 313     if (s->s.frames[CUR_FRAME].uses_2pass) {
 314         int sbs = s->sb_cols * s->sb_rows;
 315
 316         td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
 317         td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
 318                                     16 * 16 + 2 * chroma_eobs) * sbs);
 319         if (!td->b_base || !td->block_base)
 320             return AVERROR(ENOMEM);
 321         td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
 322         td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
 323         td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
 324         td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
 325         td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;
 326     } else {
 327         for (i = 1; i < s->active_tile_cols; i++) {
 328             if (s->td[i].b_base && s->td[i].block_base) {
 329                 av_free(s->td[i].b_base);
 330                 av_free(s->td[i].block_base);
 331             }
 332         }
 333         for (i = 0; i < s->active_tile_cols; i++) {
 334             s->td[i].b_base = av_malloc(sizeof(VP9Block));
 335             s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
 336                                        16 * 16 + 2 * chroma_eobs);
 337             if (!s->td[i].b_base || !s->td[i].block_base)
 338                 return AVERROR(ENOMEM);
 339             s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
 340             s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
 341             s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
 342             s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
 343             s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;
 344         }
 345     }
 346     s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;
 347
 348     return 0;
 349 }
 350
 351 // The sign bit is at the end, not the start, of a bit sequence
 352 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
 353 {
 354     int v = get_bits(gb, n);
 355     return get_bits1(gb) ? -v : v;
 356 }
 357
 358 static av_always_inline int inv_recenter_nonneg(int v, int m)
 359 {
 360     if (v > 2 * m)
 361         return v;
 362     if (v & 1)
 363         return m - ((v + 1) >> 1);
 364     return m + (v >> 1);
 365 }
 366
 367 // differential forward probability updates
 368 static int update_prob(VP56RangeCoder *c, int p)
 369 {
 370     static const uint8_t inv_map_table[255] = {
 371           7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
 372         189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
 373          10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
 374          25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
 375          40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
 376          55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
 377          70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
 378          86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
 379         101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
 380         116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
 381         131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
 382         146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
 383         161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
 384         177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
 385         192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
 386         207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
 387         222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
 388         237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
 389         252, 253, 253,
 390     };
 391     int d;
 392
 393     /* This code is trying to do a differential probability update. For a
 394      * current probability A in the range [1, 255], the difference to a new
 395      * probability of any value can be expressed differentially as 1-A, 255-A
 396      * where some part of this (absolute range) exists both in positive as
 397      * well as the negative part, whereas another part only exists in one
 398      * half. We're trying to code this shared part differentially, i.e.
 399      * times two where the value of the lowest bit specifies the sign, and
 400      * the single part is then coded on top of this. This absolute difference
 401      * then again has a value of [0, 254], but a bigger value in this range
 402      * indicates that we're further away from the original value A, so we
 403      * can code this as a VLC code, since higher values are increasingly
 404      * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
 405      * updates vs. the 'fine, exact' updates further down the range, which
 406      * adds one extra dimension to this differential update model. */
 407
 408     if (!vp8_rac_get(c)) {
 409         d = vp8_rac_get_uint(c, 4) + 0;
 410     } else if (!vp8_rac_get(c)) {
 411         d = vp8_rac_get_uint(c, 4) + 16;
 412     } else if (!vp8_rac_get(c)) {
 413         d = vp8_rac_get_uint(c, 5) + 32;
 414     } else {
 415         d = vp8_rac_get_uint(c, 7);
 416         if (d >= 65)
 417             d = (d << 1) - 65 + vp8_rac_get(c);
 418         d += 64;
 419         av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
 420     }
 421
 422     return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
 423                     255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
 424 }
 425
 426 static int read_colorspace_details(AVCodecContext *avctx)
 427 {
 428     static const enum AVColorSpace colorspaces[8] = {
 429         AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
 430         AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
 431     };
 432     VP9Context *s = avctx->priv_data;
 433     int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
 434
 435     s->bpp_index = bits;
 436     s->s.h.bpp = 8 + bits * 2;
 437     s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
 438     avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
 439     if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
 440         static const enum AVPixelFormat pix_fmt_rgb[3] = {
 441             AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
 442         };
 443         s->ss_h = s->ss_v = 0;
 444         avctx->color_range = AVCOL_RANGE_JPEG;
 445         s->pix_fmt = pix_fmt_rgb[bits];
 446         if (avctx->profile & 1) {
 447             if (get_bits1(&s->gb)) {
 448                 av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
 449                 return AVERROR_INVALIDDATA;
 450             }
 451         } else {
 452             av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
 453                    avctx->profile);
 454             return AVERROR_INVALIDDATA;
 455         }
 456     } else {
 457         static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
 458             { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
 459               { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
 460             { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
 461               { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
 462             { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
 463               { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
 464         };
 465         avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
 466         if (avctx->profile & 1) {
 467             s->ss_h = get_bits1(&s->gb);
 468             s->ss_v = get_bits1(&s->gb);
 469             s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
 470             if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
 471                 av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
 472                        avctx->profile);
 473                 return AVERROR_INVALIDDATA;
 474             } else if (get_bits1(&s->gb)) {
 475                 av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
 476                        avctx->profile);
 477                 return AVERROR_INVALIDDATA;
 478             }
 479         } else {
 480             s->ss_h = s->ss_v = 1;
 481             s->pix_fmt = pix_fmt_for_ss[bits][1][1];
 482         }
 483     }
 484
 485     return 0;
 486 }
 487
 488 static int decode_frame_header(AVCodecContext *avctx,
 489                                const uint8_t *data, int size, int *ref)
 490 {
 491     VP9Context *s = avctx->priv_data;
 492     int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
 493     int last_invisible;
 494     const uint8_t *data2;
 495
 496     /* general header */
 497     if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
 498         av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
 499         return ret;
 500     }
 501     if (get_bits(&s->gb, 2) != 0x2) { // frame marker
 502         av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
 503         return AVERROR_INVALIDDATA;
 504     }
 505     avctx->profile  = get_bits1(&s->gb);
 506     avctx->profile |= get_bits1(&s->gb) << 1;
 507     if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
 508     if (avctx->profile > 3) {
 509         av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
 510         return AVERROR_INVALIDDATA;
 511     }
 512     s->s.h.profile = avctx->profile;
 513     if (get_bits1(&s->gb)) {
 514         *ref = get_bits(&s->gb, 3);
 515         return 0;
 516     }
 517
 518     s->last_keyframe  = s->s.h.keyframe;
 519     s->s.h.keyframe   = !get_bits1(&s->gb);
 520
 521     last_invisible   = s->s.h.invisible;
 522     s->s.h.invisible = !get_bits1(&s->gb);
 523     s->s.h.errorres  = get_bits1(&s->gb);
 524     s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
 525
 526     if (s->s.h.keyframe) {
 527         if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
 528             av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
 529             return AVERROR_INVALIDDATA;
 530         }
 531         if ((ret = read_colorspace_details(avctx)) < 0)
 532             return ret;
 533         // for profile 1, here follows the subsampling bits
 534         s->s.h.refreshrefmask = 0xff;
 535         w = get_bits(&s->gb, 16) + 1;
 536         h = get_bits(&s->gb, 16) + 1;
 537         if (get_bits1(&s->gb)) // display size
 538             skip_bits(&s->gb, 32);
 539     } else {
 540         s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
 541         s->s.h.resetctx  = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
 542         if (s->s.h.intraonly) {
 543             if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
 544                 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
 545                 return AVERROR_INVALIDDATA;
 546             }
 547             if (avctx->profile >= 1) {
 548                 if ((ret = read_colorspace_details(avctx)) < 0)
 549                     return ret;
 550             } else {
 551                 s->ss_h = s->ss_v = 1;
 552                 s->s.h.bpp = 8;
 553                 s->bpp_index = 0;
 554                 s->bytesperpixel = 1;
 555                 s->pix_fmt = AV_PIX_FMT_YUV420P;
 556                 avctx->colorspace = AVCOL_SPC_BT470BG;
 557                 avctx->color_range = AVCOL_RANGE_MPEG;
 558             }
 559             s->s.h.refreshrefmask = get_bits(&s->gb, 8);
 560             w = get_bits(&s->gb, 16) + 1;
 561             h = get_bits(&s->gb, 16) + 1;
 562             if (get_bits1(&s->gb)) // display size
 563                 skip_bits(&s->gb, 32);
 564         } else {
 565             s->s.h.refreshrefmask = get_bits(&s->gb, 8);
 566             s->s.h.refidx[0]      = get_bits(&s->gb, 3);
 567             s->s.h.signbias[0]    = get_bits1(&s->gb) && !s->s.h.errorres;
 568             s->s.h.refidx[1]      = get_bits(&s->gb, 3);
 569             s->s.h.signbias[1]    = get_bits1(&s->gb) && !s->s.h.errorres;
 570             s->s.h.refidx[2]      = get_bits(&s->gb, 3);
 571             s->s.h.signbias[2]    = get_bits1(&s->gb) && !s->s.h.errorres;
 572             if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
 573                 !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
 574                 !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
 575                 av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
 576                 return AVERROR_INVALIDDATA;
 577             }
 578             if (get_bits1(&s->gb)) {
 579                 w = s->s.refs[s->s.h.refidx[0]].f->width;
 580                 h = s->s.refs[s->s.h.refidx[0]].f->height;
 581             } else if (get_bits1(&s->gb)) {
 582                 w = s->s.refs[s->s.h.refidx[1]].f->width;
 583                 h = s->s.refs[s->s.h.refidx[1]].f->height;
 584             } else if (get_bits1(&s->gb)) {
 585                 w = s->s.refs[s->s.h.refidx[2]].f->width;
 586                 h = s->s.refs[s->s.h.refidx[2]].f->height;
 587             } else {
 588                 w = get_bits(&s->gb, 16) + 1;
 589                 h = get_bits(&s->gb, 16) + 1;
 590             }
 591             // Note that in this code, "CUR_FRAME" is actually before we
 592             // have formally allocated a frame, and thus actually represents
 593             // the _last_ frame
 594             s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
 595                                        s->s.frames[CUR_FRAME].tf.f->height == h;
 596             if (get_bits1(&s->gb)) // display size
 597                 skip_bits(&s->gb, 32);
 598             s->s.h.highprecisionmvs = get_bits1(&s->gb);
 599             s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
 600                                                   get_bits(&s->gb, 2);
 601             s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
 602                                   s->s.h.signbias[0] != s->s.h.signbias[2];
 603             if (s->s.h.allowcompinter) {
 604                 if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
 605                     s->s.h.fixcompref    = 2;
 606                     s->s.h.varcompref[0] = 0;
 607                     s->s.h.varcompref[1] = 1;
 608                 } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
 609                     s->s.h.fixcompref    = 1;
 610                     s->s.h.varcompref[0] = 0;
 611                     s->s.h.varcompref[1] = 2;
 612                 } else {
 613                     s->s.h.fixcompref    = 0;
 614                     s->s.h.varcompref[0] = 1;
 615                     s->s.h.varcompref[1] = 2;
 616                 }
 617             }
 618         }
 619     }
 620     s->s.h.refreshctx   = s->s.h.errorres ? 0 : get_bits1(&s->gb);
 621     s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
 622     s->s.h.framectxid   = c = get_bits(&s->gb, 2);
 623     if (s->s.h.keyframe || s->s.h.intraonly)
 624         s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
 625
 626     /* loopfilter header data */
 627     if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
 628         // reset loopfilter defaults
 629         s->s.h.lf_delta.ref[0] = 1;
 630         s->s.h.lf_delta.ref[1] = 0;
 631         s->s.h.lf_delta.ref[2] = -1;
 632         s->s.h.lf_delta.ref[3] = -1;
 633         s->s.h.lf_delta.mode[0] = 0;
 634         s->s.h.lf_delta.mode[1] = 0;
 635         memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
 636     }
 637     s->s.h.filter.level = get_bits(&s->gb, 6);
 638     sharp = get_bits(&s->gb, 3);
 639     // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
 640     // the old cache values since they are still valid
 641     if (s->s.h.filter.sharpness != sharp) {
 642         for (i = 1; i <= 63; i++) {
 643             int limit = i;
 644
 645             if (sharp > 0) {
 646                 limit >>= (sharp + 3) >> 2;
 647                 limit = FFMIN(limit, 9 - sharp);
 648             }
 649             limit = FFMAX(limit, 1);
 650
 651             s->filter_lut.lim_lut[i] = limit;
 652             s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
 653         }
 654     }
 655     s->s.h.filter.sharpness = sharp;
 656     if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
 657         if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
 658             for (i = 0; i < 4; i++)
 659                 if (get_bits1(&s->gb))
 660                     s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
 661             for (i = 0; i < 2; i++)
 662                 if (get_bits1(&s->gb))
 663                     s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
 664         }
 665     }
 666
 667     /* quantization header data */
 668     s->s.h.yac_qi      = get_bits(&s->gb, 8);
 669     s->s.h.ydc_qdelta  = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
 670     s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
 671     s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
 672     s->s.h.lossless    = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
 673                        s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
 674     if (s->s.h.lossless)
 675         avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
 676
 677     /* segmentation header info */
 678     if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
 679         if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
 680             for (i = 0; i < 7; i++)
 681                 s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
 682                                  get_bits(&s->gb, 8) : 255;
 683             if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
 684                 for (i = 0; i < 3; i++)
 685                     s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
 686                                          get_bits(&s->gb, 8) : 255;
 687         }
 688
 689         if (get_bits1(&s->gb)) {
 690             s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
 691             for (i = 0; i < 8; i++) {
 692                 if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
 693                     s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
 694                 if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
 695                     s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
 696                 if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
 697                     s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
 698                 s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
 699             }
 700         }
 701     }
 702
 703     // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
 704     for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
 705         int qyac, qydc, quvac, quvdc, lflvl, sh;
 706
 707         if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
 708             if (s->s.h.segmentation.absolute_vals)
 709                 qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
 710             else
 711                 qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
 712         } else {
 713             qyac  = s->s.h.yac_qi;
 714         }
 715         qydc  = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
 716         quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
 717         quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
 718         qyac  = av_clip_uintp2(qyac, 8);
 719
 720         s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
 721         s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
 722         s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
 723         s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];
 724
 725         sh = s->s.h.filter.level >= 32;
 726         if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
 727             if (s->s.h.segmentation.absolute_vals)
 728                 lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
 729             else
 730                 lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
 731         } else {
 732             lflvl  = s->s.h.filter.level;
 733         }
 734         if (s->s.h.lf_delta.enabled) {
 735             s->s.h.segmentation.feat[i].lflvl[0][0] =
 736             s->s.h.segmentation.feat[i].lflvl[0][1] =
 737                 av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
 738             for (j = 1; j < 4; j++) {
 739                 s->s.h.segmentation.feat[i].lflvl[j][0] =
 740                     av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
 741                                              s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
 742                 s->s.h.segmentation.feat[i].lflvl[j][1] =
 743                     av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
 744                                              s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
 745             }
 746         } else {
 747             memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
 748                    sizeof(s->s.h.segmentation.feat[i].lflvl));
 749         }
 750     }
 751
 752     /* tiling info */
 753     if ((ret = update_size(avctx, w, h)) < 0) {
 754         av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
 755                w, h, s->pix_fmt);
 756         return ret;
 757     }
 758     for (s->s.h.tiling.log2_tile_cols = 0;
 759          s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
 760          s->s.h.tiling.log2_tile_cols++) ;
 761     for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
 762     max = FFMAX(0, max - 1);
 763     while (max > s->s.h.tiling.log2_tile_cols) {
 764         if (get_bits1(&s->gb))
 765             s->s.h.tiling.log2_tile_cols++;
 766         else
 767             break;
 768     }
 769     s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
 770     s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
 771     if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
 772         int n_range_coders;
 773         VP56RangeCoder *rc;
 774
 775         if (s->td) {
 776             for (i = 0; i < s->active_tile_cols; i++) {
 777                 av_free(s->td[i].b_base);
 778                 av_free(s->td[i].block_base);
 779             }
 780             av_free(s->td);
 781         }
 782
 783         s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
 784         vp9_free_entries(avctx);
 785         s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
 786                               s->s.h.tiling.tile_cols : 1;
 787         vp9_alloc_entries(avctx, s->sb_rows);
 788         if (avctx->active_thread_type == FF_THREAD_SLICE) {
 789             n_range_coders = 4; // max_tile_rows
 790         } else {
 791             n_range_coders = s->s.h.tiling.tile_cols;
 792         }
 793         s->td = av_mallocz_array(s->active_tile_cols, sizeof(VP9TileData) +
 794                                  n_range_coders * sizeof(VP56RangeCoder));
 795         if (!s->td)
 796             return AVERROR(ENOMEM);
 797         rc = (VP56RangeCoder *) &s->td[s->active_tile_cols];
 798         for (i = 0; i < s->active_tile_cols; i++) {
 799             s->td[i].s = s;
 800             s->td[i].c_b = rc;
 801             rc += n_range_coders;
 802         }
 803     }
 804
 805     /* check reference frames */
 806     if (!s->s.h.keyframe && !s->s.h.intraonly) {
 807         for (i = 0; i < 3; i++) {
 808             AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
 809             int refw = ref->width, refh = ref->height;
 810
 811             if (ref->format != avctx->pix_fmt) {
 812                 av_log(avctx, AV_LOG_ERROR,
 813                        "Ref pixfmt (%s) did not match current frame (%s)",
 814                        av_get_pix_fmt_name(ref->format),
 815                        av_get_pix_fmt_name(avctx->pix_fmt));
 816                 return AVERROR_INVALIDDATA;
 817             } else if (refw == w && refh == h) {
 818                 s->mvscale[i][0] = s->mvscale[i][1] = 0;
 819             } else {
 820                 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
 821                     av_log(avctx, AV_LOG_ERROR,
 822                            "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
 823                            refw, refh, w, h);
 824                     return AVERROR_INVALIDDATA;
 825                 }
 826                 s->mvscale[i][0] = (refw << 14) / w;
 827                 s->mvscale[i][1] = (refh << 14) / h;
 828                 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
 829                 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
 830             }
 831         }
 832     }
 833
 834     if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
 835         s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
 836                            s->prob_ctx[3].p = ff_vp9_default_probs;
 837         memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
 838                sizeof(ff_vp9_default_coef_probs));
 839         memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
 840                sizeof(ff_vp9_default_coef_probs));
 841         memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
 842                sizeof(ff_vp9_default_coef_probs));
 843         memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
 844                sizeof(ff_vp9_default_coef_probs));
 845     } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
 846         s->prob_ctx[c].p = ff_vp9_default_probs;
 847         memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
 848                sizeof(ff_vp9_default_coef_probs));
 849     }
 850
 851     // next 16 bits is size of the rest of the header (arith-coded)
 852     s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
 853     s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
 854
 855     data2 = align_get_bits(&s->gb);
 856     if (size2 > size - (data2 - data)) {
 857         av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
 858         return AVERROR_INVALIDDATA;
 859     }
 860     ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
 861     if (ret < 0)
 862         return ret;
 863
 864     if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
 865         av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
 866         return AVERROR_INVALIDDATA;
 867     }
 868
 869     for (i = 0; i < s->active_tile_cols; i++) {
 870         if (s->s.h.keyframe || s->s.h.intraonly) {
 871             memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
 872             memset(s->td[i].counts.eob,  0, sizeof(s->td[0].counts.eob));
 873         } else {
 874             memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));
 875         }
 876     }
 877
 878     /* FIXME is it faster to not copy here, but do it down in the fw updates
 879      * as explicit copies if the fw update is missing (and skip the copy upon
 880      * fw update)? */
 881     s->prob.p = s->prob_ctx[c].p;
 882
 883     // txfm updates
 884     if (s->s.h.lossless) {
 885         s->s.h.txfmmode = TX_4X4;
 886     } else {
 887         s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
 888         if (s->s.h.txfmmode == 3)
 889             s->s.h.txfmmode += vp8_rac_get(&s->c);
 890
 891         if (s->s.h.txfmmode == TX_SWITCHABLE) {
 892             for (i = 0; i < 2; i++)
 893                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 894                     s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
 895             for (i = 0; i < 2; i++)
 896                 for (j = 0; j < 2; j++)
 897                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 898                         s->prob.p.tx16p[i][j] =
 899                             update_prob(&s->c, s->prob.p.tx16p[i][j]);
 900             for (i = 0; i < 2; i++)
 901                 for (j = 0; j < 3; j++)
 902                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 903                         s->prob.p.tx32p[i][j] =
 904                             update_prob(&s->c, s->prob.p.tx32p[i][j]);
 905         }
 906     }
 907
 908     // coef updates
 909     for (i = 0; i < 4; i++) {
 910         uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
 911         if (vp8_rac_get(&s->c)) {
 912             for (j = 0; j < 2; j++)
 913                 for (k = 0; k < 2; k++)
 914                     for (l = 0; l < 6; l++)
 915                         for (m = 0; m < 6; m++) {
 916                             uint8_t *p = s->prob.coef[i][j][k][l][m];
 917                             uint8_t *r = ref[j][k][l][m];
 918                             if (m >= 3 && l == 0) // dc only has 3 pt
 919                                 break;
 920                             for (n = 0; n < 3; n++) {
 921                                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 922                                     p[n] = update_prob(&s->c, r[n]);
 923                                 else
 924                                     p[n] = r[n];
 925                             }
 926                             memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
 927                         }
 928         } else {
 929             for (j = 0; j < 2; j++)
 930                 for (k = 0; k < 2; k++)
 931                     for (l = 0; l < 6; l++)
 932                         for (m = 0; m < 6; m++) {
 933                             uint8_t *p = s->prob.coef[i][j][k][l][m];
 934                             uint8_t *r = ref[j][k][l][m];
 935                             if (m > 3 && l == 0) // dc only has 3 pt
 936                                 break;
 937                             memcpy(p, r, 3);
 938                             memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
 939                         }
 940         }
 941         if (s->s.h.txfmmode == i)
 942             break;
 943     }
 944
 945     // mode updates
 946     for (i = 0; i < 3; i++)
 947         if (vp56_rac_get_prob_branchy(&s->c, 252))
 948             s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
 949     if (!s->s.h.keyframe && !s->s.h.intraonly) {
 950         for (i = 0; i < 7; i++)
 951             for (j = 0; j < 3; j++)
 952                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 953                     s->prob.p.mv_mode[i][j] =
 954                         update_prob(&s->c, s->prob.p.mv_mode[i][j]);
 955
 956         if (s->s.h.filtermode == FILTER_SWITCHABLE)
 957             for (i = 0; i < 4; i++)
 958                 for (j = 0; j < 2; j++)
 959                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 960                         s->prob.p.filter[i][j] =
 961                             update_prob(&s->c, s->prob.p.filter[i][j]);
 962
 963         for (i = 0; i < 4; i++)
 964             if (vp56_rac_get_prob_branchy(&s->c, 252))
 965                 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
 966
 967         if (s->s.h.allowcompinter) {
 968             s->s.h.comppredmode = vp8_rac_get(&s->c);
 969             if (s->s.h.comppredmode)
 970                 s->s.h.comppredmode += vp8_rac_get(&s->c);
 971             if (s->s.h.comppredmode == PRED_SWITCHABLE)
 972                 for (i = 0; i < 5; i++)
 973                     if (vp56_rac_get_prob_branchy(&s->c, 252))
 974                         s->prob.p.comp[i] =
 975                             update_prob(&s->c, s->prob.p.comp[i]);
 976         } else {
 977             s->s.h.comppredmode = PRED_SINGLEREF;
 978         }
 979
 980         if (s->s.h.comppredmode != PRED_COMPREF) {
 981             for (i = 0; i < 5; i++) {
 982                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 983                     s->prob.p.single_ref[i][0] =
 984                         update_prob(&s->c, s->prob.p.single_ref[i][0]);
 985                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 986                     s->prob.p.single_ref[i][1] =
 987                         update_prob(&s->c, s->prob.p.single_ref[i][1]);
 988             }
 989         }
 990
 991         if (s->s.h.comppredmode != PRED_SINGLEREF) {
 992             for (i = 0; i < 5; i++)
 993                 if (vp56_rac_get_prob_branchy(&s->c, 252))
 994                     s->prob.p.comp_ref[i] =
 995                         update_prob(&s->c, s->prob.p.comp_ref[i]);
 996         }
 997
 998         for (i = 0; i < 4; i++)
 999             for (j = 0; j < 9; j++)
1000                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1001                     s->prob.p.y_mode[i][j] =
1002                         update_prob(&s->c, s->prob.p.y_mode[i][j]);
1003
1004         for (i = 0; i < 4; i++)
1005             for (j = 0; j < 4; j++)
1006                 for (k = 0; k < 3; k++)
1007                     if (vp56_rac_get_prob_branchy(&s->c, 252))
1008                         s->prob.p.partition[3 - i][j][k] =
1009                             update_prob(&s->c,
1010                                         s->prob.p.partition[3 - i][j][k]);
1011
1012         // mv fields don't use the update_prob subexp model for some reason
1013         for (i = 0; i < 3; i++)
1014             if (vp56_rac_get_prob_branchy(&s->c, 252))
1015                 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1016
1017         for (i = 0; i < 2; i++) {
1018             if (vp56_rac_get_prob_branchy(&s->c, 252))
1019                 s->prob.p.mv_comp[i].sign =
1020                     (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1021
1022             for (j = 0; j < 10; j++)
1023                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1024                     s->prob.p.mv_comp[i].classes[j] =
1025                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1026
1027             if (vp56_rac_get_prob_branchy(&s->c, 252))
1028                 s->prob.p.mv_comp[i].class0 =
1029                     (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1030
1031             for (j = 0; j < 10; j++)
1032                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1033                     s->prob.p.mv_comp[i].bits[j] =
1034                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1035         }
1036
1037         for (i = 0; i < 2; i++) {
1038             for (j = 0; j < 2; j++)
1039                 for (k = 0; k < 3; k++)
1040                     if (vp56_rac_get_prob_branchy(&s->c, 252))
1041                         s->prob.p.mv_comp[i].class0_fp[j][k] =
1042                             (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1043
1044             for (j = 0; j < 3; j++)
1045                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1046                     s->prob.p.mv_comp[i].fp[j] =
1047                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1048         }
1049
1050         if (s->s.h.highprecisionmvs) {
1051             for (i = 0; i < 2; i++) {
1052                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1053                     s->prob.p.mv_comp[i].class0_hp =
1054                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1055
1056                 if (vp56_rac_get_prob_branchy(&s->c, 252))
1057                     s->prob.p.mv_comp[i].hp =
1058                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1059             }
1060         }
1061     }
1062
1063     return (data2 - data) + size2;
1064 }
1065
1066 static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1067                       ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1068 {
1069     const VP9Context *s = td->s;
1070     int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1071             (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1072     const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
1073                                                      s->prob.p.partition[bl][c];
1074     enum BlockPartition bp;
1075     ptrdiff_t hbs = 4 >> bl;
1076     AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1077     ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1078     int bytesperpixel = s->bytesperpixel;
1079
1080     if (bl == BL_8X8) {
1081         bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1082         ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1083     } else if (col + hbs < s->cols) { // FIXME why not <=?
1084         if (row + hbs < s->rows) { // FIXME why not <=?
1085             bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1086             switch (bp) {
1087             case PARTITION_NONE:
1088                 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1089                 break;
1090             case PARTITION_H:
1091                 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1092                 yoff  += hbs * 8 * y_stride;
1093                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1094                 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
1095                 break;
1096             case PARTITION_V:
1097                 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1098                 yoff  += hbs * 8 * bytesperpixel;
1099                 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1100                 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
1101                 break;
1102             case PARTITION_SPLIT:
1103                 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1104                 decode_sb(td, row, col + hbs, lflvl,
1105                           yoff + 8 * hbs * bytesperpixel,
1106                           uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1107                 yoff  += hbs * 8 * y_stride;
1108                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1109                 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1110                 decode_sb(td, row + hbs, col + hbs, lflvl,
1111                           yoff + 8 * hbs * bytesperpixel,
1112                           uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1113                 break;
1114             default:
1115                 av_assert0(0);
1116             }
1117         } else if (vp56_rac_get_prob_branchy(td->c, p[1])) {
1118             bp = PARTITION_SPLIT;
1119             decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1120             decode_sb(td, row, col + hbs, lflvl,
1121                       yoff + 8 * hbs * bytesperpixel,
1122                       uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1123         } else {
1124             bp = PARTITION_H;
1125             ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1126         }
1127     } else if (row + hbs < s->rows) { // FIXME why not <=?
1128         if (vp56_rac_get_prob_branchy(td->c, p[2])) {
1129             bp = PARTITION_SPLIT;
1130             decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1131             yoff  += hbs * 8 * y_stride;
1132             uvoff += hbs * 8 * uv_stride >> s->ss_v;
1133             decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1134         } else {
1135             bp = PARTITION_V;
1136             ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1137         }
1138     } else {
1139         bp = PARTITION_SPLIT;
1140         decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1141     }
1142     td->counts.partition[bl][c][bp]++;
1143 }
1144
1145 static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1146                           ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1147 {
1148     const VP9Context *s = td->s;
1149     VP9Block *b = td->b;
1150     ptrdiff_t hbs = 4 >> bl;
1151     AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1152     ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1153     int bytesperpixel = s->bytesperpixel;
1154
1155     if (bl == BL_8X8) {
1156         av_assert2(b->bl == BL_8X8);
1157         ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1158     } else if (td->b->bl == bl) {
1159         ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1160         if (b->bp == PARTITION_H && row + hbs < s->rows) {
1161             yoff  += hbs * 8 * y_stride;
1162             uvoff += hbs * 8 * uv_stride >> s->ss_v;
1163             ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
1164         } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
1165             yoff  += hbs * 8 * bytesperpixel;
1166             uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1167             ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
1168         }
1169     } else {
1170         decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
1171         if (col + hbs < s->cols) { // FIXME why not <=?
1172             if (row + hbs < s->rows) {
1173                 decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1174                               uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1175                 yoff  += hbs * 8 * y_stride;
1176                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1177                 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1178                 decode_sb_mem(td, row + hbs, col + hbs, lflvl,
1179                               yoff + 8 * hbs * bytesperpixel,
1180                               uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1181             } else {
1182                 yoff  += hbs * 8 * bytesperpixel;
1183                 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1184                 decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
1185             }
1186         } else if (row + hbs < s->rows) {
1187             yoff  += hbs * 8 * y_stride;
1188             uvoff += hbs * 8 * uv_stride >> s->ss_v;
1189             decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1190         }
1191     }
1192 }
1193
1194 static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
1195 {
1196     int sb_start = ( idx      * n) >> log2_n;
1197     int sb_end   = ((idx + 1) * n) >> log2_n;
1198     *start = FFMIN(sb_start, n) << 3;
1199     *end   = FFMIN(sb_end,   n) << 3;
1200 }
1201
1202 static void free_buffers(VP9Context *s)
1203 {
1204     int i;
1205
1206     av_freep(&s->intra_pred_data[0]);
1207     for (i = 0; i < s->active_tile_cols; i++) {
1208         av_freep(&s->td[i].b_base);
1209         av_freep(&s->td[i].block_base);
1210     }
1211 }
1212
1213 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1214 {
1215     VP9Context *s = avctx->priv_data;
1216     int i;
1217
1218     for (i = 0; i < 3; i++) {
1219         vp9_frame_unref(avctx, &s->s.frames[i]);
1220         av_frame_free(&s->s.frames[i].tf.f);
1221     }
1222     av_buffer_pool_uninit(&s->frame_extradata_pool);
1223     for (i = 0; i < 8; i++) {
1224         ff_thread_release_buffer(avctx, &s->s.refs[i]);
1225         av_frame_free(&s->s.refs[i].f);
1226         ff_thread_release_buffer(avctx, &s->next_refs[i]);
1227         av_frame_free(&s->next_refs[i].f);
1228     }
1229
1230     free_buffers(s);
1231     vp9_free_entries(avctx);
1232     av_freep(&s->td);
1233     return 0;
1234 }
1235
1236 static int decode_tiles(AVCodecContext *avctx,
1237                         const uint8_t *data, int size)
1238 {
1239     VP9Context *s = avctx->priv_data;
1240     VP9TileData *td = &s->td[0];
1241     int row, col, tile_row, tile_col, ret;
1242     int bytesperpixel;
1243     int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1244     AVFrame *f;
1245     ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1246
1247     f = s->s.frames[CUR_FRAME].tf.f;
1248     ls_y = f->linesize[0];
1249     ls_uv =f->linesize[1];
1250     bytesperpixel = s->bytesperpixel;
1251
1252     yoff = uvoff = 0;
1253     for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1254         set_tile_offset(&tile_row_start, &tile_row_end,
1255                         tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1256
1257         for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1258             int64_t tile_size;
1259
1260             if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1261                 tile_row == s->s.h.tiling.tile_rows - 1) {
1262                 tile_size = size;
1263             } else {
1264                 tile_size = AV_RB32(data);
1265                 data += 4;
1266                 size -= 4;
1267             }
1268             if (tile_size > size) {
1269                 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1270                 return AVERROR_INVALIDDATA;
1271             }
1272             ret = ff_vp56_init_range_decoder(&td->c_b[tile_col], data, tile_size);
1273             if (ret < 0)
1274                 return ret;
1275             if (vp56_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker bit
1276                 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1277                 return AVERROR_INVALIDDATA;
1278             }
1279             data += tile_size;
1280             size -= tile_size;
1281         }
1282
1283         for (row = tile_row_start; row < tile_row_end;
1284              row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1285             VP9Filter *lflvl_ptr = s->lflvl;
1286             ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1287
1288             for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1289                 set_tile_offset(&tile_col_start, &tile_col_end,
1290                                 tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1291                 td->tile_col_start = tile_col_start;
1292                 if (s->pass != 2) {
1293                     memset(td->left_partition_ctx, 0, 8);
1294                     memset(td->left_skip_ctx, 0, 8);
1295                     if (s->s.h.keyframe || s->s.h.intraonly) {
1296                         memset(td->left_mode_ctx, DC_PRED, 16);
1297                     } else {
1298                         memset(td->left_mode_ctx, NEARESTMV, 8);
1299                     }
1300                     memset(td->left_y_nnz_ctx, 0, 16);
1301                     memset(td->left_uv_nnz_ctx, 0, 32);
1302                     memset(td->left_segpred_ctx, 0, 8);
1303
1304                     td->c = &td->c_b[tile_col];
1305                 }
1306
1307                 for (col = tile_col_start;
1308                      col < tile_col_end;
1309                      col += 8, yoff2 += 64 * bytesperpixel,
1310                      uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1311                     // FIXME integrate with lf code (i.e. zero after each
1312                     // use, similar to invtxfm coefficients, or similar)
1313                     if (s->pass != 1) {
1314                         memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1315                     }
1316
1317                     if (s->pass == 2) {
1318                         decode_sb_mem(td, row, col, lflvl_ptr,
1319                                       yoff2, uvoff2, BL_64X64);
1320                     } else {
1321                         if (vpX_rac_is_end(td->c)) {
1322                             return AVERROR_INVALIDDATA;
1323                         }
1324                         decode_sb(td, row, col, lflvl_ptr,
1325                                   yoff2, uvoff2, BL_64X64);
1326                     }
1327                 }
1328             }
1329
1330             if (s->pass == 1)
1331                 continue;
1332
1333             // backup pre-loopfilter reconstruction data for intra
1334             // prediction of next row of sb64s
1335             if (row + 8 < s->rows) {
1336                 memcpy(s->intra_pred_data[0],
1337                        f->data[0] + yoff + 63 * ls_y,
1338                        8 * s->cols * bytesperpixel);
1339                 memcpy(s->intra_pred_data[1],
1340                        f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1341                        8 * s->cols * bytesperpixel >> s->ss_h);
1342                 memcpy(s->intra_pred_data[2],
1343                        f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1344                        8 * s->cols * bytesperpixel >> s->ss_h);
1345             }
1346
1347             // loopfilter one row
1348             if (s->s.h.filter.level) {
1349                 yoff2 = yoff;
1350                 uvoff2 = uvoff;
1351                 lflvl_ptr = s->lflvl;
1352                 for (col = 0; col < s->cols;
1353                      col += 8, yoff2 += 64 * bytesperpixel,
1354                      uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1355                     ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
1356                                          yoff2, uvoff2);
1357                 }
1358             }
1359
1360             // FIXME maybe we can make this more finegrained by running the
1361             // loopfilter per-block instead of after each sbrow
1362             // In fact that would also make intra pred left preparation easier?
1363             ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
1364         }
1365     }
1366     return 0;
1367 }
1368
1369 #if HAVE_THREADS
1370 static av_always_inline
1371 int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
1372                               int threadnr)
1373 {
1374     VP9Context *s = avctx->priv_data;
1375     VP9TileData *td = &s->td[jobnr];
1376     ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1377     int bytesperpixel = s->bytesperpixel, row, col, tile_row;
1378     unsigned tile_cols_len;
1379     int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1380     VP9Filter *lflvl_ptr_base;
1381     AVFrame *f;
1382
1383     f = s->s.frames[CUR_FRAME].tf.f;
1384     ls_y = f->linesize[0];
1385     ls_uv =f->linesize[1];
1386
1387     set_tile_offset(&tile_col_start, &tile_col_end,
1388                     jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1389     td->tile_col_start  = tile_col_start;
1390     uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3);
1391     yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1392     lflvl_ptr_base = s->lflvl+(tile_col_start >> 3);
1393
1394     for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1395         set_tile_offset(&tile_row_start, &tile_row_end,
1396                         tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1397
1398         td->c = &td->c_b[tile_row];
1399         for (row = tile_row_start; row < tile_row_end;
1400              row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1401             ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1402             VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3);
1403
1404             memset(td->left_partition_ctx, 0, 8);
1405             memset(td->left_skip_ctx, 0, 8);
1406             if (s->s.h.keyframe || s->s.h.intraonly) {
1407                 memset(td->left_mode_ctx, DC_PRED, 16);
1408             } else {
1409                 memset(td->left_mode_ctx, NEARESTMV, 8);
1410             }
1411             memset(td->left_y_nnz_ctx, 0, 16);
1412             memset(td->left_uv_nnz_ctx, 0, 32);
1413             memset(td->left_segpred_ctx, 0, 8);
1414
1415             for (col = tile_col_start;
1416                  col < tile_col_end;
1417                  col += 8, yoff2 += 64 * bytesperpixel,
1418                  uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1419                 // FIXME integrate with lf code (i.e. zero after each
1420                 // use, similar to invtxfm coefficients, or similar)
1421                 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1422                 decode_sb(td, row, col, lflvl_ptr,
1423                             yoff2, uvoff2, BL_64X64);
1424             }
1425
1426             // backup pre-loopfilter reconstruction data for intra
1427             // prediction of next row of sb64s
1428             tile_cols_len = tile_col_end - tile_col_start;
1429             if (row + 8 < s->rows) {
1430                 memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1431                        f->data[0] + yoff + 63 * ls_y,
1432                        8 * tile_cols_len * bytesperpixel);
1433                 memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1434                        f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1435                        8 * tile_cols_len * bytesperpixel >> s->ss_h);
1436                 memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1437                        f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1438                        8 * tile_cols_len * bytesperpixel >> s->ss_h);
1439             }
1440
1441             vp9_report_tile_progress(s, row >> 3, 1);
1442         }
1443     }
1444     return 0;
1445 }
1446
1447 static av_always_inline
1448 int loopfilter_proc(AVCodecContext *avctx)
1449 {
1450     VP9Context *s = avctx->priv_data;
1451     ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1452     VP9Filter *lflvl_ptr;
1453     int bytesperpixel = s->bytesperpixel, col, i;
1454     AVFrame *f;
1455
1456     f = s->s.frames[CUR_FRAME].tf.f;
1457     ls_y = f->linesize[0];
1458     ls_uv =f->linesize[1];
1459
1460     for (i = 0; i < s->sb_rows; i++) {
1461         vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);
1462
1463         if (s->s.h.filter.level) {
1464             yoff = (ls_y * 64)*i;
1465             uvoff =  (ls_uv * 64 >> s->ss_v)*i;
1466             lflvl_ptr = s->lflvl+s->sb_cols*i;
1467             for (col = 0; col < s->cols;
1468                  col += 8, yoff += 64 * bytesperpixel,
1469                  uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1470                 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col,
1471                                      yoff, uvoff);
1472             }
1473         }
1474     }
1475     return 0;
1476 }
1477 #endif
1478
1479 static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
1480                             int *got_frame, AVPacket *pkt)
1481 {
1482     const uint8_t *data = pkt->data;
1483     int size = pkt->size;
1484     VP9Context *s = avctx->priv_data;
1485     int ret, i, j, ref;
1486     int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
1487                             (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
1488     AVFrame *f;
1489
1490     if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
1491         return ret;
1492     } else if (ret == 0) {
1493         if (!s->s.refs[ref].f->buf[0]) {
1494             av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
1495             return AVERROR_INVALIDDATA;
1496         }
1497         if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
1498             return ret;
1499         ((AVFrame *)frame)->pts = pkt->pts;
1500 #if FF_API_PKT_PTS
1501 FF_DISABLE_DEPRECATION_WARNINGS
1502         ((AVFrame *)frame)->pkt_pts = pkt->pts;
1503 FF_ENABLE_DEPRECATION_WARNINGS
1504 #endif
1505         ((AVFrame *)frame)->pkt_dts = pkt->dts;
1506         for (i = 0; i < 8; i++) {
1507             if (s->next_refs[i].f->buf[0])
1508                 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1509             if (s->s.refs[i].f->buf[0] &&
1510                 (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
1511                 return ret;
1512         }
1513         *got_frame = 1;
1514         return pkt->size;
1515     }
1516     data += ret;
1517     size -= ret;
1518
1519     if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
1520         if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
1521             vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
1522         if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1523             (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
1524             return ret;
1525     }
1526     if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
1527         vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
1528     if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1529         (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
1530         return ret;
1531     if (s->s.frames[CUR_FRAME].tf.f->buf[0])
1532         vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME]);
1533     if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
1534         return ret;
1535     f = s->s.frames[CUR_FRAME].tf.f;
1536     f->key_frame = s->s.h.keyframe;
1537     f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1538
1539     if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
1540         (s->s.frames[REF_FRAME_MVPAIR].tf.f->width  != s->s.frames[CUR_FRAME].tf.f->width ||
1541          s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
1542         vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
1543     }
1544
1545     // ref frame setup
1546     for (i = 0; i < 8; i++) {
1547         if (s->next_refs[i].f->buf[0])
1548             ff_thread_release_buffer(avctx, &s->next_refs[i]);
1549         if (s->s.h.refreshrefmask & (1 << i)) {
1550             ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
1551         } else if (s->s.refs[i].f->buf[0]) {
1552             ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
1553         }
1554         if (ret < 0)
1555             return ret;
1556     }
1557
1558     if (avctx->hwaccel) {
1559         ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
1560         if (ret < 0)
1561             return ret;
1562         ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
1563         if (ret < 0)
1564             return ret;
1565         ret = avctx->hwaccel->end_frame(avctx);
1566         if (ret < 0)
1567             return ret;
1568         goto finish;
1569     }
1570
1571     // main tile decode loop
1572     memset(s->above_partition_ctx, 0, s->cols);
1573     memset(s->above_skip_ctx, 0, s->cols);
1574     if (s->s.h.keyframe || s->s.h.intraonly) {
1575         memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
1576     } else {
1577         memset(s->above_mode_ctx, NEARESTMV, s->cols);
1578     }
1579     memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
1580     memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
1581     memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
1582     memset(s->above_segpred_ctx, 0, s->cols);
1583     s->pass = s->s.frames[CUR_FRAME].uses_2pass =
1584         avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
1585     if ((ret = update_block_buffers(avctx)) < 0) {
1586         av_log(avctx, AV_LOG_ERROR,
1587                "Failed to allocate block buffers\n");
1588         return ret;
1589     }
1590     if (s->s.h.refreshctx && s->s.h.parallelmode) {
1591         int j, k, l, m;
1592
1593         for (i = 0; i < 4; i++) {
1594             for (j = 0; j < 2; j++)
1595                 for (k = 0; k < 2; k++)
1596                     for (l = 0; l < 6; l++)
1597                         for (m = 0; m < 6; m++)
1598                             memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
1599                                    s->prob.coef[i][j][k][l][m], 3);
1600             if (s->s.h.txfmmode == i)
1601                 break;
1602         }
1603         s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
1604         ff_thread_finish_setup(avctx);
1605     } else if (!s->s.h.refreshctx) {
1606         ff_thread_finish_setup(avctx);
1607     }
1608
1609 #if HAVE_THREADS
1610     if (avctx->active_thread_type & FF_THREAD_SLICE) {
1611         for (i = 0; i < s->sb_rows; i++)
1612             atomic_store(&s->entries[i], 0);
1613     }
1614 #endif
1615
1616     do {
1617         for (i = 0; i < s->active_tile_cols; i++) {
1618             s->td[i].b = s->td[i].b_base;
1619             s->td[i].block = s->td[i].block_base;
1620             s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
1621             s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
1622             s->td[i].eob = s->td[i].eob_base;
1623             s->td[i].uveob[0] = s->td[i].uveob_base[0];
1624             s->td[i].uveob[1] = s->td[i].uveob_base[1];
1625         }
1626
1627 #if HAVE_THREADS
1628         if (avctx->active_thread_type == FF_THREAD_SLICE) {
1629             int tile_row, tile_col;
1630
1631             av_assert1(!s->pass);
1632
1633             for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1634                 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1635                     int64_t tile_size;
1636
1637                     if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1638                         tile_row == s->s.h.tiling.tile_rows - 1) {
1639                         tile_size = size;
1640                     } else {
1641                         tile_size = AV_RB32(data);
1642                         data += 4;
1643                         size -= 4;
1644                     }
1645                     if (tile_size > size)
1646                         return AVERROR_INVALIDDATA;
1647                     ret = ff_vp56_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
1648                     if (ret < 0)
1649                         return ret;
1650                     if (vp56_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
1651                         return AVERROR_INVALIDDATA;
1652                     data += tile_size;
1653                     size -= tile_size;
1654                 }
1655             }
1656
1657             ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
1658         } else
1659 #endif
1660         {
1661             ret = decode_tiles(avctx, data, size);
1662             if (ret < 0) {
1663                 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1664                 return ret;
1665             }
1666         }
1667
1668         // Sum all counts fields into td[0].counts for tile threading
1669         if (avctx->active_thread_type == FF_THREAD_SLICE)
1670             for (i = 1; i < s->s.h.tiling.tile_cols; i++)
1671                 for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
1672                     ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];
1673
1674         if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
1675             ff_vp9_adapt_probs(s);
1676             ff_thread_finish_setup(avctx);
1677         }
1678     } while (s->pass++ == 1);
1679     ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1680
1681 finish:
1682     // ref frame setup
1683     for (i = 0; i < 8; i++) {
1684         if (s->s.refs[i].f->buf[0])
1685             ff_thread_release_buffer(avctx, &s->s.refs[i]);
1686         if (s->next_refs[i].f->buf[0] &&
1687             (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
1688             return ret;
1689     }
1690
1691     if (!s->s.h.invisible) {
1692         if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
1693             return ret;
1694         *got_frame = 1;
1695     }
1696
1697     return pkt->size;
1698 }
1699
1700 static void vp9_decode_flush(AVCodecContext *avctx)
1701 {
1702     VP9Context *s = avctx->priv_data;
1703     int i;
1704
1705     for (i = 0; i < 3; i++)
1706         vp9_frame_unref(avctx, &s->s.frames[i]);
1707     for (i = 0; i < 8; i++)
1708         ff_thread_release_buffer(avctx, &s->s.refs[i]);
1709 }
1710
1711 static int init_frames(AVCodecContext *avctx)
1712 {
1713     VP9Context *s = avctx->priv_data;
1714     int i;
1715
1716     for (i = 0; i < 3; i++) {
1717         s->s.frames[i].tf.f = av_frame_alloc();
1718         if (!s->s.frames[i].tf.f) {
1719             vp9_decode_free(avctx);
1720             av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1721             return AVERROR(ENOMEM);
1722         }
1723     }
1724     for (i = 0; i < 8; i++) {
1725         s->s.refs[i].f = av_frame_alloc();
1726         s->next_refs[i].f = av_frame_alloc();
1727         if (!s->s.refs[i].f || !s->next_refs[i].f) {
1728             vp9_decode_free(avctx);
1729             av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1730             return AVERROR(ENOMEM);
1731         }
1732     }
1733
1734     return 0;
1735 }
1736
1737 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1738 {
1739     VP9Context *s = avctx->priv_data;
1740
1741     s->last_bpp = 0;
1742     s->s.h.filter.sharpness = -1;
1743
1744     return init_frames(avctx);
1745 }
1746
1747 #if HAVE_THREADS
1748 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1749 {
1750     int i, ret;
1751     VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
1752
1753     for (i = 0; i < 3; i++) {
1754         if (s->s.frames[i].tf.f->buf[0])
1755             vp9_frame_unref(dst, &s->s.frames[i]);
1756         if (ssrc->s.frames[i].tf.f->buf[0]) {
1757             if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
1758                 return ret;
1759         }
1760     }
1761     for (i = 0; i < 8; i++) {
1762         if (s->s.refs[i].f->buf[0])
1763             ff_thread_release_buffer(dst, &s->s.refs[i]);
1764         if (ssrc->next_refs[i].f->buf[0]) {
1765             if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
1766                 return ret;
1767         }
1768     }
1769
1770     s->s.h.invisible = ssrc->s.h.invisible;
1771     s->s.h.keyframe = ssrc->s.h.keyframe;
1772     s->s.h.intraonly = ssrc->s.h.intraonly;
1773     s->ss_v = ssrc->ss_v;
1774     s->ss_h = ssrc->ss_h;
1775     s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1776     s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1777     s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1778     s->bytesperpixel = ssrc->bytesperpixel;
1779     s->gf_fmt = ssrc->gf_fmt;
1780     s->w = ssrc->w;
1781     s->h = ssrc->h;
1782     s->s.h.bpp = ssrc->s.h.bpp;
1783     s->bpp_index = ssrc->bpp_index;
1784     s->pix_fmt = ssrc->pix_fmt;
1785     memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
1786     memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
1787     memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1788            sizeof(s->s.h.segmentation.feat));
1789
1790     return 0;
1791 }
1792 #endif
1793
1794 AVCodec ff_vp9_decoder = {
1795     .name                  = "vp9",
1796     .long_name             = NULL_IF_CONFIG_SMALL("Google VP9"),
1797     .type                  = AVMEDIA_TYPE_VIDEO,
1798     .id                    = AV_CODEC_ID_VP9,
1799     .priv_data_size        = sizeof(VP9Context),
1800     .init                  = vp9_decode_init,
1801     .close                 = vp9_decode_free,
1802     .decode                = vp9_decode_frame,
1803     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
1804     .caps_internal         = FF_CODEC_CAP_SLICE_THREAD_HAS_MF |
1805                              FF_CODEC_CAP_ALLOCATE_PROGRESS,
1806     .flush                 = vp9_decode_flush,
1807     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
1808     .profiles              = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
1809     .bsfs                  = "vp9_superframe_split",
1810     .hw_configs            = (const AVCodecHWConfigInternal*[]) {
1811 #if CONFIG_VP9_DXVA2_HWACCEL
1812                                HWACCEL_DXVA2(vp9),
1813 #endif
1814 #if CONFIG_VP9_D3D11VA_HWACCEL
1815                                HWACCEL_D3D11VA(vp9),
1816 #endif
1817 #if CONFIG_VP9_D3D11VA2_HWACCEL
1818                                HWACCEL_D3D11VA2(vp9),
1819 #endif
1820 #if CONFIG_VP9_NVDEC_HWACCEL
1821                                HWACCEL_NVDEC(vp9),
1822 #endif
1823 #if CONFIG_VP9_VAAPI_HWACCEL
1824                                HWACCEL_VAAPI(vp9),
1825 #endif
1826 #if CONFIG_VP9_VDPAU_HWACCEL
1827                                HWACCEL_VDPAU(vp9),
1828 #endif
1829                                NULL
1830                            },
1831 };