2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of Libav.
9 * Libav is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * Libav is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with Libav; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "libavutil/avassert.h"
34 #define VP9_SYNCCODE 0x498342
/* Drop every held reference frame (called on seek/flush and before the
 * per-frame context buffers are reallocated in update_size()). */
37 static void vp9_decode_flush(AVCodecContext *avctx)
39     VP9Context *s = avctx->priv_data;
// Unreference all entries of the reference-frame array.
42     for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
43         av_frame_unref(s->refs[i]);
/* (Re)compute frame dimensions in 8x8-block and 64x64-superblock units and
 * (re)allocate the single memory slab that backs all "above"-row context
 * buffers, the loop-filter level array and the per-frame MV/segmentation
 * storage. Returns 0 when nothing had to change, a negative AVERROR on
 * failure. */
46 static int update_size(AVCodecContext *avctx, int w, int h)
48     VP9Context *s = avctx->priv_data;
// Fast path: buffers already exist for exactly this resolution.
51     if (s->above_partition_ctx && w == avctx->width && h == avctx->height)
// Resolution changed: old reference frames are no longer usable.
54     vp9_decode_flush(avctx);
57         return AVERROR_INVALIDDATA;
// Round up to whole 64x64 superblocks / 8x8 blocks.
61     s->sb_cols = (w + 63) >> 6;
62     s->sb_rows = (h + 63) >> 6;
63     s->cols = (w + 7) >> 3;
64     s->rows = (h + 7) >> 3;
// Carve a typed sub-array for 'var' out of the slab; 'n' is the element
// count needed per superblock column.
66 #define assign(var, type, n) var = (type)p; p += s->sb_cols * n * sizeof(*var)
67     av_free(s->above_partition_ctx);
// One allocation for all context buffers; the size must stay in sync with
// the assign() calls below.
68     p = av_malloc(s->sb_cols *
69                   (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx) +
70                    64 * s->sb_rows * (1 + sizeof(*s->mv[0]) * 2)));
72         return AVERROR(ENOMEM);
73     assign(s->above_partition_ctx, uint8_t *, 8);
74     assign(s->above_skip_ctx, uint8_t *, 8);
75     assign(s->above_txfm_ctx, uint8_t *, 8);
76     assign(s->above_mode_ctx, uint8_t *, 16);
77     assign(s->above_y_nnz_ctx, uint8_t *, 16);
78     assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
79     assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
// Pre-loopfilter pixels saved for intra prediction of the next sb64 row
// (64 luma bytes, 32 per chroma plane, per superblock column).
80     assign(s->intra_pred_data[0], uint8_t *, 64);
81     assign(s->intra_pred_data[1], uint8_t *, 32);
82     assign(s->intra_pred_data[2], uint8_t *, 32);
83     assign(s->above_segpred_ctx, uint8_t *, 8);
84     assign(s->above_intra_ctx, uint8_t *, 8);
85     assign(s->above_comp_ctx, uint8_t *, 8);
86     assign(s->above_ref_ctx, uint8_t *, 8);
87     assign(s->above_filter_ctx, uint8_t *, 8);
88     assign(s->lflvl, VP9Filter *, 1);
89     assign(s->above_mv_ctx, VP56mv(*)[2], 16);
// Whole-frame buffers (scaled by sb_rows as well): segmentation map and
// the two MV/reference-pair planes (current + previous frame).
90     assign(s->segmentation_map, uint8_t *, 64 * s->sb_rows);
91     assign(s->mv[0], VP9MVRefPair *, 64 * s->sb_rows);
92     assign(s->mv[1], VP9MVRefPair *, 64 * s->sb_rows);
98 // The sign bit is at the end, not the start, of a bit sequence
/* Read an n-bit magnitude followed by one sign bit (1 = negative) and
 * return the signed value. */
99 static av_always_inline int get_bits_with_sign(GetBitContext *gb, int n)
101     int v = get_bits(gb, n);
102     return get_bits1(gb) ? -v : v;
/* Inverse of the "recentering" step of the subexponential probability
 * coder: maps a non-negative coded delta v back around the pivot m.
 * NOTE(review): only part of the function body is visible in this chunk;
 * the line shown handles the branch that maps below the pivot. */
105 static av_always_inline int inv_recenter_nonneg(int v, int m)
110     return m - ((v + 1) >> 1);
114 // differential forward probability updates
/* Decode a differential update for probability p (range [1, 255]) from the
 * range coder and return the new probability. The delta is VLC-coded
 * (shorter codes for rough/cheap updates), mapped through inv_map_table[]
 * and recentered around p — see the long explanation comment below. */
115 static int update_prob(VP56RangeCoder *c, int p)
// Maps the VLC-coded index to the actual absolute delta; the first 20
// entries are the coarse "cheap" update values. Must stay byte-exact.
117     static const int inv_map_table[MAX_PROB - 1] = {
118           7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
119         189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
120          10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
121          25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
122          40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
123          55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
124          70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
125          86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
126         101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
127         116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
128         131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
129         146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
130         161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
131         177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
132         192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
133         207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
134         222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
135         237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
140     /* This code is trying to do a differential probability update. For a
141      * current probability A in the range [1, 255], the difference to a new
142      * probability of any value can be expressed differentially as 1-A, 255-A
143      * where some part of this (absolute range) exists both in positive as
144      * well as the negative part, whereas another part only exists in one
145      * half. We're trying to code this shared part differentially, i.e.
146      * times two where the value of the lowest bit specifies the sign, and
147      * the single part is then coded on top of this. This absolute difference
148      * then again has a value of [0, 254], but a bigger value in this range
149      * indicates that we're further away from the original value A, so we
150      * can code this as a VLC code, since higher values are increasingly
151      * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
152      * updates vs. the 'fine, exact' updates further down the range, which
153      * adds one extra dimension to this differential update model. */
// Four-branch VLC: 4, 4, 5 or 7 magnitude bits with offsets 0/16/32/...
155     if (!vp8_rac_get(c)) {
156         d = vp8_rac_get_uint(c, 4) + 0;
157     } else if (!vp8_rac_get(c)) {
158         d = vp8_rac_get_uint(c, 4) + 16;
159     } else if (!vp8_rac_get(c)) {
160         d = vp8_rac_get_uint(c, 5) + 32;
162         d = vp8_rac_get_uint(c, 7);
// Longest codes get one extra precision bit; clamp into table range.
164             d = (d << 1) - 65 + vp8_rac_get(c);
165         d = av_clip(d, 0, MAX_PROB - 65 - 1);
// Recenter the decoded delta around the current probability p, choosing
// the side of p with the larger headroom.
171            ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1)
172            : 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
/* Parse one VP9 frame header: first the raw bitstream ("uncompressed")
 * part via GetBitContext, then the arithmetic-coded ("compressed") part
 * via the VP56 range coder. Fills the VP9Context fields used by the block
 * and loop-filter decode stages. On a show-existing-frame packet, *ref is
 * set to the reference slot to display. Returns the number of header
 * bytes consumed on success, a negative AVERROR on error. */
175 static int decode_frame_header(AVCodecContext *avctx,
176                                const uint8_t *data, int size, int *ref)
178     VP9Context *s = avctx->priv_data;
179     int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
181     const uint8_t *data2;
// ----- uncompressed (raw-bit) header -----
184     if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
185         av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
188     if (get_bits(&s->gb, 2) != 0x2) { // frame marker
189         av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
190         return AVERROR_INVALIDDATA;
192     s->profile = get_bits1(&s->gb);
193     if (get_bits1(&s->gb)) { // reserved bit
194         av_log(avctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
195         return AVERROR_INVALIDDATA;
// show-existing-frame: just report which reference slot to display.
197     if (get_bits1(&s->gb)) {
198         *ref = get_bits(&s->gb, 3);
// Note: the keyframe/show-frame bits are inverted in the bitstream.
202     s->last_keyframe = s->keyframe;
203     s->keyframe      = !get_bits1(&s->gb);
205     last_invisible   = s->invisible;
206     s->invisible     = !get_bits1(&s->gb);
207     s->errorres      = get_bits1(&s->gb);
208     // FIXME disable this upon resolution change
209     s->use_last_frame_mvs = !s->errorres && !last_invisible;
// keyframe path: sync code, colorspace and explicit frame size follow.
212         if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
213             av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
214             return AVERROR_INVALIDDATA;
216         s->colorspace = get_bits(&s->gb, 3);
217         if (s->colorspace == 7) { // RGB = profile 1
218             av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
219             return AVERROR_INVALIDDATA;
221         s->fullrange = get_bits1(&s->gb);
// Profiles 1/3 signal chroma subsampling explicitly; 0/2 are 4:2:0.
224         if (s->profile == 1 || s->profile == 3) {
225             s->sub_x = get_bits1(&s->gb);
226             s->sub_y = get_bits1(&s->gb);
227             if (s->sub_x && s->sub_y) {
228                 av_log(avctx, AV_LOG_ERROR,
229                        "4:2:0 color not supported in profile 1 or 3\n");
230                 return AVERROR_INVALIDDATA;
232             if (get_bits1(&s->gb)) { // reserved bit
233                 av_log(avctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
234                 return AVERROR_INVALIDDATA;
237             s->sub_x = s->sub_y = 1;
239         if (!s->sub_x || !s->sub_y) {
240             avpriv_report_missing_feature(avctx, "Subsampling %d:%d",
242             return AVERROR_PATCHWELCOME;
// Keyframes implicitly refresh every reference slot.
245         s->refreshrefmask = 0xff;
246         w = get_bits(&s->gb, 16) + 1;
247         h = get_bits(&s->gb, 16) + 1;
248         if (get_bits1(&s->gb)) // display size
249             skip_bits(&s->gb, 32);
// non-keyframe path: intra-only and inter frames.
251         s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
252         s->resetctx  = s->errorres ? 0 : get_bits(&s->gb, 2);
// intra-only frames repeat the sync code and carry their own size.
254             if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
255                 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
256                 return AVERROR_INVALIDDATA;
258             s->refreshrefmask = get_bits(&s->gb, 8);
259             w = get_bits(&s->gb, 16) + 1;
260             h = get_bits(&s->gb, 16) + 1;
261             if (get_bits1(&s->gb)) // display size
262                 skip_bits(&s->gb, 32);
// inter frame: three active references with per-reference sign bias.
264             s->refreshrefmask = get_bits(&s->gb, 8);
265             s->refidx[0]      = get_bits(&s->gb, 3);
266             s->signbias[0]    = get_bits1(&s->gb);
267             s->refidx[1]      = get_bits(&s->gb, 3);
268             s->signbias[1]    = get_bits1(&s->gb);
269             s->refidx[2]      = get_bits(&s->gb, 3);
270             s->signbias[2]    = get_bits1(&s->gb);
271             if (!s->refs[s->refidx[0]]->buf[0] ||
272                 !s->refs[s->refidx[1]]->buf[0] ||
273                 !s->refs[s->refidx[2]]->buf[0]) {
274                 av_log(avctx, AV_LOG_ERROR,
275                        "Not all references are available\n");
276                 return AVERROR_INVALIDDATA;
// Frame size either copied from one of the references or read explicitly.
278             if (get_bits1(&s->gb)) {
279                 w = s->refs[s->refidx[0]]->width;
280                 h = s->refs[s->refidx[0]]->height;
281             } else if (get_bits1(&s->gb)) {
282                 w = s->refs[s->refidx[1]]->width;
283                 h = s->refs[s->refidx[1]]->height;
284             } else if (get_bits1(&s->gb)) {
285                 w = s->refs[s->refidx[2]]->width;
286                 h = s->refs[s->refidx[2]]->height;
288                 w = get_bits(&s->gb, 16) + 1;
289                 h = get_bits(&s->gb, 16) + 1;
291             if (get_bits1(&s->gb)) // display size
292                 skip_bits(&s->gb, 32);
293             s->highprecisionmvs = get_bits1(&s->gb);
294             s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
// Compound inter prediction is only possible when the references do not
// all share the same sign bias.
296             s->allowcompinter = s->signbias[0] != s->signbias[1] ||
297                                 s->signbias[0] != s->signbias[2];
298             if (s->allowcompinter) {
299                 if (s->signbias[0] == s->signbias[1]) {
301                     s->varcompref[0] = 0;
302                     s->varcompref[1] = 1;
303                 } else if (s->signbias[0] == s->signbias[2]) {
305                     s->varcompref[0] = 0;
306                     s->varcompref[1] = 2;
309                     s->varcompref[0] = 1;
310                     s->varcompref[1] = 2;
316     s->refreshctx   = s->errorres ? 0 : get_bits1(&s->gb);
317     s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
318     s->framectxid   = c = get_bits(&s->gb, 2);
320     /* loopfilter header data */
321     s->filter.level = get_bits(&s->gb, 6);
322     sharp = get_bits(&s->gb, 3);
323     /* If sharpness changed, reinit lim/mblim LUTs. if it didn't change,
324      * keep the old cache values since they are still valid. */
325     if (s->filter.sharpness != sharp)
326         memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
327     s->filter.sharpness = sharp;
328     if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
329         if (get_bits1(&s->gb)) {
330             for (i = 0; i < 4; i++)
331                 if (get_bits1(&s->gb))
332                     s->lf_delta.ref[i] = get_bits_with_sign(&s->gb, 6);
333             for (i = 0; i < 2; i++)
334                 if (get_bits1(&s->gb))
335                     s->lf_delta.mode[i] = get_bits_with_sign(&s->gb, 6);
338         memset(&s->lf_delta, 0, sizeof(s->lf_delta));
341     /* quantization header data */
342     s->yac_qi      = get_bits(&s->gb, 8);
343     s->ydc_qdelta  = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
344     s->uvdc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
345     s->uvac_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
// Lossless mode is signalled implicitly by all-zero quantizers.
346     s->lossless    = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
347                      s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
349     /* segmentation header info */
350     if ((s->segmentation.enabled = get_bits1(&s->gb))) {
351         if ((s->segmentation.update_map = get_bits1(&s->gb))) {
352             for (i = 0; i < 7; i++)
353                 s->prob.seg[i] = get_bits1(&s->gb) ?
354                                  get_bits(&s->gb, 8) : 255;
355             if ((s->segmentation.temporal = get_bits1(&s->gb)))
356                 for (i = 0; i < 3; i++)
357                     s->prob.segpred[i] = get_bits1(&s->gb) ?
358                                          get_bits(&s->gb, 8) : 255;
// Per-segment feature data: quantizer, loop-filter, reference and skip.
361         if (get_bits1(&s->gb)) {
362             s->segmentation.absolute_vals = get_bits1(&s->gb);
363             for (i = 0; i < 8; i++) {
364                 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
365                     s->segmentation.feat[i].q_val = get_bits_with_sign(&s->gb, 8);
366                 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
367                     s->segmentation.feat[i].lf_val = get_bits_with_sign(&s->gb, 6);
368                 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
369                     s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
370                 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
374         s->segmentation.feat[0].q_enabled    = 0;
375         s->segmentation.feat[0].lf_enabled   = 0;
376         s->segmentation.feat[0].skip_enabled = 0;
377         s->segmentation.feat[0].ref_enabled  = 0;
380     // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
381     for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
382         int qyac, qydc, quvac, quvdc, lflvl, sh;
384         if (s->segmentation.feat[i].q_enabled) {
385             if (s->segmentation.absolute_vals)
386                 qyac = s->segmentation.feat[i].q_val;
388                 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
392         qydc  = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
393         quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
394         quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
395         qyac  = av_clip_uintp2(qyac, 8);
397         s->segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[qydc];
398         s->segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[qyac];
399         s->segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[quvdc];
400         s->segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[quvac];
402         sh = s->filter.level >= 32;
403         if (s->segmentation.feat[i].lf_enabled) {
404             if (s->segmentation.absolute_vals)
405                 lflvl = s->segmentation.feat[i].lf_val;
407                 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
409             lflvl = s->filter.level;
// Precompute per-reference / per-mode loop-filter levels (6-bit clamp).
411         s->segmentation.feat[i].lflvl[0][0] =
412         s->segmentation.feat[i].lflvl[0][1] =
413             av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
414         for (j = 1; j < 4; j++) {
415             s->segmentation.feat[i].lflvl[j][0] =
416                 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
417                                          s->lf_delta.mode[0]) << sh), 6);
418             s->segmentation.feat[i].lflvl[j][1] =
419                 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
420                                          s->lf_delta.mode[1]) << sh), 6);
425     if ((ret = update_size(avctx, w, h)) < 0) {
426         av_log(avctx, AV_LOG_ERROR,
427                "Failed to initialize decoder for %dx%d\n", w, h);
// Tile layout: column count is clamped so each tile spans >= 4 and the
// frame has <= 64 superblock columns per tile.
430     for (s->tiling.log2_tile_cols = 0;
431          (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
432          s->tiling.log2_tile_cols++) ;
433     for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
434     max = FFMAX(0, max - 1);
435     while (max > s->tiling.log2_tile_cols) {
436         if (get_bits1(&s->gb))
437             s->tiling.log2_tile_cols++;
441     s->tiling.log2_tile_rows = decode012(&s->gb);
442     s->tiling.tile_rows      = 1 << s->tiling.log2_tile_rows;
// Resize the per-tile-column range coder array if the layout changed.
443     if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
444         s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
445         s->c_b              = av_fast_realloc(s->c_b, &s->c_b_size,
446                                               sizeof(VP56RangeCoder) *
447                                               s->tiling.tile_cols);
449             av_log(avctx, AV_LOG_ERROR,
450                    "Ran out of memory during range coder init\n");
451             return AVERROR(ENOMEM);
// Keyframes / error-resilient / intra-only frames reset the probability
// contexts to the spec defaults.
455     if (s->keyframe || s->errorres || s->intraonly) {
459         s->prob_ctx[3].p = ff_vp9_default_probs;
460         memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
461                sizeof(ff_vp9_default_coef_probs));
462         memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
463                sizeof(ff_vp9_default_coef_probs));
464         memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
465                sizeof(ff_vp9_default_coef_probs));
466         memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
467                sizeof(ff_vp9_default_coef_probs));
470     // next 16 bits is size of the rest of the header (arith-coded)
471     size2 = get_bits(&s->gb, 16);
472     data2 = align_get_bits(&s->gb);
473     if (size2 > size - (data2 - data)) {
474         av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
475         return AVERROR_INVALIDDATA;
// ----- compressed (arithmetic-coded) header -----
477     ff_vp56_init_range_decoder(&s->c, data2, size2);
478     if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
479         av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
480         return AVERROR_INVALIDDATA;
// Reset adaptation counters (coef/eob only for intra frames).
483     if (s->keyframe || s->intraonly)
484         memset(s->counts.coef, 0,
485                sizeof(s->counts.coef) + sizeof(s->counts.eob));
487         memset(&s->counts, 0, sizeof(s->counts));
489     /* FIXME is it faster to not copy here, but do it down in the fw updates
490      * as explicit copies if the fw update is missing (and skip the copy upon
492     s->prob.p = s->prob_ctx[c].p;
// Transform mode (lossless forces 4x4) and its probability updates.
496         s->txfmmode = TX_4X4;
498         s->txfmmode = vp8_rac_get_uint(&s->c, 2);
499         if (s->txfmmode == 3)
500             s->txfmmode += vp8_rac_get(&s->c);
502         if (s->txfmmode == TX_SWITCHABLE) {
503             for (i = 0; i < 2; i++)
504                 if (vp56_rac_get_prob_branchy(&s->c, 252))
505                     s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
506             for (i = 0; i < 2; i++)
507                 for (j = 0; j < 2; j++)
508                     if (vp56_rac_get_prob_branchy(&s->c, 252))
509                         s->prob.p.tx16p[i][j] =
510                             update_prob(&s->c, s->prob.p.tx16p[i][j]);
511             for (i = 0; i < 2; i++)
512                 for (j = 0; j < 3; j++)
513                     if (vp56_rac_get_prob_branchy(&s->c, 252))
514                         s->prob.p.tx32p[i][j] =
515                             update_prob(&s->c, s->prob.p.tx32p[i][j]);
// Coefficient probability updates, per transform size i.
520     for (i = 0; i < 4; i++) {
521         uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
522         if (vp8_rac_get(&s->c)) {
523             for (j = 0; j < 2; j++)
524                 for (k = 0; k < 2; k++)
525                     for (l = 0; l < 6; l++)
526                         for (m = 0; m < 6; m++) {
527                             uint8_t *p = s->prob.coef[i][j][k][l][m];
528                             uint8_t *r = ref[j][k][l][m];
529                             if (m >= 3 && l == 0) // dc only has 3 pt
531                             for (n = 0; n < 3; n++) {
532                                 if (vp56_rac_get_prob_branchy(&s->c, 252))
533                                     p[n] = update_prob(&s->c, r[n]);
// No update flag: copy the reference probabilities unchanged.
540             for (j = 0; j < 2; j++)
541                 for (k = 0; k < 2; k++)
542                     for (l = 0; l < 6; l++)
543                         for (m = 0; m < 6; m++) {
544                             uint8_t *p = s->prob.coef[i][j][k][l][m];
545                             uint8_t *r = ref[j][k][l][m];
546                             if (m > 3 && l == 0) // dc only has 3 pt
552         if (s->txfmmode == i)
// Skip-flag probabilities, then inter-only probability updates.
557     for (i = 0; i < 3; i++)
558         if (vp56_rac_get_prob_branchy(&s->c, 252))
559             s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
560     if (!s->keyframe && !s->intraonly) {
561         for (i = 0; i < 7; i++)
562             for (j = 0; j < 3; j++)
563                 if (vp56_rac_get_prob_branchy(&s->c, 252))
564                     s->prob.p.mv_mode[i][j] =
565                         update_prob(&s->c, s->prob.p.mv_mode[i][j]);
567         if (s->filtermode == FILTER_SWITCHABLE)
568             for (i = 0; i < 4; i++)
569                 for (j = 0; j < 2; j++)
570                     if (vp56_rac_get_prob_branchy(&s->c, 252))
571                         s->prob.p.filter[i][j] =
572                             update_prob(&s->c, s->prob.p.filter[i][j]);
574         for (i = 0; i < 4; i++)
575             if (vp56_rac_get_prob_branchy(&s->c, 252))
576                 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
578         if (s->allowcompinter) {
579             s->comppredmode = vp8_rac_get(&s->c);
581                 s->comppredmode += vp8_rac_get(&s->c);
582             if (s->comppredmode == PRED_SWITCHABLE)
583                 for (i = 0; i < 5; i++)
584                     if (vp56_rac_get_prob_branchy(&s->c, 252))
586                             update_prob(&s->c, s->prob.p.comp[i]);
588             s->comppredmode = PRED_SINGLEREF;
591         if (s->comppredmode != PRED_COMPREF) {
592             for (i = 0; i < 5; i++) {
593                 if (vp56_rac_get_prob_branchy(&s->c, 252))
594                     s->prob.p.single_ref[i][0] =
595                         update_prob(&s->c, s->prob.p.single_ref[i][0]);
596                 if (vp56_rac_get_prob_branchy(&s->c, 252))
597                     s->prob.p.single_ref[i][1] =
598                         update_prob(&s->c, s->prob.p.single_ref[i][1]);
602         if (s->comppredmode != PRED_SINGLEREF) {
603             for (i = 0; i < 5; i++)
604                 if (vp56_rac_get_prob_branchy(&s->c, 252))
605                     s->prob.p.comp_ref[i] =
606                         update_prob(&s->c, s->prob.p.comp_ref[i]);
609         for (i = 0; i < 4; i++)
610             for (j = 0; j < 9; j++)
611                 if (vp56_rac_get_prob_branchy(&s->c, 252))
612                     s->prob.p.y_mode[i][j] =
613                         update_prob(&s->c, s->prob.p.y_mode[i][j]);
615         for (i = 0; i < 4; i++)
616             for (j = 0; j < 4; j++)
617                 for (k = 0; k < 3; k++)
618                     if (vp56_rac_get_prob_branchy(&s->c, 252))
619                         s->prob.p.partition[3 - i][j][k] =
621                                         s->prob.p.partition[3 - i][j][k]);
623         // mv fields don't use the update_prob subexp model for some reason
624         for (i = 0; i < 3; i++)
625             if (vp56_rac_get_prob_branchy(&s->c, 252))
626                 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
628         for (i = 0; i < 2; i++) {
629             if (vp56_rac_get_prob_branchy(&s->c, 252))
630                 s->prob.p.mv_comp[i].sign =
631                     (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
633             for (j = 0; j < 10; j++)
634                 if (vp56_rac_get_prob_branchy(&s->c, 252))
635                     s->prob.p.mv_comp[i].classes[j] =
636                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
638             if (vp56_rac_get_prob_branchy(&s->c, 252))
639                 s->prob.p.mv_comp[i].class0 =
640                     (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
642             for (j = 0; j < 10; j++)
643                 if (vp56_rac_get_prob_branchy(&s->c, 252))
644                     s->prob.p.mv_comp[i].bits[j] =
645                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
648         for (i = 0; i < 2; i++) {
649             for (j = 0; j < 2; j++)
650                 for (k = 0; k < 3; k++)
651                     if (vp56_rac_get_prob_branchy(&s->c, 252))
652                         s->prob.p.mv_comp[i].class0_fp[j][k] =
653                             (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
655             for (j = 0; j < 3; j++)
656                 if (vp56_rac_get_prob_branchy(&s->c, 252))
657                     s->prob.p.mv_comp[i].fp[j] =
658                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// High-precision MV probabilities only when 1/8-pel MVs are enabled.
661         if (s->highprecisionmvs) {
662             for (i = 0; i < 2; i++) {
663                 if (vp56_rac_get_prob_branchy(&s->c, 252))
664                     s->prob.p.mv_comp[i].class0_hp =
665                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
667                 if (vp56_rac_get_prob_branchy(&s->c, 252))
668                     s->prob.p.mv_comp[i].hp =
669                         (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// Total header length: raw part up to data2 plus the compressed part.
674     return (data2 - data) + size2;
/* Recursively decode the partition tree of one block at level bl (BL_64X64
 * down to 8x8). 'c' is the partition context derived from the above/left
 * neighbours; hbs is half the block size in 8x8-block units. Blocks that
 * extend past the frame edge have their partition choice constrained (the
 * vp56_rac_get_prob_branchy() branches below). Returns 0 or a negative
 * AVERROR from ff_vp9_decode_block(). */
677 static int decode_subblock(AVCodecContext *avctx, int row, int col,
679                            ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
681     VP9Context *s = avctx->priv_data;
// Partition context: one bit each from the above and left context rows.
682     int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
683             (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
// Keyframes use the fixed default partition probabilities.
685     const uint8_t *p = s->keyframe ? ff_vp9_default_kf_partition_probs[bl][c]
686                                    : s->prob.p.partition[bl][c];
687     enum BlockPartition bp;
688     ptrdiff_t hbs = 4 >> bl;
// Fully inside the frame: read the partition from the full tree.
691         bp  = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
692         ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
693     } else if (col + hbs < s->cols) {
694         if (row + hbs < s->rows) {
695             bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
698                 ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
702                 ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
// Advance to the lower half (8 luma / 4 chroma rows per block unit).
705                     yoff  += hbs * 8 * s->cur_frame->linesize[0];
706                     uvoff += hbs * 4 * s->cur_frame->linesize[1];
707                     ret = ff_vp9_decode_block(avctx, row + hbs, col, lflvl,
708                                               yoff, uvoff, bl, bp);
712                 ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
717                     ret = ff_vp9_decode_block(avctx, row, col + hbs, lflvl,
718                                               yoff, uvoff, bl, bp);
721             case PARTITION_SPLIT:
// Recurse into the four quadrants at the next smaller block level.
722                 ret = decode_subblock(avctx, row, col, lflvl,
723                                       yoff, uvoff, bl + 1);
725                     ret = decode_subblock(avctx, row, col + hbs, lflvl,
726                                           yoff + 8 * hbs, uvoff + 4 * hbs,
729                         yoff  += hbs * 8 * s->cur_frame->linesize[0];
730                         uvoff += hbs * 4 * s->cur_frame->linesize[1];
731                         ret = decode_subblock(avctx, row + hbs, col, lflvl,
732                                               yoff, uvoff, bl + 1);
734                             ret = decode_subblock(avctx, row + hbs, col + hbs,
735                                                   lflvl, yoff + 8 * hbs,
736                                                   uvoff + 4 * hbs, bl + 1);
742                 av_log(avctx, AV_LOG_ERROR, "Unexpected partition %d.", bp);
743                 return AVERROR_INVALIDDATA;
// Right edge inside, bottom edge outside: only SPLIT or HORZ possible.
745         } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
746             bp  = PARTITION_SPLIT;
747             ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
749                 ret = decode_subblock(avctx, row, col + hbs, lflvl,
750                                       yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
753             ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
// Bottom edge inside, right edge outside: only SPLIT or VERT possible.
756     } else if (row + hbs < s->rows) {
757         if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
758             bp  = PARTITION_SPLIT;
759             ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
761                 yoff  += hbs * 8 * s->cur_frame->linesize[0];
762                 uvoff += hbs * 4 * s->cur_frame->linesize[1];
763                 ret = decode_subblock(avctx, row + hbs, col, lflvl,
764                                       yoff, uvoff, bl + 1);
768             ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
// Both edges outside: SPLIT is the only legal partition.
772         bp  = PARTITION_SPLIT;
773         ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
// Update adaptation counts for backward probability updates.
775     s->counts.partition[bl][c][bp]++;
/* Apply the in-loop deblocking filter to one 64x64 superblock. lflvl holds
 * the per-edge filter levels and the bitmasks (mask[plane][dir][row]) that
 * say which 8/16-px edges need filtering and at which strength. For each
 * edge: L = filter level, H = L >> 4 (used as the high-edge-variance
 * threshold), E/I are looked up from the mblim/lim LUTs. Wider dsp
 * functions (loop_filter_16 / loop_filter_mix2) are used when two
 * adjacent edges share the same level, otherwise loop_filter_8. */
780 static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
782                                 ptrdiff_t yoff, ptrdiff_t uvoff)
784     VP9Context *s = avctx->priv_data;
785     uint8_t *dst = s->cur_frame->data[0] + yoff, *lvl = lflvl->level;
786     ptrdiff_t ls_y = s->cur_frame->linesize[0], ls_uv = s->cur_frame->linesize[1];
789     /* FIXME: In how far can we interleave the v/h loopfilter calls? E.g.
790      * if you think of them as acting on a 8x8 block max, we can interleave
791      * each v/h within the single x loop, but that only works if we work on
792      * 8 pixel blocks, and we won't always do that (we want at least 16px
793      * to use SSE2 optimizations, perhaps 32 for AVX2). */
795     // filter edges between columns, Y plane (e.g. block1 | block2)
// Two 8-px rows per iteration so vertically-adjacent edges can be merged.
796     for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
797         uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
798         uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
// hm*: combined "any filtering needed" masks; *3 are the 4-px-edge masks.
799         unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
800         unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
801         unsigned hm = hm1 | hm2 | hm13 | hm23;
// Walk edge bits left to right; stop once no higher bit remains set.
803         for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
805                 int L = *l, H = L >> 4;
806                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
// Same level on both rows: filter both 8-px edges in one 16-px call.
811                         av_assert2(l[8] == L);
812                         s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
814                         s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
816                 } else if (hm2 & x) {
// Different levels: pack the second row's E/I into the high byte.
819                     E |= s->filter.mblim_lut[L] << 8;
820                     I |= s->filter.lim_lut[L] << 8;
821                     s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
823                                            [0](ptr, ls_y, E, I, H);
825                     s->dsp.loop_filter_8[!!(hmask1[1] & x)]
826                                         [0](ptr, ls_y, E, I, H);
829             } else if (hm2 & x) {
// Edge only in the second row of the pair.
830                 int L = l[8], H = L >> 4;
831                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
834                 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
835                                     [0](ptr + 8 * ls_y, ls_y, E, I, H);
// 4-px inner edges (offset +4 within the 8-px block).
839                 int L = *l, H = L >> 4;
840                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
845                     E |= s->filter.mblim_lut[L] << 8;
846                     I |= s->filter.lim_lut[L] << 8;
847                     s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
849                     s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
851             } else if (hm23 & x) {
852                 int L = l[8], H = L >> 4;
853                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
855                 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
861     //                                          filter edges between rows, Y plane (e.g. ------)
863     dst = s->cur_frame->data[0] + yoff;
865     for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
866         uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
867         unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
// Horizontal edges: pairs of adjacent column bits, 16 px per step.
869         for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
872                 int L = *l, H = L >> 4;
873                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
876                     if (vmask[0] & (x << 1)) {
877                         av_assert2(l[1] == L);
878                         s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
880                         s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
882                 } else if (vm & (x << 1)) {
885                     E |= s->filter.mblim_lut[L] << 8;
886                     I |= s->filter.lim_lut[L] << 8;
887                     s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
888                                            [!!(vmask[1] & (x << 1))]
889                                            [1](ptr, ls_y, E, I, H);
891                     s->dsp.loop_filter_8[!!(vmask[1] & x)]
892                                         [1](ptr, ls_y, E, I, H);
894             } else if (vm & (x << 1)) {
895                 int L = l[1], H = L >> 4;
896                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
898                 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
899                                     [1](ptr + 8, ls_y, E, I, H);
// 4-px inner horizontal edges (offset 4 rows down).
903                 int L = *l, H = L >> 4;
904                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
906                 if (vm3 & (x << 1)) {
909                     E |= s->filter.mblim_lut[L] << 8;
910                     I |= s->filter.lim_lut[L] << 8;
911                     s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
913                     s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
915             } else if (vm3 & (x << 1)) {
916                 int L = l[1], H = L >> 4;
917                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
919                 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
924     //                                          same principle but for U/V planes
// Chroma: 4-px block units, so strides/steps are halved vs. luma.
925     for (p = 0; p < 2; p++) {
927         dst = s->cur_frame->data[1 + p] + uvoff;
928         for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
929             uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
930             uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
931             unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
932             unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
934             for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
937                     int L = *l, H = L >> 4;
938                     int E = s->filter.mblim_lut[L];
939                     int I = s->filter.lim_lut[L];
943                             av_assert2(l[16] == L);
944                             s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
946                             s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
948                     } else if (hm2 & x) {
951                         E |= s->filter.mblim_lut[L] << 8;
952                         I |= s->filter.lim_lut[L] << 8;
953                         s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
955                                                [0](ptr, ls_uv, E, I, H);
957                         s->dsp.loop_filter_8[!!(hmask1[1] & x)]
958                                             [0](ptr, ls_uv, E, I, H);
960                 } else if (hm2 & x) {
// Level row below: l[16] indexes the same column two rows down.
961                     int L = l[16], H = L >> 4;
962                     int E = s->filter.mblim_lut[L];
963                     int I = s->filter.lim_lut[L];
965                     s->dsp.loop_filter_8[!!(hmask2[1] & x)]
966                                         [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
974             dst = s->cur_frame->data[1 + p] + uvoff;
975             for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
976                 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
977                 unsigned vm = vmask[0] | vmask[1] | vmask[2];
979                 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
982                         int L = *l, H = L >> 4;
983                         int E = s->filter.mblim_lut[L];
984                         int I = s->filter.lim_lut[L];
987                             if (vmask[0] & (x << 2)) {
988                                 av_assert2(l[2] == L);
989                                 s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
991                                 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
993                         } else if (vm & (x << 2)) {
996                             E |= s->filter.mblim_lut[L] << 8;
997                             I |= s->filter.lim_lut[L] << 8;
998                             s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
999                                                    [!!(vmask[1] & (x << 2))]
1000                                                    [1](ptr, ls_uv, E, I, H);
1002                             s->dsp.loop_filter_8[!!(vmask[1] & x)]
1003                                                 [1](ptr, ls_uv, E, I, H);
1005                     } else if (vm & (x << 2)) {
1006                         int L = l[2], H = L >> 4;
1007                         int E = s->filter.mblim_lut[L];
1008                         int I = s->filter.lim_lut[L];
1010                         s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
1011                                             [1](ptr + 8, ls_uv, E, I, H);
/* Compute the block-unit range [*start, *end) covered by tile 'idx' when
 * 'n' superblocks are divided into 2^log2_n tiles. The superblock counts
 * are clamped to n, then << 3 converts superblock units to 8x8-block
 * units. */
1021 static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
1023     int sb_start = (idx * n) >> log2_n;
1024     int sb_end   = ((idx + 1) * n) >> log2_n;
1025     *start = FFMIN(sb_start, n) << 3;
1026     *end   = FFMIN(sb_end,   n) << 3;
/* Decode one VP9 frame (a single frame of a possibly-superframed packet)
 * from data/size into frame. Handles the "show existing frame" case by
 * returning a reference to an already-decoded reference frame instead of
 * decoding new bitstream. Returns a negative AVERROR on failure.
 * NOTE(review): this chunk is an extract — several lines of the original
 * body (error handling, braces, some assignments) are not visible here. */
1029 static int vp9_decode_frame(AVCodecContext *avctx, AVFrame *frame,
1030 int *got_frame, const uint8_t *data, int size)
1032 VP9Context *s = avctx->priv_data;
1033 int ret, tile_row, tile_col, i, ref = -1, row, col;
1034 ptrdiff_t yoff = 0, uvoff = 0;
/* Parse the frame headers; ref presumably receives the reference-slot
 * index when the header requests showing an existing frame — confirm
 * against decode_frame_header. */
1036 ret = decode_frame_header(avctx, data, size, &ref);
/* Show-existing-frame path: the requested reference slot must actually
 * hold a decoded frame before we can hand out a reference to it. */
1040 if (!s->refs[ref]->buf[0]) {
1041 av_log(avctx, AV_LOG_ERROR,
1042 "Requested reference %d not available\n", ref);
1043 return AVERROR_INVALIDDATA;
1046 ret = av_frame_ref(frame, s->refs[ref]);
/* Normal decode path: use the caller-provided frame as the current
 * output picture and allocate pixel buffers for it. */
1055 s->cur_frame = frame;
1057 av_frame_unref(s->cur_frame);
/* Request a refcounted buffer only if this frame will be kept as a
 * reference for later frames (refreshrefmask != 0). */
1058 if ((ret = ff_get_buffer(avctx, s->cur_frame,
1059 s->refreshrefmask ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
1061 s->cur_frame->key_frame = s->keyframe;
1062 s->cur_frame->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
1063 : AV_PICTURE_TYPE_P;
/* Export signal range and colorspace as signalled in the bitstream. */
1066 avctx->color_range = AVCOL_RANGE_JPEG;
1068 avctx->color_range = AVCOL_RANGE_MPEG;
1070 switch (s->colorspace) {
1071 case 1: avctx->colorspace = AVCOL_SPC_BT470BG; break;
1072 case 2: avctx->colorspace = AVCOL_SPC_BT709; break;
1073 case 3: avctx->colorspace = AVCOL_SPC_SMPTE170M; break;
1074 case 4: avctx->colorspace = AVCOL_SPC_SMPTE240M; break;
1077 // main tile decode loop
/* Reset the above-row prediction contexts for the whole frame width.
 * Mode context is seeded with DC_PRED for intra-only frames, NEARESTMV
 * for inter frames. */
1078 memset(s->above_partition_ctx, 0, s->cols);
1079 memset(s->above_skip_ctx, 0, s->cols);
1080 if (s->keyframe || s->intraonly)
1081 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
1083 memset(s->above_mode_ctx, NEARESTMV, s->cols);
1084 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
1085 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
1086 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
1087 memset(s->above_segpred_ctx, 0, s->cols);
/* Set up one range decoder per tile column; tiles of one tile-row are
 * then decoded interleaved, superblock-row by superblock-row. */
1088 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
1089 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
1090 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
1091 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
/* The last tile has no explicit size field; it uses the remainder of
 * the buffer (handling not visible in this extract). */
1094 if (tile_col == s->tiling.tile_cols - 1 &&
1095 tile_row == s->tiling.tile_rows - 1) {
1098 tile_size = AV_RB32(data);
1102 if (tile_size > size)
1103 return AVERROR_INVALIDDATA;
1104 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
1105 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) // marker bit
1106 return AVERROR_INVALIDDATA;
/* Walk superblock rows: 8 blocks of 8 luma pixels per sb row, hence
 * 64-line luma / 32-line chroma strides per iteration. */
1111 for (row = s->tiling.tile_row_start;
1112 row < s->tiling.tile_row_end;
1113 row += 8, yoff += s->cur_frame->linesize[0] * 64,
1114 uvoff += s->cur_frame->linesize[1] * 32) {
1115 VP9Filter *lflvl = s->lflvl;
1116 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1118 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
1119 set_tile_offset(&s->tiling.tile_col_start,
1120 &s->tiling.tile_col_end,
1121 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
/* Reset left-edge contexts at the start of each tile column. */
1123 memset(s->left_partition_ctx, 0, 8);
1124 memset(s->left_skip_ctx, 0, 8);
1125 if (s->keyframe || s->intraonly)
1126 memset(s->left_mode_ctx, DC_PRED, 16);
1128 memset(s->left_mode_ctx, NEARESTMV, 8);
1129 memset(s->left_y_nnz_ctx, 0, 16);
1130 memset(s->left_uv_nnz_ctx, 0, 16);
1131 memset(s->left_segpred_ctx, 0, 8);
/* Swap this tile's range-coder state in, decode one sb row of the
 * tile, then save the state back so the next sb row can resume it. */
1133 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
1134 for (col = s->tiling.tile_col_start;
1135 col < s->tiling.tile_col_end;
1136 col += 8, yoff2 += 64, uvoff2 += 32, lflvl++) {
1137 // FIXME integrate with lf code (i.e. zero after each
1138 // use, similar to invtxfm coefficients, or similar)
1139 memset(lflvl->mask, 0, sizeof(lflvl->mask));
1141 if ((ret = decode_subblock(avctx, row, col, lflvl,
1142 yoff2, uvoff2, BL_64X64)) < 0)
1145 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
1148 // backup pre-loopfilter reconstruction data for intra
1149 // prediction of next row of sb64s
1150 if (row + 8 < s->rows) {
1151 memcpy(s->intra_pred_data[0],
1152 s->cur_frame->data[0] + yoff +
1153 63 * s->cur_frame->linesize[0],
1155 memcpy(s->intra_pred_data[1],
1156 s->cur_frame->data[1] + uvoff +
1157 31 * s->cur_frame->linesize[1],
1159 memcpy(s->intra_pred_data[2],
1160 s->cur_frame->data[2] + uvoff +
1161 31 * s->cur_frame->linesize[2],
1165 // loopfilter one row
1166 if (s->filter.level) {
1170 for (col = 0; col < s->cols;
1171 col += 8, yoff2 += 64, uvoff2 += 32, lflvl++)
1172 loopfilter_subblock(avctx, lflvl, row, col, yoff2, uvoff2);
1177 // bw adaptivity (or in case of parallel decoding mode, fw adaptivity
1178 // probability maintenance between frames)
1179 if (s->refreshctx) {
1180 if (s->parallelmode) {
/* Parallel mode: copy the probabilities decoded from the header
 * straight into the frame context, no backward adaptation. */
1182 for (i = 0; i < 4; i++) {
1183 for (j = 0; j < 2; j++)
1184 for (k = 0; k < 2; k++)
1185 for (l = 0; l < 6; l++)
1186 for (m = 0; m < 6; m++)
1187 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
1188 s->prob.coef[i][j][k][l][m], 3);
1189 if (s->txfmmode == i)
1192 s->prob_ctx[s->framectxid].p = s->prob.p;
/* Non-parallel mode: adapt probabilities from this frame's symbol
 * counts (backward adaptation). */
1194 ff_vp9_adapt_probs(s);
/* Current motion vectors become the "previous frame" MVs. */
1197 FFSWAP(VP9MVRefPair *, s->mv[0], s->mv[1]);
// ref frame setup
/* Replace every reference slot selected by refreshrefmask with a new
 * reference to the just-decoded frame. */
1200 for (i = 0; i < 8; i++)
1201 if (s->refreshrefmask & (1 << i)) {
1202 av_frame_unref(s->refs[i]);
1203 ret = av_frame_ref(s->refs[i], s->cur_frame);
/* NOTE(review): context for this unref (likely an error/cleanup path)
 * is not visible in this extract — confirm against the full file. */
1209 av_frame_unref(s->cur_frame);
/* Packet entry point: detect and unpack a VP9 superframe (several
 * coded frames concatenated, followed by a size index), decoding each
 * contained frame; otherwise decode the packet as one single frame. */
1216 static int vp9_decode_packet(AVCodecContext *avctx, void *frame,
1217 int *got_frame, AVPacket *avpkt)
1219 const uint8_t *data = avpkt->data;
1220 int size = avpkt->size;
1223 /* Read superframe index - this is a collection of individual frames
1224 * that together lead to one visible frame */
/* A superframe index is marked by a trailing byte of the form
 * 0b110xxxxx; it encodes the per-frame size-field width (nbytes)
 * and the number of contained frames. */
1225 marker = data[size - 1];
1226 if ((marker & 0xe0) == 0xc0) {
1227 int nbytes = 1 + ((marker >> 3) & 0x3);
1228 int n_frames = 1 + (marker & 0x7);
/* The index is n_frames size fields bracketed by two copies of the
 * marker byte. */
1229 int idx_sz = 2 + n_frames * nbytes;
/* Valid only if the same marker byte also starts the index. */
1231 if (size >= idx_sz && data[size - idx_sz] == marker) {
/* idx points at the first size field (skipping the leading marker). */
1232 const uint8_t *idx = data + size + 1 - idx_sz;
1234 while (n_frames--) {
/* Frame sizes are little-endian; mask down to the nbytes actually
 * used by this index. */
1235 unsigned sz = AV_RL32(idx);
1238 sz &= (1 << (8 * nbytes)) - 1;
1242 av_log(avctx, AV_LOG_ERROR,
1243 "Superframe packet size too big: %u > %d\n",
1245 return AVERROR_INVALIDDATA;
1248 ret = vp9_decode_frame(avctx, frame, got_frame, data, sz);
1258 /* If we get here, there was no valid superframe index, i.e. this is just
1259 * one whole single frame. Decode it as such from the complete input buf. */
1260 if ((ret = vp9_decode_frame(avctx, frame, got_frame, data, size)) < 0)
/* Free all decoder state: the reference-frame pool and the jointly
 * allocated per-frame context buffers (above_partition_ctx is the base
 * pointer of the single allocation made in update_size()). */
1265 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1267 VP9Context *s = avctx->priv_data;
1270 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
1271 av_frame_free(&s->refs[i]);
1274 av_freep(&s->above_partition_ctx);
/* Decoder init: fixed 8-bit 4:2:0 output, DSP setup, and allocation of
 * the eight reference-frame slots. Frees already-allocated state on
 * allocation failure. */
1279 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1281 VP9Context *s = avctx->priv_data;
1284 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
1286 ff_vp9dsp_init(&s->dsp);
1287 ff_videodsp_init(&s->vdsp, 8);
1289 for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++) {
1290 s->refs[i] = av_frame_alloc();
/* On failure, release the frames allocated so far via the regular
 * close callback. */
1292 vp9_decode_free(avctx);
1293 return AVERROR(ENOMEM);
/* -1 marks the sharpness as "unset" so the first frame header forces a
 * loopfilter-limit table rebuild. */
1297 s->filter.sharpness = -1;
/* Codec registration: wires the init/decode/flush/close callbacks above
 * into libavcodec's codec list. AV_CODEC_CAP_DR1 advertises support for
 * user-supplied (direct-rendering) buffers via ff_get_buffer(). */
1302 AVCodec ff_vp9_decoder = {
1304 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
1305 .type = AVMEDIA_TYPE_VIDEO,
1306 .id = AV_CODEC_ID_VP9,
1307 .priv_data_size = sizeof(VP9Context),
1308 .init = vp9_decode_init,
1309 .decode = vp9_decode_packet,
1310 .flush = vp9_decode_flush,
1311 .close = vp9_decode_free,
1312 .capabilities = AV_CODEC_CAP_DR1,