git.sesse.net Git - ffmpeg/blob - libavcodec/vp8.c

   1 /*
   2  * VP7/VP8 compatible video decoder
   3  *
   4  * Copyright (C) 2010 David Conrad
   5  * Copyright (C) 2010 Ronald S. Bultje
   6  * Copyright (C) 2010 Jason Garrett-Glaser
   7  * Copyright (C) 2012 Daniel Kang
   8  * Copyright (C) 2014 Peter Ross
   9  *
  10  * This file is part of FFmpeg.
  11  *
  12  * FFmpeg is free software; you can redistribute it and/or
  13  * modify it under the terms of the GNU Lesser General Public
  14  * License as published by the Free Software Foundation; either
  15  * version 2.1 of the License, or (at your option) any later version.
  16  *
  17  * FFmpeg is distributed in the hope that it will be useful,
  18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  20  * Lesser General Public License for more details.
  21  *
  22  * You should have received a copy of the GNU Lesser General Public
  23  * License along with FFmpeg; if not, write to the Free Software
  24  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  25  */
  26
  27 #include "libavutil/imgutils.h"
  28 #include "avcodec.h"
  29 #include "internal.h"
  30 #include "vp8.h"
  31 #include "vp8data.h"
  32 #include "rectangle.h"
  33 #include "thread.h"
  34
  35 #if ARCH_ARM
  36 #   include "arm/vp8.h"
  37 #endif
  38
  39 #if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
  40 #define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
  41 #elif CONFIG_VP7_DECODER
  42 #define VPX(vp7, f) vp7_ ## f
  43 #else // CONFIG_VP8_DECODER
  44 #define VPX(vp7, f) vp8_ ## f
  45 #endif
  46
  47 static void free_buffers(VP8Context *s)
  48 {
  49     int i;
  50     if (s->thread_data)
  51         for (i = 0; i < MAX_THREADS; i++) {
  52 #if HAVE_THREADS
  53             pthread_cond_destroy(&s->thread_data[i].cond);
  54             pthread_mutex_destroy(&s->thread_data[i].lock);
  55 #endif
  56             av_freep(&s->thread_data[i].filter_strength);
  57         }
  58     av_freep(&s->thread_data);
  59     av_freep(&s->macroblocks_base);
  60     av_freep(&s->intra4x4_pred_mode_top);
  61     av_freep(&s->top_nnz);
  62     av_freep(&s->top_border);
  63
  64     s->macroblocks = NULL;
  65 }
  66
  67 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
  68 {
  69     int ret;
  70     if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
  71                                     ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
  72         return ret;
  73     if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
  74         ff_thread_release_buffer(s->avctx, &f->tf);
  75         return AVERROR(ENOMEM);
  76     }
  77     return 0;
  78 }
  79
  80 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
  81 {
  82     av_buffer_unref(&f->seg_map);
  83     ff_thread_release_buffer(s->avctx, &f->tf);
  84 }
  85
  86 #if CONFIG_VP8_DECODER
  87 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
  88 {
  89     int ret;
  90
  91     vp8_release_frame(s, dst);
  92
  93     if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
  94         return ret;
  95     if (src->seg_map &&
  96         !(dst->seg_map = av_buffer_ref(src->seg_map))) {
  97         vp8_release_frame(s, dst);
  98         return AVERROR(ENOMEM);
  99     }
 100
 101     return 0;
 102 }
 103 #endif
 104
 105
 106 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
 107 {
 108     VP8Context *s = avctx->priv_data;
 109     int i;
 110
 111     for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
 112         vp8_release_frame(s, &s->frames[i]);
 113     memset(s->framep, 0, sizeof(s->framep));
 114
 115     if (free_mem)
 116         free_buffers(s);
 117 }
 118
 119 static void vp8_decode_flush(AVCodecContext *avctx)
 120 {
 121     vp8_decode_flush_impl(avctx, 0);
 122 }
 123
 124 static VP8Frame * vp8_find_free_buffer(VP8Context *s)
 125 {
 126     VP8Frame *frame = NULL;
 127     int i;
 128
 129     // find a free buffer
 130     for (i = 0; i < 5; i++)
 131         if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
 132             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
 133             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
 134             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
 135             frame = &s->frames[i];
 136             break;
 137         }
 138     if (i == 5) {
 139         av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
 140         abort();
 141     }
 142     if (frame->tf.f->data[0])
 143         vp8_release_frame(s, frame);
 144
 145     return frame;
 146 }
 147
 148 static int update_dimensions(VP8Context *s, int width, int height)
 149 {
 150     AVCodecContext *avctx = s->avctx;
 151     int i, ret;
 152
 153     if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
 154         height != s->avctx->height) {
 155         vp8_decode_flush_impl(s->avctx, 1);
 156
 157         ret = ff_set_dimensions(s->avctx, width, height);
 158         if (ret < 0)
 159             return ret;
 160     }
 161
 162     s->mb_width  = (s->avctx->coded_width +15) / 16;
 163     s->mb_height = (s->avctx->coded_height+15) / 16;
 164
 165     s->mb_layout = s->vp7 || (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
 166     if (!s->mb_layout) { // Frame threading and one thread
 167         s->macroblocks_base       = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
 168         s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
 169     }
 170     else // Sliced threading
 171         s->macroblocks_base       = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
 172     s->top_nnz                    = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
 173     s->top_border                 = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
 174     s->thread_data                = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));
 175
 176     for (i = 0; i < MAX_THREADS; i++) {
 177         s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
 178 #if HAVE_THREADS
 179         pthread_mutex_init(&s->thread_data[i].lock, NULL);
 180         pthread_cond_init(&s->thread_data[i].cond, NULL);
 181 #endif
 182     }
 183
 184     if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
 185         (!s->intra4x4_pred_mode_top && !s->mb_layout))
 186         return AVERROR(ENOMEM);
 187
 188     s->macroblocks        = s->macroblocks_base + 1;
 189
 190     return 0;
 191 }
 192
 193
 194 #if CONFIG_VP8_DECODER
 195 static void parse_segment_info(VP8Context *s)
 196 {
 197     VP56RangeCoder *c = &s->c;
 198     int i;
 199
 200     s->segmentation.update_map = vp8_rac_get(c);
 201
 202     if (vp8_rac_get(c)) { // update segment feature data
 203         s->segmentation.absolute_vals = vp8_rac_get(c);
 204
 205         for (i = 0; i < 4; i++)
 206             s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);
 207
 208         for (i = 0; i < 4; i++)
 209             s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
 210     }
 211     if (s->segmentation.update_map)
 212         for (i = 0; i < 3; i++)
 213             s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
 214 }
 215
 216 static void update_lf_deltas(VP8Context *s)
 217 {
 218     VP56RangeCoder *c = &s->c;
 219     int i;
 220
 221     for (i = 0; i < 4; i++) {
 222         if (vp8_rac_get(c)) {
 223             s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
 224
 225             if (vp8_rac_get(c))
 226                 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
 227         }
 228     }
 229
 230     for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
 231         if (vp8_rac_get(c)) {
 232             s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
 233
 234             if (vp8_rac_get(c))
 235                 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
 236         }
 237     }
 238 }
 239
 240 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
 241 {
 242     const uint8_t *sizes = buf;
 243     int i;
 244
 245     s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
 246
 247     buf      += 3*(s->num_coeff_partitions-1);
 248     buf_size -= 3*(s->num_coeff_partitions-1);
 249     if (buf_size < 0)
 250         return -1;
 251
 252     for (i = 0; i < s->num_coeff_partitions-1; i++) {
 253         int size = AV_RL24(sizes + 3*i);
 254         if (buf_size - size < 0)
 255             return -1;
 256
 257         ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
 258         buf      += size;
 259         buf_size -= size;
 260     }
 261     ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
 262
 263     return 0;
 264 }
 265 #endif
 266
 267 #if CONFIG_VP7_DECODER
 268 static void vp7_get_quants(VP8Context *s)
 269 {
 270     VP56RangeCoder *c = &s->c;
 271
 272     int yac_qi  = vp8_rac_get_uint(c, 7);
 273     int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
 274     int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
 275     int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
 276     int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
 277     int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
 278
 279     s->qmat[0].luma_qmul[0]    =       vp7_ydc_qlookup[ydc_qi];
 280     s->qmat[0].luma_qmul[1]    =       vp7_yac_qlookup[yac_qi];
 281     s->qmat[0].luma_dc_qmul[0] =       vp7_y2dc_qlookup[y2dc_qi];
 282     s->qmat[0].luma_dc_qmul[1] =       vp7_y2ac_qlookup[y2ac_qi];
 283     s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
 284     s->qmat[0].chroma_qmul[1]  =       vp7_yac_qlookup[uvac_qi];
 285 }
 286 #endif
 287
 288 #if CONFIG_VP8_DECODER
 289 static void vp8_get_quants(VP8Context *s)
 290 {
 291     VP56RangeCoder *c = &s->c;
 292     int i, base_qi;
 293
 294     int yac_qi     = vp8_rac_get_uint(c, 7);
 295     int ydc_delta  = vp8_rac_get_sint(c, 4);
 296     int y2dc_delta = vp8_rac_get_sint(c, 4);
 297     int y2ac_delta = vp8_rac_get_sint(c, 4);
 298     int uvdc_delta = vp8_rac_get_sint(c, 4);
 299     int uvac_delta = vp8_rac_get_sint(c, 4);
 300
 301     for (i = 0; i < 4; i++) {
 302         if (s->segmentation.enabled) {
 303             base_qi = s->segmentation.base_quant[i];
 304             if (!s->segmentation.absolute_vals)
 305                 base_qi += yac_qi;
 306         } else
 307             base_qi = yac_qi;
 308
 309         s->qmat[i].luma_qmul[0]    =           vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
 310         s->qmat[i].luma_qmul[1]    =           vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
 311         s->qmat[i].luma_dc_qmul[0] =       2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
 312         /* 101581>>16 is equivalent to 155/100 */
 313         s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
 314         s->qmat[i].chroma_qmul[0]  =           vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
 315         s->qmat[i].chroma_qmul[1]  =           vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
 316
 317         s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
 318         s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
 319     }
 320 }
 321
 322 /**
 323  * Determine which buffers golden and altref should be updated with after this frame.
 324  * The spec isn't clear here, so I'm going by my understanding of what libvpx does
 325  *
 326  * Intra frames update all 3 references
 327  * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 328  * If the update (golden|altref) flag is set, it's updated with the current frame
 329  *      if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 330  * If the flag is not set, the number read means:
 331  *      0: no update
 332  *      1: VP56_FRAME_PREVIOUS
 333  *      2: update golden with altref, or update altref with golden
 334  */
 335 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
 336 {
 337     VP56RangeCoder *c = &s->c;
 338
 339     if (update)
 340         return VP56_FRAME_CURRENT;
 341
 342     switch (vp8_rac_get_uint(c, 2)) {
 343     case 1:
 344         return VP56_FRAME_PREVIOUS;
 345     case 2:
 346         return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
 347     }
 348     return VP56_FRAME_NONE;
 349 }
 350 #endif
 351
 352 static void vp78_reset_probability_tables(VP8Context *s)
 353 {
 354     int i, j;
 355     for (i = 0; i < 4; i++)
 356         for (j = 0; j < 16; j++)
 357             memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
 358                    sizeof(s->prob->token[i][j]));
 359 }
 360
 361 static void vp78_update_probability_tables(VP8Context *s)
 362 {
 363     VP56RangeCoder *c = &s->c;
 364     int i, j, k, l, m;
 365
 366     for (i = 0; i < 4; i++)
 367         for (j = 0; j < 8; j++)
 368             for (k = 0; k < 3; k++)
 369                 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
 370                     if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
 371                         int prob = vp8_rac_get_uint(c, 8);
 372                         for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
 373                             s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
 374                     }
 375 }
 376
 377 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s)
 378 {
 379     VP56RangeCoder *c = &s->c;
 380     int i, j;
 381
 382     if (vp8_rac_get(c))
 383         for (i = 0; i < 4; i++)
 384             s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
 385     if (vp8_rac_get(c))
 386         for (i = 0; i < 3; i++)
 387             s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);
 388
 389     // 17.2 MV probability update
 390     for (i = 0; i < 2; i++)
 391         for (j = 0; j < (s->vp7 ? 17 : 19); j++)
 392             if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
 393                 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
 394 }
 395
 396 #if CONFIG_VP8_DECODER
 397 static void update_refs(VP8Context *s)
 398 {
 399     VP56RangeCoder *c = &s->c;
 400
 401     int update_golden = vp8_rac_get(c);
 402     int update_altref = vp8_rac_get(c);
 403
 404     s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
 405     s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
 406 }
 407 #endif
 408
 409 #if CONFIG_VP7_DECODER
 410 static void fade(uint8_t *dst, int dst_linesize, const uint8_t *src, int src_linesize, int width, int height, int alpha, int beta)
 411 {
 412     int i, j;
 413     for (j = 0; j < height; j++)
 414         for (i = 0; i < width; i++) {
 415             uint8_t y = src[j*src_linesize + i];
 416             dst[j*dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
 417         }
 418 }
 419
 420 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
 421 {
 422     VP56RangeCoder *c = &s->c;
 423     int part1_size, hscale, vscale, i, j, ret;
 424     int width  = s->avctx->width;
 425     int height = s->avctx->height;
 426
 427     s->profile   =  (buf[0]>>1) & 7;
 428     if (s->profile > 1) {
 429         avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
 430         return AVERROR_INVALIDDATA;
 431     }
 432
 433     s->keyframe  = !(buf[0] & 1);
 434     s->invisible = 0;
 435     part1_size   = AV_RL24(buf) >> 4;
 436
 437     buf      += 4 - s->profile;
 438     buf_size -= 4 - s->profile;
 439
 440     memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
 441
 442     ff_vp56_init_range_decoder(c, buf, part1_size);
 443     buf      += part1_size;
 444     buf_size -= part1_size;
 445
 446     /* A. Dimension information (keyframes only) */
 447     if (s->keyframe) {
 448         width  = vp8_rac_get_uint(c, 12);
 449         height = vp8_rac_get_uint(c, 12);
 450         hscale = vp8_rac_get_uint(c, 2);
 451         vscale = vp8_rac_get_uint(c, 2);
 452         if (hscale || vscale)
 453             avpriv_request_sample(s->avctx, "Upscaling");
 454
 455         s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
 456         vp78_reset_probability_tables(s);
 457         memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
 458         memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
 459         for (i = 0; i < 2; i++)
 460             memcpy(s->prob->mvc[i], vp7_mv_default_prob[i], sizeof(vp7_mv_default_prob[i]));
 461         memset(&s->segmentation, 0, sizeof(s->segmentation));
 462         memset(&s->lf_delta, 0, sizeof(s->lf_delta));
 463         memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
 464     }
 465
 466     if (s->keyframe || s->profile > 0)
 467         memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));
 468
 469     /* B. Decoding information for all four macroblock-level features */
 470     for (i = 0; i < 4; i++) {
 471         s->feature_enabled[i] = vp8_rac_get(c);
 472         if (s->feature_enabled[i]) {
 473              s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
 474
 475              for (j = 0; j < 3; j++)
 476                  s->feature_index_prob[i][j] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
 477
 478              if (vp7_feature_value_size[i])
 479                  for (j = 0; j < 4; j++)
 480                      s->feature_value[i][j] = vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
 481         }
 482     }
 483
 484     s->segmentation.enabled = 0;
 485     s->segmentation.update_map = 0;
 486     s->lf_delta.enabled = 0;
 487
 488     s->num_coeff_partitions = 1;
 489     ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
 490
 491     if (!s->macroblocks_base || /* first frame */
 492         width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
 493         if ((ret = update_dimensions(s, width, height)) < 0)
 494             return ret;
 495     }
 496
 497     /* C. Dequantization indices */
 498     vp7_get_quants(s);
 499
 500     /* D. Golden frame update flag (a Flag) for interframes only */
 501     if (!s->keyframe) {
 502         s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
 503         s->sign_bias[VP56_FRAME_GOLDEN] = 0;
 504     }
 505
 506     s->update_last = 1;
 507     s->update_probabilities = 1;
 508     s->fade_present         = 1;
 509
 510     if (s->profile > 0) {
 511         s->update_probabilities = vp8_rac_get(c);
 512         if (!s->update_probabilities)
 513             s->prob[1] = s->prob[0];
 514
 515         if (!s->keyframe)
 516             s->fade_present = vp8_rac_get(c);
 517     }
 518
 519     /* E. Fading information for previous frame */
 520     if (s->fade_present && vp8_rac_get(c)) {
 521         int alpha = (int8_t)vp8_rac_get_uint(c, 8);
 522         int beta  = (int8_t)vp8_rac_get_uint(c, 8);
 523         if (!s->keyframe && (alpha || beta)) {
 524             /* preserve the golden frame */
 525             if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
 526                 AVFrame *gold = s->framep[VP56_FRAME_GOLDEN]->tf.f;
 527                 AVFrame *prev;
 528                 int i, j;
 529
 530                 s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
 531                 if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
 532                    return ret;
 533                 prev = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
 534
 535                 fade(prev->data[0], prev->linesize[0], gold->data[0], gold->linesize[0], s->mb_width * 16, s->mb_height * 16, alpha, beta);
 536                 for (j = 1; j < 3; j++)
 537                     for (i = 0; i < s->mb_height * 8; i++)
 538                         memcpy(prev->data[j] + i * prev->linesize[j], gold->data[j] + i * gold->linesize[j], s->mb_width * 8);
 539             } else {
 540                 AVFrame *prev = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
 541                 fade(prev->data[0], prev->linesize[0], prev->data[0], prev->linesize[0], s->mb_width * 16, s->mb_height * 16, alpha, beta);
 542             }
 543
 544         }
 545     }
 546
 547     /* F. Loop filter type */
 548     if (!s->profile)
 549         s->filter.simple = vp8_rac_get(c);
 550
 551     /* G. DCT coefficient ordering specification */
 552     if (vp8_rac_get(c))
 553         for (i = 1; i < 16; i++)
 554             s->prob[0].scan[i] = zigzag_scan[vp8_rac_get_uint(c, 4)];
 555
 556     /* H. Loop filter levels  */
 557     if (s->profile > 0)
 558         s->filter.simple = vp8_rac_get(c);
 559     s->filter.level     = vp8_rac_get_uint(c, 6);
 560     s->filter.sharpness = vp8_rac_get_uint(c, 3);
 561
 562     /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
 563     vp78_update_probability_tables(s);
 564
 565     s->mbskip_enabled = 0;
 566
 567     /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
 568     if (!s->keyframe) {
 569         s->prob->intra  = vp8_rac_get_uint(c, 8);
 570         s->prob->last   = vp8_rac_get_uint(c, 8);
 571         vp78_update_pred16x16_pred8x8_mvc_probabilities(s);
 572     }
 573
 574     return 0;
 575 }
 576 #endif
 577
 578 #if CONFIG_VP8_DECODER
 579 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
 580 {
 581     VP56RangeCoder *c = &s->c;
 582     int header_size, hscale, vscale, ret;
 583     int width  = s->avctx->width;
 584     int height = s->avctx->height;
 585
 586     s->keyframe  = !(buf[0] & 1);
 587     s->profile   =  (buf[0]>>1) & 7;
 588     s->invisible = !(buf[0] & 0x10);
 589     header_size  = AV_RL24(buf) >> 5;
 590     buf      += 3;
 591     buf_size -= 3;
 592
 593     if (s->profile > 3)
 594         av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
 595
 596     if (!s->profile)
 597         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
 598     else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
 599         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));
 600
 601     if (header_size > buf_size - 7*s->keyframe) {
 602         av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
 603         return AVERROR_INVALIDDATA;
 604     }
 605
 606     if (s->keyframe) {
 607         if (AV_RL24(buf) != 0x2a019d) {
 608             av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
 609             return AVERROR_INVALIDDATA;
 610         }
 611         width  = AV_RL16(buf+3) & 0x3fff;
 612         height = AV_RL16(buf+5) & 0x3fff;
 613         hscale = buf[4] >> 6;
 614         vscale = buf[6] >> 6;
 615         buf      += 7;
 616         buf_size -= 7;
 617
 618         if (hscale || vscale)
 619             avpriv_request_sample(s->avctx, "Upscaling");
 620
 621         s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
 622         vp78_reset_probability_tables(s);
 623         memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
 624         memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
 625         memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
 626         memset(&s->segmentation, 0, sizeof(s->segmentation));
 627         memset(&s->lf_delta, 0, sizeof(s->lf_delta));
 628     }
 629
 630     ff_vp56_init_range_decoder(c, buf, header_size);
 631     buf      += header_size;
 632     buf_size -= header_size;
 633
 634     if (s->keyframe) {
 635         if (vp8_rac_get(c))
 636             av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
 637         vp8_rac_get(c); // whether we can skip clamping in dsp functions
 638     }
 639
 640     if ((s->segmentation.enabled = vp8_rac_get(c)))
 641         parse_segment_info(s);
 642     else
 643         s->segmentation.update_map = 0; // FIXME: move this to some init function?
 644
 645     s->filter.simple    = vp8_rac_get(c);
 646     s->filter.level     = vp8_rac_get_uint(c, 6);
 647     s->filter.sharpness = vp8_rac_get_uint(c, 3);
 648
 649     if ((s->lf_delta.enabled = vp8_rac_get(c)))
 650         if (vp8_rac_get(c))
 651             update_lf_deltas(s);
 652
 653     if (setup_partitions(s, buf, buf_size)) {
 654         av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
 655         return AVERROR_INVALIDDATA;
 656     }
 657
 658     if (!s->macroblocks_base || /* first frame */
 659         width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
 660         if ((ret = update_dimensions(s, width, height)) < 0)
 661             return ret;
 662     }
 663
 664     vp8_get_quants(s);
 665
 666     if (!s->keyframe) {
 667         update_refs(s);
 668         s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
 669         s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
 670     }
 671
 672     // if we aren't saving this frame's probabilities for future frames,
 673     // make a copy of the current probabilities
 674     if (!(s->update_probabilities = vp8_rac_get(c)))
 675         s->prob[1] = s->prob[0];
 676
 677     s->update_last = s->keyframe || vp8_rac_get(c);
 678
 679     vp78_update_probability_tables(s);
 680
 681     if ((s->mbskip_enabled = vp8_rac_get(c)))
 682         s->prob->mbskip = vp8_rac_get_uint(c, 8);
 683
 684     if (!s->keyframe) {
 685         s->prob->intra  = vp8_rac_get_uint(c, 8);
 686         s->prob->last   = vp8_rac_get_uint(c, 8);
 687         s->prob->golden = vp8_rac_get_uint(c, 8);
 688         vp78_update_pred16x16_pred8x8_mvc_probabilities(s);
 689     }
 690
 691     return 0;
 692 }
 693 #endif
 694
 695 static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
 696 {
 697     dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
 698     dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
 699 }
 700
 701 /**
 702  * Motion vector coding, 17.1.
 703  */
 704 static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
 705 {
 706     int bit, x = 0;
 707
 708     if (vp56_rac_get_prob_branchy(c, p[0])) {
 709         int i;
 710
 711         for (i = 0; i < 3; i++)
 712             x += vp56_rac_get_prob(c, p[9 + i]) << i;
 713         for (i = (vp7 ? 7 : 9); i > 3; i--)
 714             x += vp56_rac_get_prob(c, p[9 + i]) << i;
 715         if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
 716             x += 8;
 717     } else {
 718         // small_mvtree
 719         const uint8_t *ps = p+2;
 720         bit = vp56_rac_get_prob(c, *ps);
 721         ps += 1 + 3*bit;
 722         x  += 4*bit;
 723         bit = vp56_rac_get_prob(c, *ps);
 724         ps += 1 + bit;
 725         x  += 2*bit;
 726         x  += vp56_rac_get_prob(c, *ps);
 727     }
 728
 729     return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
 730 }
 731
 732 static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
 733 {
 734     return read_mv_component(c, p, 1);
 735 }
 736
 737 static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
 738 {
 739     return read_mv_component(c, p, 0);
 740 }
 741
 742 static av_always_inline
 743 const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
 744 {
 745     if (left == top)
 746         return vp8_submv_prob[4-!!left];
 747     if (!top)
 748         return vp8_submv_prob[2];
 749     return vp8_submv_prob[1-!!left];
 750 }
 751
 752 /**
 753  * Split motion vector prediction, 16.4.
 754  * @returns the number of motion vectors parsed (2, 4 or 16)
 755  */
 756 static av_always_inline
 757 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout, int vp7)
 758 {
 759     int part_idx;
 760     int n, num;
 761     VP8Macroblock *top_mb;
 762     VP8Macroblock *left_mb = &mb[-1];
 763     const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
 764                   *mbsplits_top,
 765                   *mbsplits_cur, *firstidx;
 766     VP56mv *top_mv;
 767     VP56mv *left_mv = left_mb->bmv;
 768     VP56mv *cur_mv  = mb->bmv;
 769
 770     if (!layout) // layout is inlined, s->mb_layout is not
 771         top_mb = &mb[2];
 772     else
 773         top_mb = &mb[-s->mb_width-1];
 774     mbsplits_top = vp8_mbsplits[top_mb->partitioning];
 775     top_mv = top_mb->bmv;
 776
 777     if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
 778         if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
 779             part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
 780         } else {
 781             part_idx = VP8_SPLITMVMODE_8x8;
 782         }
 783     } else {
 784         part_idx = VP8_SPLITMVMODE_4x4;
 785     }
 786
 787     num = vp8_mbsplit_count[part_idx];
 788     mbsplits_cur = vp8_mbsplits[part_idx],
 789     firstidx = vp8_mbfirstidx[part_idx];
 790     mb->partitioning = part_idx;
 791
 792     for (n = 0; n < num; n++) {
 793         int k = firstidx[n];
 794         uint32_t left, above;
 795         const uint8_t *submv_prob;
 796
 797         if (!(k & 3))
 798             left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
 799         else
 800             left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
 801         if (k <= 3)
 802             above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
 803         else
 804             above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
 805
 806         submv_prob = vp7 ? vp7_submv_prob : get_submv_prob(left, above);
 807
 808         if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
 809             if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
 810                 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
 811                     mb->bmv[n].y = mb->mv.y + VPX(vp7, read_mv_component)(c, s->prob->mvc[0]);
 812                     mb->bmv[n].x = mb->mv.x + VPX(vp7, read_mv_component)(c, s->prob->mvc[1]);
 813                 } else {
 814                     AV_ZERO32(&mb->bmv[n]);
 815                 }
 816             } else {
 817                 AV_WN32A(&mb->bmv[n], above);
 818             }
 819         } else {
 820             AV_WN32A(&mb->bmv[n], left);
 821         }
 822     }
 823
 824     return num;
 825 }
 826
 827 /**
 828  * the vp7 reference decoder uses a padding macroblock column (added to right
 829  * edge of the frame) to guard against illegal macroblock offsets. The algorithm
 830  * has bugs that permit offsets to straddle the padding column. This function
 831  * replicates those bugs.
 832  * @param[out] edge_x macroblock x address
 833  * @param[out] edge_y macroblock y address
 834  * @return macroblock offset legal (boolean)
 835  */
 836 static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width, int xoffset, int yoffset, int boundary, int *edge_x, int *edge_y)
 837 {
 838     int vwidth = mb_width + 1;
 839     int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
 840     if (new < boundary || new % vwidth == vwidth - 1)
 841         return 0;
 842     *edge_y = new / vwidth;
 843     *edge_x = new % vwidth;
 844     return 1;
 845 }
 846
 847 static const VP56mv * get_bmv_ptr(const VP8Macroblock *mb, int subblock)
 848 {
 849     return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
 850 }
 851
 852 static av_always_inline
 853 void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
 854 {
 855     VP8Macroblock *mb_edge[12];
 856     enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
 857     enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
 858     int idx = CNT_ZERO;
 859     VP56mv near_mv[3];
 860     uint8_t cnt[3] = { 0 };
 861     VP56RangeCoder *c = &s->c;
 862     int i;
 863
 864     AV_ZERO32(&near_mv[0]);
 865     AV_ZERO32(&near_mv[1]);
 866     AV_ZERO32(&near_mv[2]);
 867
 868     for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
 869         const VP7MVPred * pred = &vp7_mv_pred[i];
 870         int edge_x, edge_y;
 871
 872         if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset, pred->yoffset, !s->profile, &edge_x, &edge_y)) {
 873             VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1) ?
 874                                                    s->macroblocks_base + (s->mb_width+1)*(edge_y + 1) + 1 + edge_x :
 875                                                    s->macroblocks + (s->mb_height - edge_y - 1)*2 + edge_x;
 876             uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
 877             if (mv) {
 878                 if (AV_RN32A(&near_mv[CNT_NEAREST])) {
 879                     if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
 880                         idx = CNT_NEAREST;
 881                     } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
 882                         if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
 883                             continue;
 884                         idx = CNT_NEAR;
 885                     } else {
 886                         AV_WN32A(&near_mv[CNT_NEAR], mv);
 887                         idx = CNT_NEAR;
 888                     }
 889                 } else {
 890                     AV_WN32A(&near_mv[CNT_NEAREST], mv);
 891                     idx = CNT_NEAREST;
 892                 }
 893             } else {
 894                 idx = CNT_ZERO;
 895             }
 896         } else {
 897             idx = CNT_ZERO;
 898         }
 899         cnt[idx] += vp7_mv_pred[i].score;
 900     }
 901
 902     mb->partitioning = VP8_SPLITMVMODE_NONE;
 903
 904     if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
 905         mb->mode = VP8_MVMODE_MV;
 906
 907         if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
 908
 909             if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
 910
 911                 if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
 912                     AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
 913                 else
 914                     AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
 915
 916                 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
 917                     mb->mode = VP8_MVMODE_SPLIT;
 918                     mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, 1) - 1];
 919                 } else {
 920                     mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
 921                     mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
 922                     mb->bmv[0] = mb->mv;
 923                 }
 924             } else {
 925                 mb->mv = near_mv[CNT_NEAR];
 926                 mb->bmv[0] = mb->mv;
 927             }
 928         } else {
 929             mb->mv = near_mv[CNT_NEAREST];
 930             mb->bmv[0] = mb->mv;
 931         }
 932     } else {
 933         mb->mode = VP8_MVMODE_ZERO;
 934         AV_ZERO32(&mb->mv);
 935         mb->bmv[0] = mb->mv;
 936     }
 937 }
 938
 939 static av_always_inline
 940 void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
 941 {
 942     VP8Macroblock *mb_edge[3] = { 0 /* top */,
 943                                   mb - 1 /* left */,
 944                                   0 /* top-left */ };
 945     enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
 946     enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
 947     int idx = CNT_ZERO;
 948     int cur_sign_bias = s->sign_bias[mb->ref_frame];
 949     int8_t *sign_bias = s->sign_bias;
 950     VP56mv near_mv[4];
 951     uint8_t cnt[4] = { 0 };
 952     VP56RangeCoder *c = &s->c;
 953
 954     if (!layout) { // layout is inlined (s->mb_layout is not)
 955         mb_edge[0] = mb + 2;
 956         mb_edge[2] = mb + 1;
 957     }
 958     else {
 959         mb_edge[0] = mb - s->mb_width-1;
 960         mb_edge[2] = mb - s->mb_width-2;
 961     }
 962
 963     AV_ZERO32(&near_mv[0]);
 964     AV_ZERO32(&near_mv[1]);
 965     AV_ZERO32(&near_mv[2]);
 966
 967     /* Process MB on top, left and top-left */
 968     #define MV_EDGE_CHECK(n)\
 969     {\
 970         VP8Macroblock *edge = mb_edge[n];\
 971         int edge_ref = edge->ref_frame;\
 972         if (edge_ref != VP56_FRAME_CURRENT) {\
 973             uint32_t mv = AV_RN32A(&edge->mv);\
 974             if (mv) {\
 975                 if (cur_sign_bias != sign_bias[edge_ref]) {\
 976                     /* SWAR negate of the values in mv. */\
 977                     mv = ~mv;\
 978                     mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
 979                 }\
 980                 if (!n || mv != AV_RN32A(&near_mv[idx]))\
 981                     AV_WN32A(&near_mv[++idx], mv);\
 982                 cnt[idx]      += 1 + (n != 2);\
 983             } else\
 984                 cnt[CNT_ZERO] += 1 + (n != 2);\
 985         }\
 986     }
 987
 988     MV_EDGE_CHECK(0)
 989     MV_EDGE_CHECK(1)
 990     MV_EDGE_CHECK(2)
 991
 992     mb->partitioning = VP8_SPLITMVMODE_NONE;
 993     if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
 994         mb->mode = VP8_MVMODE_MV;
 995
 996         /* If we have three distinct MVs, merge first and last if they're the same */
 997         if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
 998             cnt[CNT_NEAREST] += 1;
 999
1000         /* Swap near and nearest if necessary */
1001         if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1002             FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
1003             FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1004         }
1005
1006         if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1007             if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1008
1009                 /* Choose the best mv out of 0,0 and the nearest mv */
1010                 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
1011                 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
1012                                     (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
1013                                     (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1014
1015                 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1016                     mb->mode = VP8_MVMODE_SPLIT;
1017                     mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, 0) - 1];
1018                 } else {
1019                     mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
1020                     mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
1021                     mb->bmv[0] = mb->mv;
1022                 }
1023             } else {
1024                 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
1025                 mb->bmv[0] = mb->mv;
1026             }
1027         } else {
1028             clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
1029             mb->bmv[0] = mb->mv;
1030         }
1031     } else {
1032         mb->mode = VP8_MVMODE_ZERO;
1033         AV_ZERO32(&mb->mv);
1034         mb->bmv[0] = mb->mv;
1035     }
1036 }
1037
1038 static av_always_inline
1039 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1040                            int mb_x, int keyframe, int layout)
1041 {
1042     uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1043
1044     if (layout) {
1045         VP8Macroblock *mb_top = mb - s->mb_width - 1;
1046         memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1047     }
1048     if (keyframe) {
1049         int x, y;
1050         uint8_t* top;
1051         uint8_t* const left = s->intra4x4_pred_mode_left;
1052         if (layout)
1053             top = mb->intra4x4_pred_mode_top;
1054         else
1055             top = s->intra4x4_pred_mode_top + 4 * mb_x;
1056         for (y = 0; y < 4; y++) {
1057             for (x = 0; x < 4; x++) {
1058                 const uint8_t *ctx;
1059                 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
1060                 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
1061                 left[y] = top[x] = *intra4x4;
1062                 intra4x4++;
1063             }
1064         }
1065     } else {
1066         int i;
1067         for (i = 0; i < 16; i++)
1068             intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
1069     }
1070 }
1071
/* Human-readable names of the four VP7 macroblock features, indexed by
 * feature number. Both the strings and the table itself are read-only. */
static const char * const vp7_feature_name[] = { "q-index", "lf-delta", "partial-golden-update", "blit-pitch" };
1073
1074 static av_always_inline
1075 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1076                     uint8_t *segment, uint8_t *ref, int layout, int vp7)
1077 {
1078     VP56RangeCoder *c = &s->c;
1079
1080     if (vp7) {
1081         int i;
1082         *segment = 0;
1083         for (i = 0; i < 4; i++) {
1084             if (s->feature_enabled[i]) {
1085                 if (vp56_rac_get_prob(c, s->feature_present_prob[i])) {
1086                       int index = vp8_rac_get_tree(c, vp7_feature_index_tree, s->feature_index_prob[i]);
1087                       av_log(s->avctx, AV_LOG_WARNING, "Feature %s present in macroblock (value 0x%x)\n", vp7_feature_name[i], s->feature_value[i][index]);
1088                 }
1089             }
1090         }
1091     } else {
1092         if (s->segmentation.update_map) {
1093             int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
1094             *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
1095         } else if (s->segmentation.enabled)
1096             *segment = ref ? *ref : *segment;
1097     }
1098     mb->segment = *segment;
1099
1100     mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
1101
1102     if (s->keyframe) {
1103         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);
1104
1105         if (mb->mode == MODE_I4x4) {
1106             decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
1107         } else {
1108             const uint32_t modes = VPX(vp7, pred4x4_mode)[mb->mode] * 0x01010101u;
1109             if (s->mb_layout)
1110                 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1111             else
1112                 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1113             AV_WN32A( s->intra4x4_pred_mode_left, modes);
1114         }
1115
1116         mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
1117         mb->ref_frame = VP56_FRAME_CURRENT;
1118     } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
1119         // inter MB, 16.2
1120         if (vp56_rac_get_prob_branchy(c, s->prob->last))
1121             mb->ref_frame = (!vp7 && vp56_rac_get_prob(c, s->prob->golden)) ?
1122                 VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
1123         else
1124             mb->ref_frame = VP56_FRAME_PREVIOUS;
1125         s->ref_count[mb->ref_frame-1]++;
1126
1127         // motion vectors, 16.3
1128         if (vp7)
1129             vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1130         else
1131             vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
1132     } else {
1133         // intra MB, 16.1
1134         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1135
1136         if (mb->mode == MODE_I4x4)
1137             decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1138
1139         mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
1140         mb->ref_frame = VP56_FRAME_CURRENT;
1141         mb->partitioning = VP8_SPLITMVMODE_NONE;
1142         AV_ZERO32(&mb->bmv[0]);
1143     }
1144 }
1145
1146 static av_always_inline int inter_predict_dc(int16_t block[16], int16_t pred[2])
1147 {
1148     int16_t dc = block[0];
1149     int ret = 0;
1150
1151     if (pred[1] > 3) {
1152         dc += pred[0];
1153         ret = 1;
1154     }
1155
1156     if (!pred[0] || !dc || FFSIGN(pred[0]) != FFSIGN(dc)) {
1157         block[0] = pred[0] = dc;
1158         pred[1] = 0;
1159     } else {
1160         if (pred[0] == dc)
1161             pred[1]++;
1162         block[0] = pred[0] = dc;
1163     }
1164
1165     return ret;
1166 }
1167
/**
 * Decode the DCT tokens of one block, starting after the caller has already
 * consumed the first (non-EOB) end-of-block check.
 *
 * The range coder state is copied into a local, used for the whole block and
 * written back through r on exit.
 *
 * @param r arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param token_prob probabilities for the first token (an entry of probs)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @param scan maps coefficient index to position within block
 * @param vp7 non-zero for VP7: an EOB check follows a zero token, whereas
 *            VP8 skips it
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                                        int i, uint8_t *token_prob, int16_t qmul[2],
                                        const uint8_t scan[16], int vp7)
{
    VP56RangeCoder c = *r;
    // the first EOB check is not done here, so enter the loop past it
    goto skip_eob;
    do {
        int coeff;
restart:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            // after a zero token the next token uses context 0
            token_prob = probs[i][0];
            if (vp7)
                goto restart;
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            // after a one token the next token uses context 1
            token_prob = probs[i+1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff  = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else {                                    // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else {    // DCT_CAT3 and up
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9+a]);
                    int cat = (a<<1) + b;
                    // base value of the category; the extra bits are added below
                    coeff  = 3 + (8<<cat);
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            // after any larger token the next token uses context 2
            token_prob = probs[i+1][2];
        }
        // read the sign bit, then dequantize: qmul[0] for index 0, qmul[1] otherwise
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
1236
/**
 * VP7 entry point for decode_block_coeffs_internal(): forwards all arguments
 * with the vp7 flag set to 1.
 */
static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                                        int i, uint8_t *token_prob, int16_t qmul[2],
                                        const uint8_t scan[16])
{
    return decode_block_coeffs_internal(r, block, probs, i, token_prob, qmul, scan, 1);
}
1244
/* Only defined here when no macro of the same name is already in place. */
#ifndef vp8_decode_block_coeffs_internal
/**
 * VP8 entry point for decode_block_coeffs_internal(): always uses
 * zigzag_scan and passes the vp7 flag as 0.
 */
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                                        int i, uint8_t *token_prob, int16_t qmul[2])
{
    return decode_block_coeffs_internal(r, block, probs, i, token_prob, qmul, zigzag_scan, 0);
}
#endif
1253
1254 /**
1255  * @param c arithmetic bitstream reader context
1256  * @param block destination for block coefficients
1257  * @param probs probabilities to use when reading trees from the bitstream
1258  * @param i initial coeff index, 0 unless a separate DC block is coded
1259  * @param zero_nhood the initial prediction context for number of surrounding
1260  *                   all-zero blocks (only left/top, so 0-2)
1261  * @param qmul array holding the dc/ac dequant factor at position 0/1
1262  * @param scan scan pattern (VP7 only)
1263  * @return 0 if no coeffs were decoded
1264  *         otherwise, the index of the last coeff decoded plus one
1265  */
1266 static av_always_inline
1267 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1268                         uint8_t probs[16][3][NUM_DCT_TOKENS-1],
1269                         int i, int zero_nhood, int16_t qmul[2],
1270                         const uint8_t scan[16], int vp7)
1271 {
1272     uint8_t *token_prob = probs[i][zero_nhood];
1273     if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
1274         return 0;
1275     return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul, scan)
1276                : vp8_decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
1277 }
1278
1279 static av_always_inline
1280 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
1281                       uint8_t t_nnz[9], uint8_t l_nnz[9], int vp7)
1282 {
1283     int i, x, y, luma_start = 0, luma_ctx = 3;
1284     int nnz_pred, nnz, nnz_total = 0;
1285     int segment = mb->segment;
1286     int block_dc = 0;
1287
1288     if (mb->mode != MODE_I4x4 && (vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1289         nnz_pred = t_nnz[8] + l_nnz[8];
1290
1291         // decode DC values and do hadamard
1292         nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
1293                                   s->qmat[segment].luma_dc_qmul, zigzag_scan, vp7);
1294         l_nnz[8] = t_nnz[8] = !!nnz;
1295
1296         if (vp7 && mb->mode > MODE_I4x4)
1297             nnz |= inter_predict_dc(td->block_dc, s->inter_dc_pred[mb->ref_frame - 1]);
1298
1299         if (nnz) {
1300             nnz_total += nnz;
1301             block_dc = 1;
1302             if (nnz == 1)
1303                 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1304             else
1305                 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
1306         }
1307         luma_start = 1;
1308         luma_ctx = 0;
1309     }
1310
1311     // luma blocks
1312     for (y = 0; y < 4; y++)
1313         for (x = 0; x < 4; x++) {
1314             nnz_pred = l_nnz[y] + t_nnz[x];
1315             nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
1316                                       nnz_pred, s->qmat[segment].luma_qmul, s->prob[0].scan, vp7);
1317             // nnz+block_dc may be one more than the actual last index, but we don't care
1318             td->non_zero_count_cache[y][x] = nnz + block_dc;
1319             t_nnz[x] = l_nnz[y] = !!nnz;
1320             nnz_total += nnz;
1321         }
1322
1323     // chroma blocks
1324     // TODO: what to do about dimensions? 2nd dim for luma is x,
1325     // but for chroma it's (y<<1)|x
1326     for (i = 4; i < 6; i++)
1327         for (y = 0; y < 2; y++)
1328             for (x = 0; x < 2; x++) {
1329                 nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
1330                 nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
1331                                           nnz_pred, s->qmat[segment].chroma_qmul, s->prob[0].scan, vp7);
1332                 td->non_zero_count_cache[i][(y<<1)+x] = nnz;
1333                 t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
1334                 nnz_total += nnz;
1335             }
1336
1337     // if there were no coded coeffs despite the macroblock not being marked skip,
1338     // we MUST not do the inner loop filter and should not do IDCT
1339     // Since skip isn't used for bitstream prediction, just manually set it.
1340     if (!nnz_total)
1341         mb->skip = 1;
1342 }
1343
1344 static av_always_inline
1345 void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
1346                       int linesize, int uvlinesize, int simple)
1347 {
1348     AV_COPY128(top_border, src_y + 15*linesize);
1349     if (!simple) {
1350         AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
1351         AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
1352     }
1353 }
1354
1355 static av_always_inline
1356 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
1357                     int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
1358                     int simple, int xchg)
1359 {
1360     uint8_t *top_border_m1 = top_border-32;     // for TL prediction
1361     src_y  -=   linesize;
1362     src_cb -= uvlinesize;
1363     src_cr -= uvlinesize;
1364
1365 #define XCHG(a,b,xchg) do {                     \
1366         if (xchg) AV_SWAP64(b,a);               \
1367         else      AV_COPY64(b,a);               \
1368     } while (0)
1369
1370     XCHG(top_border_m1+8, src_y-8, xchg);
1371     XCHG(top_border,      src_y,   xchg);
1372     XCHG(top_border+8,    src_y+8, 1);
1373     if (mb_x < mb_width-1)
1374         XCHG(top_border+32, src_y+16, 1);
1375
1376     // only copy chroma for normal loop filter
1377     // or to initialize the top row to 127
1378     if (!simple || !mb_y) {
1379         XCHG(top_border_m1+16, src_cb-8, xchg);
1380         XCHG(top_border_m1+24, src_cr-8, xchg);
1381         XCHG(top_border+16,    src_cb, 1);
1382         XCHG(top_border+24,    src_cr, 1);
1383     }
1384 }
1385
1386 static av_always_inline
1387 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1388 {
1389     if (!mb_x) {
1390         return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1391     } else {
1392         return mb_y ? mode : LEFT_DC_PRED8x8;
1393     }
1394 }
1395
1396 static av_always_inline
1397 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1398 {
1399     if (!mb_x) {
1400         return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1401     } else {
1402         return mb_y ? mode : HOR_PRED8x8;
1403     }
1404 }
1405
1406 static av_always_inline
1407 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1408 {
1409     switch (mode) {
1410     case DC_PRED8x8:
1411         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1412     case VERT_PRED8x8:
1413         return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1414     case HOR_PRED8x8:
1415         return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1416     case PLANE_PRED8x8 /*TM*/:
1417         return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1418     }
1419     return mode;
1420 }
1421
1422 static av_always_inline
1423 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1424 {
1425     if (!mb_x) {
1426         return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1427     } else {
1428         return mb_y ? mode : HOR_VP8_PRED;
1429     }
1430 }
1431
1432 static av_always_inline
1433 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf, int vp7)
1434 {
1435     switch (mode) {
1436     case VERT_PRED:
1437         if (!mb_x && mb_y) {
1438             *copy_buf = 1;
1439             return mode;
1440         }
1441         /* fall-through */
1442     case DIAG_DOWN_LEFT_PRED:
1443     case VERT_LEFT_PRED:
1444         return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1445     case HOR_PRED:
1446         if (!mb_y) {
1447             *copy_buf = 1;
1448             return mode;
1449         }
1450         /* fall-through */
1451     case HOR_UP_PRED:
1452         return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1453     case TM_VP8_PRED:
1454         return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1455     case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
1456     case DIAG_DOWN_RIGHT_PRED:
1457     case VERT_RIGHT_PRED:
1458     case HOR_DOWN_PRED:
1459         if (!mb_y || !mb_x)
1460             *copy_buf = 1;
1461         return mode;
1462     }
1463     return mode;
1464 }
1465
1466 static av_always_inline
1467 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1468                    VP8Macroblock *mb, int mb_x, int mb_y, int vp7)
1469 {
1470     int x, y, mode, nnz;
1471     uint32_t tr;
1472
1473     // for the first row, we need to run xchg_mb_border to init the top edge to 127
1474     // otherwise, skip it if we aren't going to deblock
1475     if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1476         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1477                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1478                        s->filter.simple, 1);
1479
1480     if (mb->mode < MODE_I4x4) {
1481         mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, vp7);
1482         s->hpc.pred16x16[mode](dst[0], s->linesize);
1483     } else {
1484         uint8_t *ptr = dst[0];
1485         uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1486         const uint8_t lo = vp7 ? 128 : 127;
1487         const uint8_t hi = vp7 ? 128 : 129;
1488         uint8_t tr_top[4] = { lo, lo, lo, lo };
1489
1490         // all blocks on the right edge of the macroblock use bottom edge
1491         // the top macroblock for their topright edge
1492         uint8_t *tr_right = ptr - s->linesize + 16;
1493
1494         // if we're on the right edge of the frame, said edge is extended
1495         // from the top macroblock
1496         if (mb_y &&
1497             mb_x == s->mb_width-1) {
1498             tr = tr_right[-1]*0x01010101u;
1499             tr_right = (uint8_t *)&tr;
1500         }
1501
1502         if (mb->skip)
1503             AV_ZERO128(td->non_zero_count_cache);
1504
1505         for (y = 0; y < 4; y++) {
1506             uint8_t *topright = ptr + 4 - s->linesize;
1507             for (x = 0; x < 4; x++) {
1508                 int copy = 0, linesize = s->linesize;
1509                 uint8_t *dst = ptr+4*x;
1510                 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];
1511
1512                 if ((y == 0 || x == 3) && mb_y == 0) {
1513                     topright = tr_top;
1514                 } else if (x == 3)
1515                     topright = tr_right;
1516
1517                 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy, vp7);
1518                 if (copy) {
1519                     dst = copy_dst + 12;
1520                     linesize = 8;
1521                     if (!(mb_y + y)) {
1522                         copy_dst[3] = lo;
1523                         AV_WN32A(copy_dst+4, lo * 0x01010101U);
1524                     } else {
1525                         AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1526                         if (!(mb_x + x)) {
1527                             copy_dst[3] = hi;
1528                         } else {
1529                             copy_dst[3] = ptr[4*x-s->linesize-1];
1530                         }
1531                     }
1532                     if (!(mb_x + x)) {
1533                         copy_dst[11] =
1534                         copy_dst[19] =
1535                         copy_dst[27] =
1536                         copy_dst[35] = hi;
1537                     } else {
1538                         copy_dst[11] = ptr[4*x              -1];
1539                         copy_dst[19] = ptr[4*x+s->linesize  -1];
1540                         copy_dst[27] = ptr[4*x+s->linesize*2-1];
1541                         copy_dst[35] = ptr[4*x+s->linesize*3-1];
1542                     }
1543                 }
1544                 s->hpc.pred4x4[mode](dst, topright, linesize);
1545                 if (copy) {
1546                     AV_COPY32(ptr+4*x              , copy_dst+12);
1547                     AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
1548                     AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
1549                     AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1550                 }
1551
1552                 nnz = td->non_zero_count_cache[y][x];
1553                 if (nnz) {
1554                     if (nnz == 1)
1555                         s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
1556                     else
1557                         s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
1558                 }
1559                 topright += 4;
1560             }
1561
1562             ptr   += 4*s->linesize;
1563             intra4x4 += 4;
1564         }
1565     }
1566
1567     mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y, vp7);
1568     s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1569     s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1570
1571     if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1572         xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
1573                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1574                        s->filter.simple, 0);
1575 }
1576
/* Extra source pixels needed by the subpel filters, indexed by the low
 * three bits of twice a motion vector component. */
static const uint8_t subpel_idx[3][8] = {
    /* [0]: nr. of left extra pixels, also function pointer index */
    { 0, 1, 2, 1, 2, 1, 2, 1 },
    /* [1]: nr. of extra pixels required */
    { 0, 3, 5, 3, 5, 3, 5, 3 },
    /* [2]: nr. of right extra pixels */
    { 0, 2, 3, 2, 3, 2, 3, 2 },
};
1583
1584 /**
1585  * luma MC function
1586  *
1587  * @param s VP8 decoding context
1588  * @param dst target buffer for block data at block position
1589  * @param ref reference picture buffer at origin (0, 0)
1590  * @param mv motion vector (relative to block position) to get pixel data from
1591  * @param x_off horizontal position of block from origin (0, 0)
1592  * @param y_off vertical position of block from origin (0, 0)
1593  * @param block_w width of block (16, 8 or 4)
1594  * @param block_h height of block (always same as block_w)
1595  * @param width width of src/dst plane data
1596  * @param height height of src/dst plane data
1597  * @param linesize size of a single line of plane data, including padding
1598  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1599  */
1600 static av_always_inline
1601 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1602                  ThreadFrame *ref, const VP56mv *mv,
1603                  int x_off, int y_off, int block_w, int block_h,
1604                  int width, int height, ptrdiff_t linesize,
1605                  vp8_mc_func mc_func[3][3])
1606 {
1607     uint8_t *src = ref->f->data[0];
1608
1609     if (AV_RN32A(mv)) {
1610         int src_linesize = linesize;
1611
1612         int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
1613         int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];
1614
1615         x_off += mv->x >> 2;
1616         y_off += mv->y >> 2;
1617
1618         // edge emulation
1619         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1620         src += y_off * linesize + x_off;
1621         if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
1622             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1623             s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1624                                      src - my_idx * linesize - mx_idx,
1625                                      EDGE_EMU_LINESIZE, linesize,
1626                                      block_w + subpel_idx[1][mx],
1627                                      block_h + subpel_idx[1][my],
1628                                      x_off - mx_idx, y_off - my_idx, width, height);
1629             src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1630             src_linesize = EDGE_EMU_LINESIZE;
1631         }
1632         mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1633     } else {
1634         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1635         mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
1636     }
1637 }
1638
1639 /**
1640  * chroma MC function
1641  *
1642  * @param s VP8 decoding context
1643  * @param dst1 target buffer for block data at block position (U plane)
1644  * @param dst2 target buffer for block data at block position (V plane)
1645  * @param ref reference picture buffer at origin (0, 0)
1646  * @param mv motion vector (relative to block position) to get pixel data from
1647  * @param x_off horizontal position of block from origin (0, 0)
1648  * @param y_off vertical position of block from origin (0, 0)
1649  * @param block_w width of block (16, 8 or 4)
1650  * @param block_h height of block (always same as block_w)
1651  * @param width width of src/dst plane data
1652  * @param height height of src/dst plane data
1653  * @param linesize size of a single line of plane data, including padding
1654  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1655  */
1656 static av_always_inline
1657 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
1658                    ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
1659                    int block_w, int block_h, int width, int height, ptrdiff_t linesize,
1660                    vp8_mc_func mc_func[3][3])
1661 {
1662     uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1663
1664     if (AV_RN32A(mv)) {
1665         int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
1666         int my = mv->y&7, my_idx = subpel_idx[0][my];
1667
1668         x_off += mv->x >> 3;
1669         y_off += mv->y >> 3;
1670
1671         // edge emulation
1672         src1 += y_off * linesize + x_off;
1673         src2 += y_off * linesize + x_off;
1674         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1675         if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
1676             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1677             s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1678                                      src1 - my_idx * linesize - mx_idx,
1679                                      EDGE_EMU_LINESIZE, linesize,
1680                                      block_w + subpel_idx[1][mx],
1681                                      block_h + subpel_idx[1][my],
1682                                      x_off - mx_idx, y_off - my_idx, width, height);
1683             src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1684             mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1685
1686             s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1687                                      src2 - my_idx * linesize - mx_idx,
1688                                      EDGE_EMU_LINESIZE, linesize,
1689                                      block_w + subpel_idx[1][mx],
1690                                      block_h + subpel_idx[1][my],
1691                                      x_off - mx_idx, y_off - my_idx, width, height);
1692             src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1693             mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1694         } else {
1695             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1696             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1697         }
1698     } else {
1699         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1700         mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1701         mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1702     }
1703 }
1704
1705 static av_always_inline
1706 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1707                  ThreadFrame *ref_frame, int x_off, int y_off,
1708                  int bx_off, int by_off,
1709                  int block_w, int block_h,
1710                  int width, int height, VP56mv *mv)
1711 {
1712     VP56mv uvmv = *mv;
1713
1714     /* Y */
1715     vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1716                 ref_frame, mv, x_off + bx_off, y_off + by_off,
1717                 block_w, block_h, width, height, s->linesize,
1718                 s->put_pixels_tab[block_w == 8]);
1719
1720     /* U/V */
1721     if (s->profile == 3) { /* this block only applies VP8; it is safe to check only the profile, as VP7 profile <= 1 */
1722         uvmv.x &= ~7;
1723         uvmv.y &= ~7;
1724     }
1725     x_off   >>= 1; y_off   >>= 1;
1726     bx_off  >>= 1; by_off  >>= 1;
1727     width   >>= 1; height  >>= 1;
1728     block_w >>= 1; block_h >>= 1;
1729     vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1730                   dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1731                   &uvmv, x_off + bx_off, y_off + by_off,
1732                   block_w, block_h, width, height, s->uvlinesize,
1733                   s->put_pixels_tab[1 + (block_w == 4)]);
1734 }
1735
1736 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1737  * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
1738 static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1739 {
1740     /* Don't prefetch refs that haven't been used very often this frame. */
1741     if (s->ref_count[ref-1] > (mb_xy >> 5)) {
1742         int x_off = mb_x << 4, y_off = mb_y << 4;
1743         int mx = (mb->mv.x>>2) + x_off + 8;
1744         int my = (mb->mv.y>>2) + y_off;
1745         uint8_t **src= s->framep[ref]->tf.f->data;
1746         int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
1747         /* For threading, a ff_thread_await_progress here might be useful, but
1748          * it actually slows down the decoder. Since a bad prefetch doesn't
1749          * generate bad decoder output, we don't run it here. */
1750         s->vdsp.prefetch(src[0]+off, s->linesize, 4);
1751         off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
1752         s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
1753     }
1754 }
1755
1756 /**
1757  * Apply motion vectors to prediction buffer, chapter 18.
1758  */
1759 static av_always_inline
1760 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1761                    VP8Macroblock *mb, int mb_x, int mb_y)
1762 {
1763     int x_off = mb_x << 4, y_off = mb_y << 4;
1764     int width = 16*s->mb_width, height = 16*s->mb_height;
1765     ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1766     VP56mv *bmv = mb->bmv;
1767
1768     switch (mb->partitioning) {
1769     case VP8_SPLITMVMODE_NONE:
1770         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1771                     0, 0, 16, 16, width, height, &mb->mv);
1772         break;
1773     case VP8_SPLITMVMODE_4x4: {
1774         int x, y;
1775         VP56mv uvmv;
1776
1777         /* Y */
1778         for (y = 0; y < 4; y++) {
1779             for (x = 0; x < 4; x++) {
1780                 vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
1781                             ref, &bmv[4*y + x],
1782                             4*x + x_off, 4*y + y_off, 4, 4,
1783                             width, height, s->linesize,
1784                             s->put_pixels_tab[2]);
1785             }
1786         }
1787
1788         /* U/V */
1789         x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
1790         for (y = 0; y < 2; y++) {
1791             for (x = 0; x < 2; x++) {
1792                 uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
1793                          mb->bmv[ 2*y    * 4 + 2*x+1].x +
1794                          mb->bmv[(2*y+1) * 4 + 2*x  ].x +
1795                          mb->bmv[(2*y+1) * 4 + 2*x+1].x;
1796                 uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
1797                          mb->bmv[ 2*y    * 4 + 2*x+1].y +
1798                          mb->bmv[(2*y+1) * 4 + 2*x  ].y +
1799                          mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1800                 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
1801                 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
1802                 if (s->profile == 3) {
1803                     uvmv.x &= ~7;
1804                     uvmv.y &= ~7;
1805                 }
1806                 vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
1807                               dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1808                               4*x + x_off, 4*y + y_off, 4, 4,
1809                               width, height, s->uvlinesize,
1810                               s->put_pixels_tab[2]);
1811             }
1812         }
1813         break;
1814     }
1815     case VP8_SPLITMVMODE_16x8:
1816         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1817                     0, 0, 16, 8, width, height, &bmv[0]);
1818         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1819                     0, 8, 16, 8, width, height, &bmv[1]);
1820         break;
1821     case VP8_SPLITMVMODE_8x16:
1822         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1823                     0, 0, 8, 16, width, height, &bmv[0]);
1824         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1825                     8, 0, 8, 16, width, height, &bmv[1]);
1826         break;
1827     case VP8_SPLITMVMODE_8x8:
1828         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1829                     0, 0, 8, 8, width, height, &bmv[0]);
1830         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1831                     8, 0, 8, 8, width, height, &bmv[1]);
1832         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1833                     0, 8, 8, 8, width, height, &bmv[2]);
1834         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1835                     8, 8, 8, 8, width, height, &bmv[3]);
1836         break;
1837     }
1838 }
1839
1840 static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
1841                                      uint8_t *dst[3], VP8Macroblock *mb)
1842 {
1843     int x, y, ch;
1844
1845     if (mb->mode != MODE_I4x4) {
1846         uint8_t *y_dst = dst[0];
1847         for (y = 0; y < 4; y++) {
1848             uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1849             if (nnz4) {
1850                 if (nnz4&~0x01010101) {
1851                     for (x = 0; x < 4; x++) {
1852                         if ((uint8_t)nnz4 == 1)
1853                             s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
1854                         else if((uint8_t)nnz4 > 1)
1855                             s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
1856                         nnz4 >>= 8;
1857                         if (!nnz4)
1858                             break;
1859                     }
1860                 } else {
1861                     s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1862                 }
1863             }
1864             y_dst += 4*s->linesize;
1865         }
1866     }
1867
1868     for (ch = 0; ch < 2; ch++) {
1869         uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
1870         if (nnz4) {
1871             uint8_t *ch_dst = dst[1+ch];
1872             if (nnz4&~0x01010101) {
1873                 for (y = 0; y < 2; y++) {
1874                     for (x = 0; x < 2; x++) {
1875                         if ((uint8_t)nnz4 == 1)
1876                             s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1877                         else if((uint8_t)nnz4 > 1)
1878                             s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
1879                         nnz4 >>= 8;
1880                         if (!nnz4)
1881                             goto chroma_idct_end;
1882                     }
1883                     ch_dst += 4*s->uvlinesize;
1884                 }
1885             } else {
1886                 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
1887             }
1888         }
1889 chroma_idct_end: ;
1890     }
1891 }
1892
1893 static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f, int vp7)
1894 {
1895     int interior_limit, filter_level;
1896
1897     if (s->segmentation.enabled) {
1898         filter_level = s->segmentation.filter_level[mb->segment];
1899         if (!s->segmentation.absolute_vals)
1900             filter_level += s->filter.level;
1901     } else
1902         filter_level = s->filter.level;
1903
1904     if (s->lf_delta.enabled) {
1905         filter_level += s->lf_delta.ref[mb->ref_frame];
1906         filter_level += s->lf_delta.mode[mb->mode];
1907     }
1908
1909     filter_level = av_clip_uintp2(filter_level, 6);
1910
1911     interior_limit = filter_level;
1912     if (s->filter.sharpness) {
1913         interior_limit >>= (s->filter.sharpness + 3) >> 2;
1914         interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
1915     }
1916     interior_limit = FFMAX(interior_limit, 1);
1917
1918     f->filter_level = filter_level;
1919     f->inner_limit = interior_limit;
1920     f->inner_filter = vp7 || !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
1921 }
1922
1923 static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y, int vp7)
1924 {
1925     int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
1926     int filter_level = f->filter_level;
1927     int inner_limit = f->inner_limit;
1928     int inner_filter = f->inner_filter;
1929     int linesize = s->linesize;
1930     int uvlinesize = s->uvlinesize;
1931     static const uint8_t hev_thresh_lut[2][64] = {
1932         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1933           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1934           3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
1935           3, 3, 3, 3 },
1936         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
1937           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1938           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1939           2, 2, 2, 2 }
1940     };
1941
1942     if (!filter_level)
1943         return;
1944
1945     if (vp7) {
1946         bedge_lim_y  =   filter_level;
1947         bedge_lim_uv = 2*filter_level;
1948         mbedge_lim   =   filter_level + 2;
1949     } else {
1950         bedge_lim_y  =
1951         bedge_lim_uv = 2*filter_level + inner_limit;
1952         mbedge_lim   =   bedge_lim_y + 4;
1953     }
1954
1955     hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
1956
1957     if (mb_x) {
1958         s->vp8dsp.vp8_h_loop_filter16y(dst[0],     linesize,
1959                                        mbedge_lim, inner_limit, hev_thresh);
1960         s->vp8dsp.vp8_h_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
1961                                        mbedge_lim, inner_limit, hev_thresh);
1962     }
1963
1964 #define H_LOOP_FILTER_16Y_INNER(cond) \
1965     if (cond && inner_filter) {\
1966         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim_y,\
1967                                              inner_limit, hev_thresh);\
1968         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim_y,\
1969                                              inner_limit, hev_thresh);\
1970         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim_y,\
1971                                              inner_limit, hev_thresh);\
1972         s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,\
1973                                              uvlinesize,  bedge_lim_uv,\
1974                                              inner_limit, hev_thresh);\
1975     }
1976
1977     H_LOOP_FILTER_16Y_INNER(!vp7)
1978
1979     if (mb_y) {
1980         s->vp8dsp.vp8_v_loop_filter16y(dst[0],     linesize,
1981                                        mbedge_lim, inner_limit, hev_thresh);
1982         s->vp8dsp.vp8_v_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
1983                                        mbedge_lim, inner_limit, hev_thresh);
1984     }
1985
1986     if (inner_filter) {
1987         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
1988                                              linesize,    bedge_lim_y,
1989                                              inner_limit, hev_thresh);
1990         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
1991                                              linesize,    bedge_lim_y,
1992                                              inner_limit, hev_thresh);
1993         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
1994                                              linesize,    bedge_lim_y,
1995                                              inner_limit, hev_thresh);
1996         s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
1997                                              dst[2] + 4 * uvlinesize,
1998                                              uvlinesize,  bedge_lim_uv,
1999                                              inner_limit, hev_thresh);
2000     }
2001
2002     H_LOOP_FILTER_16Y_INNER(vp7)
2003 }
2004
2005 static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
2006 {
2007     int mbedge_lim, bedge_lim;
2008     int filter_level = f->filter_level;
2009     int inner_limit = f->inner_limit;
2010     int inner_filter = f->inner_filter;
2011     int linesize = s->linesize;
2012
2013     if (!filter_level)
2014         return;
2015
2016      bedge_lim = 2*filter_level + inner_limit;
2017     mbedge_lim = bedge_lim + 4;
2018
2019     if (mb_x)
2020         s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2021     if (inner_filter) {
2022         s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
2023         s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
2024         s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
2025     }
2026
2027     if (mb_y)
2028         s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2029     if (inner_filter) {
2030         s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
2031         s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
2032         s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
2033     }
2034 }
2035
2036 #define MARGIN (16 << 2)
2037 static av_always_inline
2038 void decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2039                                    VP8Frame *prev_frame, int vp7)
2040 {
2041     VP8Context *s = avctx->priv_data;
2042     int mb_x, mb_y;
2043
2044     s->mv_min.y = -MARGIN;
2045     s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2046     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2047         VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
2048         int mb_xy = mb_y*s->mb_width;
2049
2050         AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
2051
2052         s->mv_min.x = -MARGIN;
2053         s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2054         for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2055             if (mb_y == 0)
2056                 AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
2057             decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2058                            prev_frame && prev_frame->seg_map ?
2059                            prev_frame->seg_map->data + mb_xy : NULL, 1, vp7);
2060             s->mv_min.x -= 64;
2061             s->mv_max.x -= 64;
2062         }
2063         s->mv_min.y -= 64;
2064         s->mv_max.y -= 64;
2065     }
2066 }
2067
#if CONFIG_VP7_DECODER
/** VP7 instantiation of decode_mv_mb_modes(): forwards with vp7 = 1. */
static void vp7_decode_mv_mb_modes(AVCodecContext *avctx,
                                   VP8Frame *curframe,
                                   VP8Frame *prev_frame)
{
    decode_mv_mb_modes(avctx, curframe, prev_frame, 1);
}
#endif /* CONFIG_VP7_DECODER */
2075
#if CONFIG_VP8_DECODER
/** VP8 instantiation of decode_mv_mb_modes(): forwards with vp7 = 0. */
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx,
                                   VP8Frame *curframe,
                                   VP8Frame *prev_frame)
{
    decode_mv_mb_modes(avctx, curframe, prev_frame, 0);
}
#endif /* CONFIG_VP8_DECODER */
2083
#if HAVE_THREADS
/**
 * Block until the thread owning otd has reached at least the position
 * (mb_y_check, mb_x_check).
 *
 * A position is packed as (row << 16) | (column & 0xFFFF).  While waiting,
 * td->wait_mb_pos holds the awaited position so that update_pos() in the
 * other thread can tell whether a wake-up is needed; it is reset to INT_MAX
 * afterwards.
 *
 * Expands to a single statement; the caller supplies the semicolon.
 */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
    do {                                                                      \
        int tmp = ((mb_y_check) << 16) | ((mb_x_check) & 0xFFFF);             \
        if ((otd)->thread_mb_pos < tmp) {                                     \
            pthread_mutex_lock(&(otd)->lock);                                 \
            (td)->wait_mb_pos = tmp;                                          \
            while ((otd)->thread_mb_pos < tmp)                                \
                pthread_cond_wait(&(otd)->cond, &(otd)->lock);                \
            (td)->wait_mb_pos = INT_MAX;                                      \
            pthread_mutex_unlock(&(otd)->lock);                               \
        }                                                                     \
    } while (0)

/**
 * Publish the position (mb_y, mb_x) as td's progress and, with slice
 * threading and more than one job, wake the waiters on td when a
 * neighbouring thread waits for a position that has now been reached.
 *
 * Besides its arguments the macro reads avctx, num_jobs, next_td and
 * prev_td from the enclosing scope.  When next_td or prev_td is NULL the
 * wake-up is sent unconditionally.
 *
 * Expands to a single statement; the caller supplies the semicolon.
 */
#define update_pos(td, mb_y, mb_x)                                            \
    do {                                                                      \
        int pos              = ((mb_y) << 16) | ((mb_x) & 0xFFFF);            \
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
                               (num_jobs > 1);                                \
        int is_null          = (next_td == NULL) || (prev_td == NULL);        \
        int pos_check        = (is_null) ? 1 :                                \
            (next_td != (td) && pos >= next_td->wait_mb_pos) ||               \
            (prev_td != (td) && pos >= prev_td->wait_mb_pos);                 \
        (td)->thread_mb_pos = pos;                                            \
        if (sliced_threading && pos_check) {                                  \
            pthread_mutex_lock(&(td)->lock);                                  \
            pthread_cond_broadcast(&(td)->cond);                              \
            pthread_mutex_unlock(&(td)->lock);                                \
        }                                                                     \
    } while (0)
#else
/* Without thread support both macros are no-ops that still behave as one
 * statement, so they stay safe in unbraced if/else bodies. */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) do { } while (0)
#define update_pos(td, mb_y, mb_x) do { } while (0)
#endif
2120
2121 static av_always_inline void decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2122                                         int jobnr, int threadnr, int vp7)
2123 {
2124     VP8Context *s = avctx->priv_data;
2125     VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2126     int mb_y = td->thread_mb_pos>>16;
2127     int mb_x, mb_xy = mb_y*s->mb_width;
2128     int num_jobs = s->num_jobs;
2129     VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
2130     VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
2131     VP8Macroblock *mb;
2132     uint8_t *dst[3] = {
2133         curframe->tf.f->data[0] + 16*mb_y*s->linesize,
2134         curframe->tf.f->data[1] +  8*mb_y*s->uvlinesize,
2135         curframe->tf.f->data[2] +  8*mb_y*s->uvlinesize
2136     };
2137     if (mb_y == 0) prev_td = td;
2138     else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
2139     if (mb_y == s->mb_height-1) next_td = td;
2140     else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];
2141     if (s->mb_layout == 1)
2142         mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
2143     else {
2144         // Make sure the previous frame has read its segmentation map,
2145         // if we re-use the same map.
2146         if (prev_frame && s->segmentation.enabled &&
2147             !s->segmentation.update_map)
2148             ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2149         mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
2150         memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2151         AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
2152     }
2153
2154     if (!vp7 || mb_y == 0)
2155         memset(td->left_nnz, 0, sizeof(td->left_nnz));
2156
2157     s->mv_min.x = -MARGIN;
2158     s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;
2159
2160     for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2161         // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2162         if (prev_td != td) {
2163             if (threadnr != 0) {
2164                 check_thread_pos(td, prev_td, mb_x+(vp7?2:1), mb_y-(vp7?2:1));
2165             } else {
2166                 check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+(vp7?2:1)), mb_y-(vp7?2:1));
2167             }
2168         }
2169
2170         s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2171         s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
2172
2173         if (!s->mb_layout)
2174             decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2175                            prev_frame && prev_frame->seg_map ?
2176                            prev_frame->seg_map->data + mb_xy : NULL, 0, vp7);
2177
2178         prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2179
2180         if (!mb->skip)
2181             decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, vp7);
2182
2183         if (mb->mode <= MODE_I4x4)
2184             intra_predict(s, td, dst, mb, mb_x, mb_y, vp7);
2185         else
2186             inter_predict(s, td, dst, mb, mb_x, mb_y);
2187
2188         prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2189
2190         if (!mb->skip) {
2191             idct_mb(s, td, dst, mb);
2192         } else {
2193             AV_ZERO64(td->left_nnz);
2194             AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned
2195
2196             // Reset DC block predictors if they would exist if the mb had coefficients
2197             if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2198                 td->left_nnz[8]     = 0;
2199                 s->top_nnz[mb_x][8] = 0;
2200             }
2201         }
2202
2203         if (s->deblock_filter)
2204             filter_level_for_mb(s, mb, &td->filter_strength[mb_x], vp7);
2205
2206         if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
2207             if (s->filter.simple)
2208                 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
2209             else
2210                 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2211         }
2212
2213         prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2214
2215         dst[0] += 16;
2216         dst[1] += 8;
2217         dst[2] += 8;
2218         s->mv_min.x -= 64;
2219         s->mv_max.x -= 64;
2220
2221         if (mb_x == s->mb_width+1) {
2222             update_pos(td, mb_y, s->mb_width+3);
2223         } else {
2224             update_pos(td, mb_y, mb_x);
2225         }
2226     }
2227 }
2228
#if CONFIG_VP7_DECODER
/** VP7 instantiation of decode_mb_row_no_filter(): forwards with vp7 = 1. */
static void vp7_decode_mb_row_no_filter(AVCodecContext *avctx,
                                        void *tdata,
                                        int jobnr,
                                        int threadnr)
{
    decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
}
#endif /* CONFIG_VP7_DECODER */
2236
#if CONFIG_VP8_DECODER
/** VP8 instantiation of decode_mb_row_no_filter(): forwards with vp7 = 0. */
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx,
                                        void *tdata,
                                        int jobnr,
                                        int threadnr)
{
    decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
}
#endif /* CONFIG_VP8_DECODER */
2244
2245 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2246                               int jobnr, int threadnr, int vp7)
2247 {
2248     VP8Context *s = avctx->priv_data;
2249     VP8ThreadData *td = &s->thread_data[threadnr];
2250     int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
2251     AVFrame *curframe = s->curframe->tf.f;
2252     VP8Macroblock *mb;
2253     VP8ThreadData *prev_td, *next_td;
2254     uint8_t *dst[3] = {
2255         curframe->data[0] + 16*mb_y*s->linesize,
2256         curframe->data[1] +  8*mb_y*s->uvlinesize,
2257         curframe->data[2] +  8*mb_y*s->uvlinesize
2258     };
2259
2260     if (s->mb_layout == 1)
2261         mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
2262     else
2263         mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
2264
2265     if (mb_y == 0) prev_td = td;
2266     else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
2267     if (mb_y == s->mb_height-1) next_td = td;
2268     else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];
2269
2270     for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2271         VP8FilterStrength *f = &td->filter_strength[mb_x];
2272         if (prev_td != td) {
2273             check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
2274         }
2275         if (next_td != td)
2276             if (next_td != &s->thread_data[0]) {
2277                 check_thread_pos(td, next_td, mb_x+1, mb_y+1);
2278             }
2279
2280         if (num_jobs == 1) {
2281             if (s->filter.simple)
2282                 backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
2283             else
2284                 backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2285         }
2286
2287         if (s->filter.simple)
2288             filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2289         else
2290             filter_mb(s, dst, f, mb_x, mb_y, vp7);
2291         dst[0] += 16;
2292         dst[1] += 8;
2293         dst[2] += 8;
2294
2295         update_pos(td, mb_y, (s->mb_width+3) + mb_x);
2296     }
2297 }
2298
#if CONFIG_VP7_DECODER
/** VP7 instantiation of filter_mb_row(): forwards with vp7 = 1. */
static void vp7_filter_mb_row(AVCodecContext *avctx,
                              void *tdata,
                              int jobnr,
                              int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
}
#endif /* CONFIG_VP7_DECODER */
2306
#if CONFIG_VP8_DECODER
/** VP8 instantiation of filter_mb_row(): forwards with vp7 = 0. */
static void vp8_filter_mb_row(AVCodecContext *avctx,
                              void *tdata,
                              int jobnr,
                              int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
}
#endif /* CONFIG_VP8_DECODER */
2314
2315 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2316                                     int jobnr, int threadnr)
2317 {
2318     VP8Context *s = avctx->priv_data;
2319     VP8ThreadData *td = &s->thread_data[jobnr];
2320     VP8ThreadData *next_td = NULL, *prev_td = NULL;
2321     VP8Frame *curframe = s->curframe;
2322     int mb_y, num_jobs = s->num_jobs;
2323     td->thread_nr = threadnr;
2324     for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2325         if (mb_y >= s->mb_height) break;
2326         td->thread_mb_pos = mb_y<<16;
2327         s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
2328         if (s->deblock_filter)
2329             s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2330         update_pos(td, mb_y, INT_MAX & 0xFFFF);
2331
2332         s->mv_min.y -= 64;
2333         s->mv_max.y -= 64;
2334
2335         if (avctx->active_thread_type == FF_THREAD_FRAME)
2336             ff_thread_report_progress(&curframe->tf, mb_y, 0);
2337     }
2338
2339     return 0;
2340 }
2341
2342 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2343                         AVPacket *avpkt)
2344 {
2345     VP8Context *s = avctx->priv_data;
2346     int ret, i, referenced, num_jobs;
2347     enum AVDiscard skip_thresh;
2348     VP8Frame *av_uninit(curframe), *prev_frame;
2349
2350     if ((ret = VPX(s->vp7, decode_frame_header)(s, avpkt->data, avpkt->size)) < 0)
2351         goto err;
2352
2353     prev_frame = s->framep[VP56_FRAME_CURRENT];
2354
2355     referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
2356                                 || s->update_altref == VP56_FRAME_CURRENT;
2357
2358     skip_thresh = !referenced ? AVDISCARD_NONREF :
2359                     !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;
2360
2361     if (avctx->skip_frame >= skip_thresh) {
2362         s->invisible = 1;
2363         memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2364         goto skip_decode;
2365     }
2366     s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2367
2368     // release no longer referenced frames
2369     for (i = 0; i < 5; i++)
2370         if (s->frames[i].tf.f->data[0] &&
2371             &s->frames[i] != prev_frame &&
2372             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2373             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2374             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2375             vp8_release_frame(s, &s->frames[i]);
2376
2377     curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2378
2379     // Given that arithmetic probabilities are updated every frame, it's quite likely
2380     // that the values we have on a random interframe are complete junk if we didn't
2381     // start decode on a keyframe. So just don't display anything rather than junk.
2382     if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2383                          !s->framep[VP56_FRAME_GOLDEN] ||
2384                          !s->framep[VP56_FRAME_GOLDEN2])) {
2385         av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
2386         ret = AVERROR_INVALIDDATA;
2387         goto err;
2388     }
2389
2390     curframe->tf.f->key_frame = s->keyframe;
2391     curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
2392     if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2393         goto err;
2394
2395     // check if golden and altref are swapped
2396     if (s->update_altref != VP56_FRAME_NONE) {
2397         s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[s->update_altref];
2398     } else {
2399         s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[VP56_FRAME_GOLDEN2];
2400     }
2401     if (s->update_golden != VP56_FRAME_NONE) {
2402         s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[s->update_golden];
2403     } else {
2404         s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[VP56_FRAME_GOLDEN];
2405     }
2406     if (s->update_last) {
2407         s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2408     } else {
2409         s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2410     }
2411     s->next_framep[VP56_FRAME_CURRENT]      = curframe;
2412
2413     ff_thread_finish_setup(avctx);
2414
2415     s->linesize   = curframe->tf.f->linesize[0];
2416     s->uvlinesize = curframe->tf.f->linesize[1];
2417
2418     memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
2419     /* Zero macroblock structures for top/top-left prediction from outside the frame. */
2420     if (!s->mb_layout)
2421         memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
2422     if (!s->mb_layout && s->keyframe)
2423         memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
2424
2425     memset(s->ref_count, 0, sizeof(s->ref_count));
2426
2427
2428     if (s->mb_layout == 1) {
2429         // Make sure the previous frame has read its segmentation map,
2430         // if we re-use the same map.
2431         if (prev_frame && s->segmentation.enabled &&
2432             !s->segmentation.update_map)
2433             ff_thread_await_progress(&prev_frame->tf, 1, 0);
2434         VPX(s->vp7, decode_mv_mb_modes)(avctx, curframe, prev_frame);
2435     }
2436
2437     if (avctx->active_thread_type == FF_THREAD_FRAME)
2438         num_jobs = 1;
2439     else
2440         num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2441     s->num_jobs   = num_jobs;
2442     s->curframe   = curframe;
2443     s->prev_frame = prev_frame;
2444     s->mv_min.y   = -MARGIN;
2445     s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
2446     for (i = 0; i < MAX_THREADS; i++) {
2447         s->thread_data[i].thread_mb_pos = 0;
2448         s->thread_data[i].wait_mb_pos = INT_MAX;
2449     }
2450     avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
2451
2452     ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2453     memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2454
2455 skip_decode:
2456     // if future frames don't use the updated probabilities,
2457     // reset them to the values we saved
2458     if (!s->update_probabilities)
2459         s->prob[0] = s->prob[1];
2460
2461     if (!s->invisible) {
2462         if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2463             return ret;
2464         *got_frame      = 1;
2465     }
2466
2467     return avpkt->size;
2468 err:
2469     memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2470     return ret;
2471 }
2472
2473 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2474 {
2475     VP8Context *s = avctx->priv_data;
2476     int i;
2477
2478     vp8_decode_flush_impl(avctx, 1);
2479     for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2480         av_frame_free(&s->frames[i].tf.f);
2481
2482     return 0;
2483 }
2484
2485 static av_cold int vp8_init_frames(VP8Context *s)
2486 {
2487     int i;
2488     for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2489         s->frames[i].tf.f = av_frame_alloc();
2490         if (!s->frames[i].tf.f)
2491             return AVERROR(ENOMEM);
2492     }
2493     return 0;
2494 }
2495
2496 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2497 {
2498     VP8Context *s = avctx->priv_data;
2499     int ret;
2500
2501     s->avctx = avctx;
2502     s->vp7   = avctx->codec->id == AV_CODEC_ID_VP7;
2503     avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2504     avctx->internal->allocate_progress = 1;
2505
2506     ff_videodsp_init(&s->vdsp, 8);
2507     ff_h264_pred_init(&s->hpc, s->vp7 ? AV_CODEC_ID_VP7 : AV_CODEC_ID_VP8, 8, 1);
2508     ff_vp8dsp_init(&s->vp8dsp, s->vp7);
2509
2510     s->decode_mb_row_no_filter = VPX(s->vp7, decode_mb_row_no_filter);
2511     s->filter_mb_row           = VPX(s->vp7, filter_mb_row);
2512
2513     if ((ret = vp8_init_frames(s)) < 0) {
2514         ff_vp8_decode_free(avctx);
2515         return ret;
2516     }
2517
2518     return 0;
2519 }
2520
2521 #if CONFIG_VP8_DECODER
2522 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2523 {
2524     VP8Context *s = avctx->priv_data;
2525     int ret;
2526
2527     s->avctx = avctx;
2528
2529     if ((ret = vp8_init_frames(s)) < 0) {
2530         ff_vp8_decode_free(avctx);
2531         return ret;
2532     }
2533
2534     return 0;
2535 }
2536
/*
 * Translate a frame pointer that points into s_src->frames[] into the
 * pointer to the same slot of s->frames[]; NULL maps to NULL.
 *
 * Relies on variables named `s` and `s_src` being in scope at the point of
 * use. The argument and the whole expansion are parenthesised so the macro
 * is safe with any expression and in any context. Note that `pic` is
 * evaluated twice, so it must not have side effects.
 */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
2539
2540 static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
2541 {
2542     VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2543     int i;
2544
2545     if (s->macroblocks_base &&
2546         (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2547         free_buffers(s);
2548         s->mb_width  = s_src->mb_width;
2549         s->mb_height = s_src->mb_height;
2550     }
2551
2552     s->prob[0] = s_src->prob[!s_src->update_probabilities];
2553     s->segmentation = s_src->segmentation;
2554     s->lf_delta = s_src->lf_delta;
2555     memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2556
2557     for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2558         if (s_src->frames[i].tf.f->data[0]) {
2559             int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2560             if (ret < 0)
2561                 return ret;
2562         }
2563     }
2564
2565     s->framep[0] = REBASE(s_src->next_framep[0]);
2566     s->framep[1] = REBASE(s_src->next_framep[1]);
2567     s->framep[2] = REBASE(s_src->next_framep[2]);
2568     s->framep[3] = REBASE(s_src->next_framep[3]);
2569
2570     return 0;
2571 }
2572 #endif
2573
2574 #if CONFIG_VP7_DECODER
2575 AVCodec ff_vp7_decoder = {
2576     .name                  = "vp7",
2577     .long_name             = NULL_IF_CONFIG_SMALL("On2 VP7"),
2578     .type                  = AVMEDIA_TYPE_VIDEO,
2579     .id                    = AV_CODEC_ID_VP7,
2580     .priv_data_size        = sizeof(VP8Context),
2581     .init                  = ff_vp8_decode_init,
2582     .close                 = ff_vp8_decode_free,
2583     .decode                = ff_vp8_decode_frame,
2584     .capabilities          = CODEC_CAP_DR1,
2585     .flush                 = vp8_decode_flush,
2586 };
2587 #endif
2588
2589 #if CONFIG_VP8_DECODER
2590 AVCodec ff_vp8_decoder = {
2591     .name                  = "vp8",
2592     .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
2593     .type                  = AVMEDIA_TYPE_VIDEO,
2594     .id                    = AV_CODEC_ID_VP8,
2595     .priv_data_size        = sizeof(VP8Context),
2596     .init                  = ff_vp8_decode_init,
2597     .close                 = ff_vp8_decode_free,
2598     .decode                = ff_vp8_decode_frame,
2599     .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2600     .flush                 = vp8_decode_flush,
2601     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2602     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2603 };
2604 #endif
2605