2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
35 #define VP9_SYNCCODE 0x498342
// A decoded frame plus the per-8x8-block side data the decoder keeps for it.
72 typedef struct VP9Frame {
74 AVBufferRef *extradata; // one buffer backing both segmentation_map and mv
75 uint8_t *segmentation_map; // per-8x8-block segment ids; points into extradata->data
76 struct VP9mvrefPair *mv; // per-8x8-block mv/ref pairs; placed right after the segmap
// NOTE(review): the mask[] member below appears to belong to the loop-filter
// struct, not VP9Frame — the lines between it and the fields above look elided; confirm.
82 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
83 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
// Per-block decode state: mode decisions made while parsing one block.
86 typedef struct VP9Block {
87 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip; // segment id, intra/inter, compound flag, ref indices, luma/chroma modes, skip flag
88 enum FilterMode filter; // interpolation filter used for inter prediction
89 VP56mv mv[4 /* b_idx */][2 /* ref */]; // up to 4 sub-block mvs, one per reference
91 enum TxfmMode tx, uvtx; // luma and chroma transform sizes
93 enum BlockPartition bp; // partition type of this block
// Main per-decoder state. NOTE(review): several members and nested struct
// headers appear elided from this copy of the file (e.g. the structs that
// should enclose the coef[]/eob[] tables below) — confirm against upstream.
96 typedef struct VP9Context {
103 VP9Block *b_base, *b; // block array base and current-block cursor
105 int row, row7, col, col7; // current position in 8x8 units; *7 = masked low 3 bits
107 ptrdiff_t y_stride, uv_stride;
111 uint8_t keyframe, last_keyframe;
113 uint8_t use_last_frame_mvs; // whether temporal mv prediction from the previous frame is allowed
118 uint8_t refreshrefmask; // bitmask of reference slots updated by this frame
119 uint8_t highprecisionmvs; // 1/8-pel mv precision enabled
120 enum FilterMode filtermode;
121 uint8_t allowcompinter; // compound inter prediction permitted (mixed sign biases)
124 uint8_t parallelmode; // frame-parallel decoding mode (no backward prob adaptation)
128 uint8_t varcompref[2]; // the two candidate variable refs for compound prediction
129 ThreadFrame refs[8], next_refs[8]; // reference slots before/after this frame
131 #define REF_FRAME_MVPAIR 1
132 #define REF_FRAME_SEGMAP 2
139 uint8_t mblim_lut[64]; // loop-filter limit LUT, rebuilt when sharpness changes
147 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta; // quantizer deltas vs. yac_qi
149 #define MAX_SEGMENT 8
153 uint8_t absolute_vals; // segmentation features are absolute, not deltas
159 uint8_t skip_enabled;
// tiling layout, in superblock units
168 unsigned log2_tile_cols, log2_tile_rows;
169 unsigned tile_cols, tile_rows;
170 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
172 unsigned sb_cols, sb_rows, rows, cols; // frame size in 64x64 superblocks and 8x8 blocks
// NOTE(review): the next two coef tables belong to elided nested structs
// (saved probability contexts vs. the working probability set) — confirm.
175 uint8_t coef[4][2][2][6][6][3];
179 uint8_t coef[4][2][2][6][6][11];
// symbol counters for backward probability adaptation
184 unsigned y_mode[4][10];
185 unsigned uv_mode[10][10];
186 unsigned filter[4][3];
187 unsigned mv_mode[7][4];
188 unsigned intra[4][2];
190 unsigned single_ref[5][2][2];
191 unsigned comp_ref[5][2];
192 unsigned tx32p[2][4];
193 unsigned tx16p[2][3];
196 unsigned mv_joint[4];
199 unsigned classes[11];
201 unsigned bits[10][2];
202 unsigned class0_fp[2][4];
204 unsigned class0_hp[2];
207 unsigned partition[4][4][4];
208 unsigned coef[4][2][2][6][6][3];
209 unsigned eob[4][2][2][6][6][2];
211 enum TxfmMode txfmmode;
212 enum CompPredMode comppredmode;
214 // contextual (left/above) cache
215 DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
216 DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
217 DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
218 DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
219 DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
220 DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
221 DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
222 DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
223 DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
224 DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
225 DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
226 DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
// "above" context lines span the whole frame width; all carved out of one
// allocation in update_size()
227 uint8_t *above_partition_ctx;
228 uint8_t *above_mode_ctx;
229 // FIXME maybe merge some of the below in a flags field?
230 uint8_t *above_y_nnz_ctx;
231 uint8_t *above_uv_nnz_ctx[2];
232 uint8_t *above_skip_ctx; // 1bit
233 uint8_t *above_txfm_ctx; // 2bit
234 uint8_t *above_segpred_ctx; // 1bit
235 uint8_t *above_intra_ctx; // 1bit
236 uint8_t *above_comp_ctx; // 1bit
237 uint8_t *above_ref_ctx; // 2bit
238 uint8_t *above_filter_ctx;
239 VP56mv (*above_mv_ctx)[2];
242 uint8_t *intra_pred_data[3]; // bottom row of previous superblock row, per plane
243 struct VP9Filter *lflvl;
244 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135*144];
246 // block reconstruction intermediates
247 int block_alloc_using_2pass; // mode the block buffers were last sized for
248 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
249 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
250 struct { int x, y; } min_mv, max_mv; // mv clamp bounds for the current block
251 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64];
252 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64];
253 uint16_t mvscale[3][2]; // per-ref 14-bit fixed-point scale factors (0 = unscaled)
254 uint8_t mvstep[3][2];
// {width, height} of each block size; first table in 4-pixel units
// (16 == 64px), second in 8-pixel units with a minimum of 1.
257 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
259 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
260 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
262 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
263 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
// Allocate the frame buffer plus the side-data buffer holding the
// segmentation map and the per-block mv/ref pairs. Returns 0 or AVERROR.
267 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
269 VP9Context *s = ctx->priv_data;
272 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
// 64 segment-id bytes per 64x64 superblock
274 sz = 64 * s->sb_cols * s->sb_rows;
// single allocation: sz segmap bytes followed by sz VP9mvrefPair entries
275 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
276 ff_thread_release_buffer(ctx, &f->tf); // don't leak the frame on OOM
277 return AVERROR(ENOMEM);
280 f->segmentation_map = f->extradata->data;
281 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
// Release a VP9Frame: drop both the picture buffer and the segmap/mv extradata.
286 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
288 ff_thread_release_buffer(ctx, &f->tf);
289 av_buffer_unref(&f->extradata);
// Make dst an additional reference to src (frame + extradata). On any
// failure dst ends up fully unreferenced. Returns 0 or a negative AVERROR.
292 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
296 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
298 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
299 vp9_unref_frame(ctx, dst); // roll back the frame ref taken above
300 return AVERROR(ENOMEM);
// plain pointer copies are fine: they alias the shared extradata buffer
303 dst->segmentation_map = src->segmentation_map;
305 dst->uses_2pass = src->uses_2pass;
// (Re)compute frame geometry and (re)allocate the width-dependent "above"
// context lines, intra-pred row and loop-filter scratch when the coded
// size or pixel format changes. Returns 0 or a negative AVERROR.
310 static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
312 VP9Context *s = ctx->priv_data;
315 av_assert0(w > 0 && h > 0);
// fast path: nothing changed since the last call
317 if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
// derived sizes: 64x64 superblocks and 8x8 blocks, rounded up
323 s->sb_cols = (w + 63) >> 6;
324 s->sb_rows = (h + 63) >> 6;
325 s->cols = (w + 7) >> 3;
326 s->rows = (h + 7) >> 3;
// assign() carves n*sb_cols elements for var out of the single allocation
// below and advances the cursor p
328 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
329 av_freep(&s->intra_pred_data[0]);
330 // FIXME we slightly over-allocate here for subsampled chroma, but a little
331 // bit of padding shouldn't affect performance...
332 p = av_malloc(s->sb_cols * (320 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
334 return AVERROR(ENOMEM);
335 assign(s->intra_pred_data[0], uint8_t *, 64);
336 assign(s->intra_pred_data[1], uint8_t *, 64);
337 assign(s->intra_pred_data[2], uint8_t *, 64);
338 assign(s->above_y_nnz_ctx, uint8_t *, 16);
339 assign(s->above_mode_ctx, uint8_t *, 16);
340 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
341 assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
342 assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
343 assign(s->above_partition_ctx, uint8_t *, 8);
344 assign(s->above_skip_ctx, uint8_t *, 8);
345 assign(s->above_txfm_ctx, uint8_t *, 8);
346 assign(s->above_segpred_ctx, uint8_t *, 8);
347 assign(s->above_intra_ctx, uint8_t *, 8);
348 assign(s->above_comp_ctx, uint8_t *, 8);
349 assign(s->above_ref_ctx, uint8_t *, 8);
350 assign(s->above_filter_ctx, uint8_t *, 8);
351 assign(s->lflvl, struct VP9Filter *, 1);
354 // these will be re-allocated a little later
355 av_freep(&s->b_base);
356 av_freep(&s->block_base);
// (Re)allocate the coefficient/eob/block scratch buffers, sized either for
// one superblock (1-pass) or for the whole frame (2-pass). Returns 0 or
// AVERROR(ENOMEM).
361 static int update_block_buffers(AVCodecContext *ctx)
363 VP9Context *s = ctx->priv_data;
364 int chroma_blocks, chroma_eobs;
// keep existing buffers if they were sized for the same pass mode
366 if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
370 av_free(s->block_base);
// chroma sizes shrink with horizontal/vertical subsampling
371 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
372 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
373 if (s->frames[CUR_FRAME].uses_2pass) {
// 2-pass: keep per-superblock data for the whole frame between passes
374 int sbs = s->sb_cols * s->sb_rows;
376 s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
377 s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * sizeof(int16_t) +
378 16 * 16 + 2 * chroma_eobs) * sbs);
379 if (!s->b_base || !s->block_base)
380 return AVERROR(ENOMEM);
// carve the one allocation into luma coeffs, 2x chroma coeffs, then eobs
381 s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
382 s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks;
383 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks);
384 s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
385 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
// 1-pass: a single superblock's worth of scratch is enough
387 s->b_base = av_malloc(sizeof(VP9Block));
388 s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * sizeof(int16_t) +
389 16 * 16 + 2 * chroma_eobs);
390 if (!s->b_base || !s->block_base)
391 return AVERROR(ENOMEM);
392 s->uvblock_base[0] = s->block_base + 64 * 64;
393 s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks;
394 s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks);
395 s->uveob_base[0] = s->eob_base + 16 * 16;
396 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
398 s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
403 // for some reason the sign bit is at the end, not the start, of a bit sequence
404 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
406 int v = get_bits(gb, n);
407 return get_bits1(gb) ? -v : v;
// Inverse of the "recenter nonnegative" folding used by VP9's differential
// probability updates: map the coded offset v back to an absolute value
// around the pivot m. Offsets beyond 2*m are taken verbatim; closer offsets
// alternate below/above the pivot depending on parity.
static inline int inv_recenter_nonneg(int v, int m)
{
    if (v > 2 * m)
        return v;
    if (v & 1)
        return m - ((v + 1) >> 1);
    return m + (v >> 1);
}
415 // differential forward probability updates
// Read a differentially-coded probability update from the range coder and
// return the new probability in [1, 255], derived from the old value p.
416 static int update_prob(VP56RangeCoder *c, int p)
// maps the VLC-decoded index d onto the actual absolute difference; the
// first 20 entries are the coarse "cheap" updates described below
418 static const int inv_map_table[254] = {
419 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
420 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
421 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
422 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
423 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
424 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
425 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
426 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
427 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
428 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
429 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
430 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
431 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
432 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
433 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
434 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
435 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
436 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
441 /* This code is trying to do a differential probability update. For a
442 * current probability A in the range [1, 255], the difference to a new
443 * probability of any value can be expressed differentially as 1-A,255-A
444 * where some part of this (absolute range) exists both in positive as
445 * well as the negative part, whereas another part only exists in one
446 * half. We're trying to code this shared part differentially, i.e.
447 * times two where the value of the lowest bit specifies the sign, and
448 * the single part is then coded on top of this. This absolute difference
449 * then again has a value of [0,254], but a bigger value in this range
450 * indicates that we're further away from the original value A, so we
451 * can code this as a VLC code, since higher values are increasingly
452 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
453 * updates vs. the 'fine, exact' updates further down the range, which
454 * adds one extra dimension to this differential update model. */
// VLC read of the index d: successively larger subranges (4/4/5/7 bits)
456 if (!vp8_rac_get(c)) {
457 d = vp8_rac_get_uint(c, 4) + 0;
458 } else if (!vp8_rac_get(c)) {
459 d = vp8_rac_get_uint(c, 4) + 16;
460 } else if (!vp8_rac_get(c)) {
461 d = vp8_rac_get_uint(c, 5) + 32;
463 d = vp8_rac_get_uint(c, 7);
465 d = (d << 1) - 65 + vp8_rac_get(c);
// un-fold around p; mirrored for p > 128 so the result stays in [1, 255]
469 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
470 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
// Parse colorspace, color range and (profile 1) chroma subsampling from the
// header; sets ctx->colorspace/color_range and s->ss_h/ss_v as side effects.
// Returns the resulting pixel format or a negative AVERROR.
473 static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
475 static const enum AVColorSpace colorspaces[8] = {
476 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
477 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
479 VP9Context *s = ctx->priv_data;
480 enum AVPixelFormat res;
// 3-bit colorspace index per the VP9 header syntax
482 ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
483 if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
484 if (s->profile == 1) {
485 s->ss_h = s->ss_v = 1;
486 res = AV_PIX_FMT_GBRP;
487 ctx->color_range = AVCOL_RANGE_JPEG; // RGB is always full range
489 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
490 return AVERROR_INVALIDDATA;
493 static const enum AVPixelFormat pix_fmt_for_ss[2 /* v */][2 /* h */] = {
494 { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
495 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P },
497 ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
498 if (s->profile == 1) {
// profile 1 codes subsampling explicitly; 4:2:0 is reserved for profile 0
499 s->ss_h = get_bits1(&s->gb);
500 s->ss_v = get_bits1(&s->gb);
501 if ((res = pix_fmt_for_ss[s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
502 av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile 1\n");
503 return AVERROR_INVALIDDATA;
504 } else if (get_bits1(&s->gb)) {
505 av_log(ctx, AV_LOG_ERROR, "Profile 1 color details reserved bit set\n");
506 return AVERROR_INVALIDDATA;
// profile 0: fixed 4:2:0
509 s->ss_h = s->ss_v = 1;
510 res = AV_PIX_FMT_YUV420P;
// Parse the VP9 uncompressed frame header and the following arith-coded
// (compressed) header: frame type/size/refs, loop filter, quantizers,
// segmentation, tiling, and all forward probability updates. Returns the
// total header size in bytes (offset of the first tile) or a negative
// AVERROR on invalid data.
517 static int decode_frame_header(AVCodecContext *ctx,
518 const uint8_t *data, int size, int *ref)
520 VP9Context *s = ctx->priv_data;
521 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
522 enum AVPixelFormat fmt = ctx->pix_fmt;
524 const uint8_t *data2;
527 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
528 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
531 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
532 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
533 return AVERROR_INVALIDDATA;
// profile is coded as two separate bits (low bit first)
535 s->profile = get_bits1(&s->gb);
536 s->profile |= get_bits1(&s->gb) << 1;
537 if (s->profile > 1) {
538 av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", s->profile);
539 return AVERROR_INVALIDDATA;
// show_existing_frame: re-output a stored reference (index goes to *ref)
541 if (get_bits1(&s->gb)) {
542 *ref = get_bits(&s->gb, 3);
// frame type / visibility / error-resilience flags
545 s->last_keyframe = s->keyframe;
546 s->keyframe = !get_bits1(&s->gb);
547 last_invisible = s->invisible;
548 s->invisible = !get_bits1(&s->gb);
549 s->errorres = get_bits1(&s->gb);
550 s->use_last_frame_mvs = !s->errorres && !last_invisible;
// --- keyframe header path ---
552 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
553 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
554 return AVERROR_INVALIDDATA;
556 if ((fmt = read_colorspace_details(ctx)) < 0)
558 // for profile 1, here follows the subsampling bits
559 s->refreshrefmask = 0xff; // keyframes refresh all reference slots
560 w = get_bits(&s->gb, 16) + 1;
561 h = get_bits(&s->gb, 16) + 1;
562 if (get_bits1(&s->gb)) // display size
563 skip_bits(&s->gb, 32);
// --- inter/intra-only header path ---
565 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
566 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
568 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
569 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
570 return AVERROR_INVALIDDATA;
572 if (s->profile == 1) {
573 if ((fmt = read_colorspace_details(ctx)) < 0)
// profile 0 intra-only frames are implicitly 4:2:0 BT.470BG full-range
576 s->ss_h = s->ss_v = 1;
577 fmt = AV_PIX_FMT_YUV420P;
578 ctx->colorspace = AVCOL_SPC_BT470BG;
579 ctx->color_range = AVCOL_RANGE_JPEG;
581 s->refreshrefmask = get_bits(&s->gb, 8);
582 w = get_bits(&s->gb, 16) + 1;
583 h = get_bits(&s->gb, 16) + 1;
584 if (get_bits1(&s->gb)) // display size
585 skip_bits(&s->gb, 32);
// inter frame: refresh mask plus three active references with sign bias
587 s->refreshrefmask = get_bits(&s->gb, 8);
588 s->refidx[0] = get_bits(&s->gb, 3);
589 s->signbias[0] = get_bits1(&s->gb);
590 s->refidx[1] = get_bits(&s->gb, 3);
591 s->signbias[1] = get_bits1(&s->gb);
592 s->refidx[2] = get_bits(&s->gb, 3);
593 s->signbias[2] = get_bits1(&s->gb);
594 if (!s->refs[s->refidx[0]].f->data[0] ||
595 !s->refs[s->refidx[1]].f->data[0] ||
596 !s->refs[s->refidx[2]].f->data[0]) {
597 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
598 return AVERROR_INVALIDDATA;
// frame size is either copied from one of the refs or coded explicitly
600 if (get_bits1(&s->gb)) {
601 w = s->refs[s->refidx[0]].f->width;
602 h = s->refs[s->refidx[0]].f->height;
603 } else if (get_bits1(&s->gb)) {
604 w = s->refs[s->refidx[1]].f->width;
605 h = s->refs[s->refidx[1]].f->height;
606 } else if (get_bits1(&s->gb)) {
607 w = s->refs[s->refidx[2]].f->width;
608 h = s->refs[s->refidx[2]].f->height;
610 w = get_bits(&s->gb, 16) + 1;
611 h = get_bits(&s->gb, 16) + 1;
613 // Note that in this code, "CUR_FRAME" is actually before we
614 // have formally allocated a frame, and thus actually represents
// temporal mv prediction only works if the size didn't change
616 s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
617 s->frames[CUR_FRAME].tf.f->height == h;
618 if (get_bits1(&s->gb)) // display size
619 skip_bits(&s->gb, 32);
620 s->highprecisionmvs = get_bits1(&s->gb);
621 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
// compound prediction is possible only with mixed reference sign biases
623 s->allowcompinter = s->signbias[0] != s->signbias[1] ||
624 s->signbias[0] != s->signbias[2];
625 if (s->allowcompinter) {
// fixed ref is the odd one out; the other two are the variable refs
626 if (s->signbias[0] == s->signbias[1]) {
628 s->varcompref[0] = 0;
629 s->varcompref[1] = 1;
630 } else if (s->signbias[0] == s->signbias[2]) {
632 s->varcompref[0] = 0;
633 s->varcompref[1] = 2;
636 s->varcompref[0] = 1;
637 s->varcompref[1] = 2;
// per-reference scaling factors in 14-bit fixed point (0,0 = unscaled)
641 for (i = 0; i < 3; i++) {
642 AVFrame *ref = s->refs[s->refidx[i]].f;
643 int refw = ref->width, refh = ref->height;
645 if (refw == w && refh == h) {
646 s->mvscale[i][0] = s->mvscale[i][1] = 0;
// spec limits: ref may be at most 2x larger or 16x smaller than the frame
648 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
649 av_log(ctx, AV_LOG_ERROR,
650 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
652 return AVERROR_INVALIDDATA;
654 s->mvscale[i][0] = (refw << 14) / w;
655 s->mvscale[i][1] = (refh << 14) / h;
656 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
657 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
// error-resilient frames force no ctx refresh and parallel mode
662 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
663 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
664 s->framectxid = c = get_bits(&s->gb, 2);
666 /* loopfilter header data */
667 s->filter.level = get_bits(&s->gb, 6);
668 sharp = get_bits(&s->gb, 3);
669 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
670 // the old cache values since they are still valid
671 if (s->filter.sharpness != sharp)
672 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
673 s->filter.sharpness = sharp;
674 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
675 if (get_bits1(&s->gb)) {
676 for (i = 0; i < 4; i++)
677 if (get_bits1(&s->gb))
678 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
679 for (i = 0; i < 2; i++)
680 if (get_bits1(&s->gb))
681 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
685 /* quantization header data */
686 s->yac_qi = get_bits(&s->gb, 8);
687 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
688 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
689 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
690 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
691 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
693 /* segmentation header info */
694 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
695 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
696 for (i = 0; i < 7; i++)
697 s->prob.seg[i] = get_bits1(&s->gb) ?
698 get_bits(&s->gb, 8) : 255;
699 if ((s->segmentation.temporal = get_bits1(&s->gb))) {
700 for (i = 0; i < 3; i++)
701 s->prob.segpred[i] = get_bits1(&s->gb) ?
702 get_bits(&s->gb, 8) : 255;
// reusing the previous frame's segmap requires an unchanged frame size
705 if ((!s->segmentation.update_map || s->segmentation.temporal) &&
706 (w != s->frames[CUR_FRAME].tf.f->width ||
707 h != s->frames[CUR_FRAME].tf.f->height)) {
708 av_log(ctx, AV_LOG_ERROR,
709 "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
710 s->segmentation.temporal, s->segmentation.update_map);
711 return AVERROR_INVALIDDATA;
// per-segment feature data (quant, loopfilter, ref, skip)
714 if (get_bits1(&s->gb)) {
715 s->segmentation.absolute_vals = get_bits1(&s->gb);
716 for (i = 0; i < 8; i++) {
717 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
718 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
719 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
720 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
721 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
722 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
723 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
// segmentation disabled: only feat[0] is used, with all features off
727 s->segmentation.feat[0].q_enabled = 0;
728 s->segmentation.feat[0].lf_enabled = 0;
729 s->segmentation.feat[0].skip_enabled = 0;
730 s->segmentation.feat[0].ref_enabled = 0;
733 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
734 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
735 int qyac, qydc, quvac, quvdc, lflvl, sh;
737 if (s->segmentation.feat[i].q_enabled) {
738 if (s->segmentation.absolute_vals)
739 qyac = s->segmentation.feat[i].q_val;
741 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
745 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
746 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
747 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
748 qyac = av_clip_uintp2(qyac, 8);
750 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
751 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
752 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
753 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];
// sh scales the lf deltas up when the base filter level is >= 32
755 sh = s->filter.level >= 32;
756 if (s->segmentation.feat[i].lf_enabled) {
757 if (s->segmentation.absolute_vals)
758 lflvl = s->segmentation.feat[i].lf_val;
760 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
762 lflvl = s->filter.level;
764 if (s->lf_delta.enabled) {
765 s->segmentation.feat[i].lflvl[0][0] =
766 s->segmentation.feat[i].lflvl[0][1] =
767 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
768 for (j = 1; j < 4; j++) {
769 s->segmentation.feat[i].lflvl[j][0] =
770 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
771 s->lf_delta.mode[0]) * (1 << sh)), 6);
772 s->segmentation.feat[i].lflvl[j][1] =
773 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
774 s->lf_delta.mode[1]) * (1 << sh)), 6);
777 memset(s->segmentation.feat[i].lflvl, lflvl,
778 sizeof(s->segmentation.feat[i].lflvl));
783 if ((res = update_size(ctx, w, h, fmt)) < 0) {
784 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
// tiling: tile columns are bounded by sb_cols (min 4 sbs, max 64 per tile)
787 for (s->tiling.log2_tile_cols = 0;
788 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
789 s->tiling.log2_tile_cols++) ;
790 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
791 max = FFMAX(0, max - 1);
792 while (max > s->tiling.log2_tile_cols) {
793 if (get_bits1(&s->gb))
794 s->tiling.log2_tile_cols++;
798 s->tiling.log2_tile_rows = decode012(&s->gb);
799 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
800 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
801 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
// one range coder per tile column
802 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
803 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
805 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
806 return AVERROR(ENOMEM);
// reset all probability contexts to defaults where the spec requires it
810 if (s->keyframe || s->errorres || s->intraonly) {
811 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
812 s->prob_ctx[3].p = vp9_default_probs;
813 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
814 sizeof(vp9_default_coef_probs));
815 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
816 sizeof(vp9_default_coef_probs));
817 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
818 sizeof(vp9_default_coef_probs));
819 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
820 sizeof(vp9_default_coef_probs));
823 // next 16 bits is size of the rest of the header (arith-coded)
824 size2 = get_bits(&s->gb, 16);
825 data2 = align_get_bits(&s->gb);
826 if (size2 > size - (data2 - data)) {
827 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
828 return AVERROR_INVALIDDATA;
830 ff_vp56_init_range_decoder(&s->c, data2, size2);
831 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
832 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
833 return AVERROR_INVALIDDATA;
// clear symbol counters; keyframes only accumulate coef/eob counts
836 if (s->keyframe || s->intraonly) {
837 memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
839 memset(&s->counts, 0, sizeof(s->counts));
841 // FIXME is it faster to not copy here, but do it down in the fw updates
842 // as explicit copies if the fw update is missing (and skip the copy upon
844 s->prob.p = s->prob_ctx[c].p;
// transform mode, then its forward-updated probabilities
848 s->txfmmode = TX_4X4;
850 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
851 if (s->txfmmode == 3)
852 s->txfmmode += vp8_rac_get(&s->c);
854 if (s->txfmmode == TX_SWITCHABLE) {
855 for (i = 0; i < 2; i++)
856 if (vp56_rac_get_prob_branchy(&s->c, 252))
857 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
858 for (i = 0; i < 2; i++)
859 for (j = 0; j < 2; j++)
860 if (vp56_rac_get_prob_branchy(&s->c, 252))
861 s->prob.p.tx16p[i][j] =
862 update_prob(&s->c, s->prob.p.tx16p[i][j]);
863 for (i = 0; i < 2; i++)
864 for (j = 0; j < 3; j++)
865 if (vp56_rac_get_prob_branchy(&s->c, 252))
866 s->prob.p.tx32p[i][j] =
867 update_prob(&s->c, s->prob.p.tx32p[i][j]);
// coefficient probability updates, one set per transform size
872 for (i = 0; i < 4; i++) {
873 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
874 if (vp8_rac_get(&s->c)) {
875 for (j = 0; j < 2; j++)
876 for (k = 0; k < 2; k++)
877 for (l = 0; l < 6; l++)
878 for (m = 0; m < 6; m++) {
879 uint8_t *p = s->prob.coef[i][j][k][l][m];
880 uint8_t *r = ref[j][k][l][m];
881 if (m >= 3 && l == 0) // dc only has 3 pt
883 for (n = 0; n < 3; n++) {
884 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
885 p[n] = update_prob(&s->c, r[n]);
// no update for this tx size: copy probabilities from the saved context
893 for (j = 0; j < 2; j++)
894 for (k = 0; k < 2; k++)
895 for (l = 0; l < 6; l++)
896 for (m = 0; m < 6; m++) {
897 uint8_t *p = s->prob.coef[i][j][k][l][m];
898 uint8_t *r = ref[j][k][l][m];
899 if (m > 3 && l == 0) // dc only has 3 pt
// no updates are coded beyond the active transform size
905 if (s->txfmmode == i)
// skip flag probabilities
910 for (i = 0; i < 3; i++)
911 if (vp56_rac_get_prob_branchy(&s->c, 252))
912 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
// inter-only updates: modes, filter, intra flag, comp pred, refs, mv probs
913 if (!s->keyframe && !s->intraonly) {
914 for (i = 0; i < 7; i++)
915 for (j = 0; j < 3; j++)
916 if (vp56_rac_get_prob_branchy(&s->c, 252))
917 s->prob.p.mv_mode[i][j] =
918 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
920 if (s->filtermode == FILTER_SWITCHABLE)
921 for (i = 0; i < 4; i++)
922 for (j = 0; j < 2; j++)
923 if (vp56_rac_get_prob_branchy(&s->c, 252))
924 s->prob.p.filter[i][j] =
925 update_prob(&s->c, s->prob.p.filter[i][j]);
927 for (i = 0; i < 4; i++)
928 if (vp56_rac_get_prob_branchy(&s->c, 252))
929 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
931 if (s->allowcompinter) {
932 s->comppredmode = vp8_rac_get(&s->c);
934 s->comppredmode += vp8_rac_get(&s->c);
935 if (s->comppredmode == PRED_SWITCHABLE)
936 for (i = 0; i < 5; i++)
937 if (vp56_rac_get_prob_branchy(&s->c, 252))
939 update_prob(&s->c, s->prob.p.comp[i]);
941 s->comppredmode = PRED_SINGLEREF;
944 if (s->comppredmode != PRED_COMPREF) {
945 for (i = 0; i < 5; i++) {
946 if (vp56_rac_get_prob_branchy(&s->c, 252))
947 s->prob.p.single_ref[i][0] =
948 update_prob(&s->c, s->prob.p.single_ref[i][0]);
949 if (vp56_rac_get_prob_branchy(&s->c, 252))
950 s->prob.p.single_ref[i][1] =
951 update_prob(&s->c, s->prob.p.single_ref[i][1]);
955 if (s->comppredmode != PRED_SINGLEREF) {
956 for (i = 0; i < 5; i++)
957 if (vp56_rac_get_prob_branchy(&s->c, 252))
958 s->prob.p.comp_ref[i] =
959 update_prob(&s->c, s->prob.p.comp_ref[i]);
962 for (i = 0; i < 4; i++)
963 for (j = 0; j < 9; j++)
964 if (vp56_rac_get_prob_branchy(&s->c, 252))
965 s->prob.p.y_mode[i][j] =
966 update_prob(&s->c, s->prob.p.y_mode[i][j]);
968 for (i = 0; i < 4; i++)
969 for (j = 0; j < 4; j++)
970 for (k = 0; k < 3; k++)
971 if (vp56_rac_get_prob_branchy(&s->c, 252))
972 s->prob.p.partition[3 - i][j][k] =
973 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
975 // mv fields don't use the update_prob subexp model for some reason
976 for (i = 0; i < 3; i++)
977 if (vp56_rac_get_prob_branchy(&s->c, 252))
978 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
980 for (i = 0; i < 2; i++) {
981 if (vp56_rac_get_prob_branchy(&s->c, 252))
982 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
984 for (j = 0; j < 10; j++)
985 if (vp56_rac_get_prob_branchy(&s->c, 252))
986 s->prob.p.mv_comp[i].classes[j] =
987 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
989 if (vp56_rac_get_prob_branchy(&s->c, 252))
990 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
992 for (j = 0; j < 10; j++)
993 if (vp56_rac_get_prob_branchy(&s->c, 252))
994 s->prob.p.mv_comp[i].bits[j] =
995 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
998 for (i = 0; i < 2; i++) {
999 for (j = 0; j < 2; j++)
1000 for (k = 0; k < 3; k++)
1001 if (vp56_rac_get_prob_branchy(&s->c, 252))
1002 s->prob.p.mv_comp[i].class0_fp[j][k] =
1003 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1005 for (j = 0; j < 3; j++)
1006 if (vp56_rac_get_prob_branchy(&s->c, 252))
1007 s->prob.p.mv_comp[i].fp[j] =
1008 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// high-precision mv probabilities only when 1/8-pel mvs are enabled
1011 if (s->highprecisionmvs) {
1012 for (i = 0; i < 2; i++) {
1013 if (vp56_rac_get_prob_branchy(&s->c, 252))
1014 s->prob.p.mv_comp[i].class0_hp =
1015 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1017 if (vp56_rac_get_prob_branchy(&s->c, 252))
1018 s->prob.p.mv_comp[i].hp =
1019 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// total header size = uncompressed part + compressed part
1024 return (data2 - data) + size2;
// Clamp src into the mv bounds computed for the current block (s->min_mv /
// s->max_mv) and store the result in dst.
1027 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
1030 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
1031 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
1034 static void find_ref_mvs(VP9Context *s,
1035 VP56mv *pmv, int ref, int z, int idx, int sb)
1037 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
1038 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
1039 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
1040 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
1041 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
1042 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
1043 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
1044 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
1045 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1046 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
1047 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1048 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
1049 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
1050 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
1051 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1052 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
1053 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
1054 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1055 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1056 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1057 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1058 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1059 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1060 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1061 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1062 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1063 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1066 int row = s->row, col = s->col, row7 = s->row7;
1067 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
1068 #define INVALID_MV 0x80008000U
1069 uint32_t mem = INVALID_MV;
1072 #define RETURN_DIRECT_MV(mv) \
1074 uint32_t m = AV_RN32A(&mv); \
1078 } else if (mem == INVALID_MV) { \
1080 } else if (m != mem) { \
1087 if (sb == 2 || sb == 1) {
1088 RETURN_DIRECT_MV(b->mv[0][z]);
1089 } else if (sb == 3) {
1090 RETURN_DIRECT_MV(b->mv[2][z]);
1091 RETURN_DIRECT_MV(b->mv[1][z]);
1092 RETURN_DIRECT_MV(b->mv[0][z]);
1095 #define RETURN_MV(mv) \
1100 clamp_mv(&tmp, &mv, s); \
1101 m = AV_RN32A(&tmp); \
1105 } else if (mem == INVALID_MV) { \
1107 } else if (m != mem) { \
1112 uint32_t m = AV_RN32A(&mv); \
1114 clamp_mv(pmv, &mv, s); \
1116 } else if (mem == INVALID_MV) { \
1118 } else if (m != mem) { \
1119 clamp_mv(pmv, &mv, s); \
1126 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1127 if (mv->ref[0] == ref) {
1128 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1129 } else if (mv->ref[1] == ref) {
1130 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
1133 if (col > s->tiling.tile_col_start) {
1134 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1135 if (mv->ref[0] == ref) {
1136 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1137 } else if (mv->ref[1] == ref) {
1138 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1146 // previously coded MVs in this neighbourhood, using same reference frame
1147 for (; i < 8; i++) {
1148 int c = p[i][0] + col, r = p[i][1] + row;
1150 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1151 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1153 if (mv->ref[0] == ref) {
1154 RETURN_MV(mv->mv[0]);
1155 } else if (mv->ref[1] == ref) {
1156 RETURN_MV(mv->mv[1]);
1161 // MV at this position in previous frame, using same reference frame
1162 if (s->use_last_frame_mvs) {
1163 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1165 if (!s->frames[REF_FRAME_MVPAIR].uses_2pass)
1166 ff_thread_await_progress(&s->frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
1167 if (mv->ref[0] == ref) {
1168 RETURN_MV(mv->mv[0]);
1169 } else if (mv->ref[1] == ref) {
1170 RETURN_MV(mv->mv[1]);
1174 #define RETURN_SCALE_MV(mv, scale) \
1177 VP56mv mv_temp = { -mv.x, -mv.y }; \
1178 RETURN_MV(mv_temp); \
1184 // previously coded MVs in this neighbourhood, using different reference frame
1185 for (i = 0; i < 8; i++) {
1186 int c = p[i][0] + col, r = p[i][1] + row;
1188 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1189 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1191 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1192 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1194 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1195 // BUG - libvpx has this condition regardless of whether
1196 // we used the first ref MV and pre-scaling
1197 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1198 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1203 // MV at this position in previous frame, using different reference frame
1204 if (s->use_last_frame_mvs) {
1205 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1207 // no need to await_progress, because we already did that above
1208 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1209 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1211 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1212 // BUG - libvpx has this condition regardless of whether
1213 // we used the first ref MV and pre-scaling
1214 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1215 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1222 #undef RETURN_SCALE_MV
1225 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
/*
 * Decode one motion-vector component delta from the range coder.
 * idx selects the component context (0 = row/vertical, 1 = col/horizontal);
 * hp enables the extra high-precision (eighth-pel) bit.
 * Every decoded symbol is also tallied into s->counts.mv_comp[idx] for
 * backward probability adaptation.
 * Returns the signed magnitude; never returns 0 (sign applied at the end).
 * NOTE(review): several interior lines of this function are not visible in
 * this chunk (declaration of m, branch structure) — comments below only
 * describe what the visible statements demonstrably do.
 */
1227 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1228 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1229 s->prob.p.mv_comp[idx].classes);
1231 s->counts.mv_comp[idx].sign[sign]++;
1232 s->counts.mv_comp[idx].classes[c]++;
/* Large classes (c != 0): read c raw magnitude bits, then the fractional
 * (fp) bits and, when enabled, the high-precision (hp) bit. */
1236 for (n = 0, m = 0; m < c; m++) {
1237 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1239 s->counts.mv_comp[idx].bits[m][bit]++;
1242 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1244 s->counts.mv_comp[idx].fp[bit]++;
1246 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1247 s->counts.mv_comp[idx].hp[bit]++;
1251 // bug in libvpx - we count for bw entropy purposes even if the
1253 s->counts.mv_comp[idx].hp[1]++;
/* Class 0: magnitude is one class0 bit plus class0-specific fp/hp bits. */
1257 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1258 s->counts.mv_comp[idx].class0[n]++;
1259 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1260 s->prob.p.mv_comp[idx].class0_fp[n]);
1261 s->counts.mv_comp[idx].class0_fp[n][bit]++;
/* Pack: integer bit into bit 3, fractional bits into bits 1-2. */
1262 n = (n << 3) | (bit << 1);
1264 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1265 s->counts.mv_comp[idx].class0_hp[bit]++;
1269 // bug in libvpx - we count for bw entropy purposes even if the
1271 s->counts.mv_comp[idx].class0_hp[1]++;
/* Magnitude is n + 1 so the result is never zero; apply the sign bit. */
1275 return sign ? -(n + 1) : (n + 1);
1278 static void fill_mv(VP9Context *s,
1279 VP56mv *mv, int mode, int sb)
/*
 * Fill mv[0] (and mv[1] for compound prediction) for the current block/
 * sub-block. The predicted MV comes from find_ref_mvs(); for NEWMV modes a
 * residual is then read via read_mv_component() per active component of the
 * joint code. sb is the sub-block index, or -1 for whole-block prediction.
 * NOTE(review): interior lines (b = s->b, hp declaration, ZEROMV body,
 * compound-ref condition) are elided in this chunk; comments describe only
 * the visible statements.
 */
1283 if (mode == ZEROMV) {
1288 // FIXME cache this value and reuse for other subblocks
/* First reference: predict mv[0] from spatial/temporal neighbours. */
1289 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1290 mode == NEWMV ? -1 : sb);
1291 // FIXME maybe move this code into find_ref_mvs()
/* hp is only honoured for small MVs (|x|,|y| < 64); otherwise the
 * prediction is rounded down to quarter-pel precision. */
1292 if ((mode == NEWMV || sb == -1) &&
1293 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1307 if (mode == NEWMV) {
/* Joint code selects which components carry a coded residual. */
1308 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1309 s->prob.p.mv_joint);
1311 s->counts.mv_joint[j]++;
1312 if (j >= MV_JOINT_V)
1313 mv[0].y += read_mv_component(s, 0, hp);
1315 mv[0].x += read_mv_component(s, 1, hp);
1319 // FIXME cache this value and reuse for other subblocks
/* Second reference (compound prediction): same procedure for mv[1]. */
1320 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1321 mode == NEWMV ? -1 : sb);
1322 if ((mode == NEWMV || sb == -1) &&
1323 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1337 if (mode == NEWMV) {
1338 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1339 s->prob.p.mv_joint);
1341 s->counts.mv_joint[j]++;
1342 if (j >= MV_JOINT_V)
1343 mv[1].y += read_mv_component(s, 0, hp);
1345 mv[1].x += read_mv_component(s, 1, hp);
1351 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1352 ptrdiff_t stride, int v)
/*
 * Fill a w x h byte rectangle at ptr (row pitch = stride) with value v,
 * replicating v into the widest aligned store available for each width.
 * NOTE(review): the per-width switch/loop structure is elided in this chunk;
 * only the replication constants and one store are visible.
 */
/* v replicated into 2 bytes (w == 2 path). */
1362 int v16 = v * 0x0101;
/* v replicated into 4 bytes (w == 4 path). */
1370 uint32_t v32 = v * 0x01010101;
/* v replicated into 8 bytes (64-bit fast path). */
1379 uint64_t v64 = v * 0x0101010101010101ULL;
/* 32-bit fallback: two 4-byte stores cover 8 bytes. */
1385 uint32_t v32 = v * 0x01010101;
1388 AV_WN32A(ptr + 4, v32);
1397 static void decode_mode(AVCodecContext *ctx)
/*
 * Decode all mode information for the current block: segment id, skip flag,
 * intra/inter flag, transform size, prediction modes (intra or inter),
 * reference frames, interpolation filter and motion vectors; then propagate
 * everything into the above/left context arrays and the per-frame MV buffer.
 * NOTE(review): many interior lines (closing braces, else-branches, loop
 * headers) are elided in this chunk; comments annotate only visible code.
 */
/* Partition contexts written back per block size (left / above). */
1399 static const uint8_t left_ctx[N_BS_SIZES] = {
1400 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1402 static const uint8_t above_ctx[N_BS_SIZES] = {
1403 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
/* Largest transform size permitted for each block size. */
1405 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1406 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1407 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1409 VP9Context *s = ctx->priv_data;
1411 int row = s->row, col = s->col, row7 = s->row7;
1412 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
/* w4/h4: block extent in 8x8 units, clipped to the frame edge. */
1413 int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1414 int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
1415 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1416 int vref, filter_id;
/* --- segment id ----------------------------------------------------- */
1418 if (!s->segmentation.enabled) {
1420 } else if (s->keyframe || s->intraonly) {
1421 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
1422 } else if (!s->segmentation.update_map ||
1423 (s->segmentation.temporal &&
1424 vp56_rac_get_prob_branchy(&s->c,
1425 s->prob.segpred[s->above_segpred_ctx[col] +
1426 s->left_segpred_ctx[row7]]))) {
/* Temporal prediction: take the minimum seg id over the co-located
 * area in the reference segmentation map. */
1429 uint8_t *refsegmap = s->frames[REF_FRAME_SEGMAP].segmentation_map;
1431 if (!s->frames[REF_FRAME_SEGMAP].uses_2pass)
1432 ff_thread_await_progress(&s->frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
1433 for (y = 0; y < h4; y++) {
1434 int idx_base = (y + row) * 8 * s->sb_cols + col;
1435 for (x = 0; x < w4; x++)
1436 pred = FFMIN(pred, refsegmap[idx_base + x]);
1438 av_assert1(pred < 8);
1444 memset(&s->above_segpred_ctx[col], 1, w4);
1445 memset(&s->left_segpred_ctx[row7], 1, h4);
1447 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1450 memset(&s->above_segpred_ctx[col], 0, w4);
1451 memset(&s->left_segpred_ctx[row7], 0, h4);
/* Record the decoded seg id in the current frame's segmentation map. */
1453 if (s->segmentation.enabled &&
1454 (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1455 setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1456 bw4, bh4, 8 * s->sb_cols, b->seg_id);
/* --- skip flag ------------------------------------------------------ */
1459 b->skip = s->segmentation.enabled &&
1460 s->segmentation.feat[b->seg_id].skip_enabled;
1462 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1463 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1464 s->counts.skip[c][b->skip]++;
/* --- intra/inter flag ----------------------------------------------- */
1467 if (s->keyframe || s->intraonly) {
1469 } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1470 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1474 if (have_a && have_l) {
1475 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1478 c = have_a ? 2 * s->above_intra_ctx[col] :
1479 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1481 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1482 s->counts.intra[c][bit]++;
/* --- transform size ------------------------------------------------- */
1486 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
/* Context from neighbours' txfm sizes; skipped neighbours count as
 * max_tx. */
1490 c = (s->above_skip_ctx[col] ? max_tx :
1491 s->above_txfm_ctx[col]) +
1492 (s->left_skip_ctx[row7] ? max_tx :
1493 s->left_txfm_ctx[row7]) > max_tx;
1495 c = s->above_skip_ctx[col] ? 1 :
1496 (s->above_txfm_ctx[col] * 2 > max_tx);
1498 } else if (have_l) {
1499 c = s->left_skip_ctx[row7] ? 1 :
1500 (s->left_txfm_ctx[row7] * 2 > max_tx);
/* Unary-coded txfm size, capped by max_tx (32/16/8 branch). */
1506 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1508 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1510 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1512 s->counts.tx32p[c][b->tx]++;
1515 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1517 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1518 s->counts.tx16p[c][b->tx]++;
1521 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1522 s->counts.tx8p[c][b->tx]++;
/* Not TX_SWITCHABLE: use the frame-level size, capped by block size. */
1529 b->tx = FFMIN(max_tx, s->txfmmode);
/* --- intra modes, keyframe/intraonly (default kf probabilities) ----- */
1532 if (s->keyframe || s->intraonly) {
1533 uint8_t *a = &s->above_mode_ctx[col * 2];
1534 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1537 if (b->bs > BS_8x8) {
1538 // FIXME the memory storage intermediates here aren't really
1539 // necessary, they're just there to make the code slightly
/* Sub-8x8: up to four separately-coded y modes, each conditioned
 * on its above/left neighbours. */
1541 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1542 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1543 if (b->bs != BS_8x4) {
1544 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1545 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1546 l[0] = a[1] = b->mode[1];
1548 l[0] = a[1] = b->mode[1] = b->mode[0];
1550 if (b->bs != BS_4x8) {
1551 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1552 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1553 if (b->bs != BS_8x4) {
1554 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1555 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1556 l[1] = a[1] = b->mode[3];
1558 l[1] = a[1] = b->mode[3] = b->mode[2];
1561 b->mode[2] = b->mode[0];
1562 l[1] = a[1] = b->mode[3] = b->mode[1];
/* >= 8x8: one y mode for the whole block. */
1565 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1566 vp9_default_kf_ymode_probs[*a][*l]);
1567 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1568 // FIXME this can probably be optimized
1569 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1570 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1572 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1573 vp9_default_kf_uvmode_probs[b->mode[3]]);
/* --- intra modes, inter frame (adaptive probabilities + counts) ----- */
1574 } else if (b->intra) {
1576 if (b->bs > BS_8x8) {
1577 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1578 s->prob.p.y_mode[0]);
1579 s->counts.y_mode[0][b->mode[0]]++;
1580 if (b->bs != BS_8x4) {
1581 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1582 s->prob.p.y_mode[0]);
1583 s->counts.y_mode[0][b->mode[1]]++;
1585 b->mode[1] = b->mode[0];
1587 if (b->bs != BS_4x8) {
1588 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1589 s->prob.p.y_mode[0]);
1590 s->counts.y_mode[0][b->mode[2]]++;
1591 if (b->bs != BS_8x4) {
1592 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1593 s->prob.p.y_mode[0]);
1594 s->counts.y_mode[0][b->mode[3]]++;
1596 b->mode[3] = b->mode[2];
1599 b->mode[2] = b->mode[0];
1600 b->mode[3] = b->mode[1];
/* Probability set chosen by block-size group. */
1603 static const uint8_t size_group[10] = {
1604 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1606 int sz = size_group[b->bs];
1608 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1609 s->prob.p.y_mode[sz]);
1610 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1611 s->counts.y_mode[sz][b->mode[3]]++;
1613 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1614 s->prob.p.uv_mode[b->mode[3]]);
1615 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
/* --- inter block: references, filter, modes, MVs -------------------- */
/* Inter-mode context from (above, left) neighbour modes. */
1617 static const uint8_t inter_mode_ctx_lut[14][14] = {
1618 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1619 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1620 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1621 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1622 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1623 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1624 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1625 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1626 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1627 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1628 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1629 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1630 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1631 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
/* Segment may pin the reference frame. */
1634 if (s->segmentation.feat[b->seg_id].ref_enabled) {
1635 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1637 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1639 // read comp_pred flag
1640 if (s->comppredmode != PRED_SWITCHABLE) {
1641 b->comp = s->comppredmode == PRED_COMPREF;
1645 // FIXME add intra as ref=0xff (or -1) to make these easier?
/* Compound-prediction flag context from neighbour comp/ref state. */
1648 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1650 } else if (s->above_comp_ctx[col]) {
1651 c = 2 + (s->left_intra_ctx[row7] ||
1652 s->left_ref_ctx[row7] == s->fixcompref);
1653 } else if (s->left_comp_ctx[row7]) {
1654 c = 2 + (s->above_intra_ctx[col] ||
1655 s->above_ref_ctx[col] == s->fixcompref);
1657 c = (!s->above_intra_ctx[col] &&
1658 s->above_ref_ctx[col] == s->fixcompref) ^
1659 (!s->left_intra_ctx[row7] &&
1660 s->left_ref_ctx[row & 7] == s->fixcompref);
1663 c = s->above_comp_ctx[col] ? 3 :
1664 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1666 } else if (have_l) {
1667 c = s->left_comp_ctx[row7] ? 3 :
1668 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1672 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1673 s->counts.comp[c][b->comp]++;
1676 // read actual references
1677 // FIXME probably cache a few variables here to prevent repetitive
1678 // memory accesses below
1679 if (b->comp) /* two references */ {
/* Fixed ref decided by sign bias; the other (variable) ref is coded. */
1680 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1682 b->ref[fix_idx] = s->fixcompref;
1683 // FIXME can this codeblob be replaced by some sort of LUT?
1686 if (s->above_intra_ctx[col]) {
1687 if (s->left_intra_ctx[row7]) {
1690 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1692 } else if (s->left_intra_ctx[row7]) {
1693 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1695 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1697 if (refl == refa && refa == s->varcompref[1]) {
1699 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1700 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1701 (refl == s->fixcompref && refa == s->varcompref[0])) {
1704 c = (refa == refl) ? 3 : 1;
1706 } else if (!s->left_comp_ctx[row7]) {
1707 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1710 c = (refl == s->varcompref[1] &&
1711 refa != s->varcompref[1]) ? 2 : 4;
1713 } else if (!s->above_comp_ctx[col]) {
1714 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1717 c = (refa == s->varcompref[1] &&
1718 refl != s->varcompref[1]) ? 2 : 4;
1721 c = (refl == refa) ? 4 : 2;
1725 if (s->above_intra_ctx[col]) {
1727 } else if (s->above_comp_ctx[col]) {
1728 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1730 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1733 } else if (have_l) {
1734 if (s->left_intra_ctx[row7]) {
1736 } else if (s->left_comp_ctx[row7]) {
1737 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1739 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1744 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1745 b->ref[var_idx] = s->varcompref[bit];
1746 s->counts.comp_ref[c][bit]++;
1747 } else /* single reference */ {
/* First single_ref bit: LAST vs (GOLDEN/ALTREF). */
1750 if (have_a && !s->above_intra_ctx[col]) {
1751 if (have_l && !s->left_intra_ctx[row7]) {
1752 if (s->left_comp_ctx[row7]) {
1753 if (s->above_comp_ctx[col]) {
1754 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1755 !s->above_ref_ctx[col]);
1757 c = (3 * !s->above_ref_ctx[col]) +
1758 (!s->fixcompref || !s->left_ref_ctx[row7]);
1760 } else if (s->above_comp_ctx[col]) {
1761 c = (3 * !s->left_ref_ctx[row7]) +
1762 (!s->fixcompref || !s->above_ref_ctx[col]);
1764 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1766 } else if (s->above_intra_ctx[col]) {
1768 } else if (s->above_comp_ctx[col]) {
1769 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1771 c = 4 * (!s->above_ref_ctx[col]);
1773 } else if (have_l && !s->left_intra_ctx[row7]) {
1774 if (s->left_intra_ctx[row7]) {
1776 } else if (s->left_comp_ctx[row7]) {
1777 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1779 c = 4 * (!s->left_ref_ctx[row7]);
1784 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1785 s->counts.single_ref[c][0][bit]++;
/* Second single_ref bit: GOLDEN vs ALTREF. */
1789 // FIXME can this codeblob be replaced by some sort of LUT?
1792 if (s->left_intra_ctx[row7]) {
1793 if (s->above_intra_ctx[col]) {
1795 } else if (s->above_comp_ctx[col]) {
1796 c = 1 + 2 * (s->fixcompref == 1 ||
1797 s->above_ref_ctx[col] == 1);
1798 } else if (!s->above_ref_ctx[col]) {
1801 c = 4 * (s->above_ref_ctx[col] == 1);
1803 } else if (s->above_intra_ctx[col]) {
1804 if (s->left_intra_ctx[row7]) {
1806 } else if (s->left_comp_ctx[row7]) {
1807 c = 1 + 2 * (s->fixcompref == 1 ||
1808 s->left_ref_ctx[row7] == 1);
1809 } else if (!s->left_ref_ctx[row7]) {
1812 c = 4 * (s->left_ref_ctx[row7] == 1);
1814 } else if (s->above_comp_ctx[col]) {
1815 if (s->left_comp_ctx[row7]) {
1816 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1817 c = 3 * (s->fixcompref == 1 ||
1818 s->left_ref_ctx[row7] == 1);
1822 } else if (!s->left_ref_ctx[row7]) {
1823 c = 1 + 2 * (s->fixcompref == 1 ||
1824 s->above_ref_ctx[col] == 1);
1826 c = 3 * (s->left_ref_ctx[row7] == 1) +
1827 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1829 } else if (s->left_comp_ctx[row7]) {
1830 if (!s->above_ref_ctx[col]) {
1831 c = 1 + 2 * (s->fixcompref == 1 ||
1832 s->left_ref_ctx[row7] == 1);
1834 c = 3 * (s->above_ref_ctx[col] == 1) +
1835 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1837 } else if (!s->above_ref_ctx[col]) {
1838 if (!s->left_ref_ctx[row7]) {
1841 c = 4 * (s->left_ref_ctx[row7] == 1);
1843 } else if (!s->left_ref_ctx[row7]) {
1844 c = 4 * (s->above_ref_ctx[col] == 1);
1846 c = 2 * (s->left_ref_ctx[row7] == 1) +
1847 2 * (s->above_ref_ctx[col] == 1);
1850 if (s->above_intra_ctx[col] ||
1851 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1853 } else if (s->above_comp_ctx[col]) {
1854 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1856 c = 4 * (s->above_ref_ctx[col] == 1);
1859 } else if (have_l) {
1860 if (s->left_intra_ctx[row7] ||
1861 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1863 } else if (s->left_comp_ctx[row7]) {
1864 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1866 c = 4 * (s->left_ref_ctx[row7] == 1);
1871 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1872 s->counts.single_ref[c][1][bit]++;
1873 b->ref[0] = 1 + bit;
/* --- inter prediction mode (>= 8x8 blocks decode one mode here) ----- */
1878 if (b->bs <= BS_8x8) {
1879 if (s->segmentation.feat[b->seg_id].skip_enabled) {
1880 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1882 static const uint8_t off[10] = {
1883 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1886 // FIXME this needs to use the LUT tables from find_ref_mvs
1887 // because not all are -1,0/0,-1
1888 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1889 [s->left_mode_ctx[row7 + off[b->bs]]];
1891 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1892 s->prob.p.mv_mode[c]);
1893 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
/* mv_mode symbols start at NEARESTMV (10); counts index from 0. */
1894 s->counts.mv_mode[c][b->mode[0] - 10]++;
/* --- interpolation filter ------------------------------------------- */
1898 if (s->filtermode == FILTER_SWITCHABLE) {
1901 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1902 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1903 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1904 s->left_filter_ctx[row7] : 3;
1906 c = s->above_filter_ctx[col];
1908 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1909 c = s->left_filter_ctx[row7];
1914 filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1915 s->prob.p.filter[c]);
1916 s->counts.filter[c][filter_id]++;
1917 b->filter = vp9_filter_lut[filter_id];
1919 b->filter = s->filtermode;
/* --- sub-8x8 per-sub-block modes and MVs ---------------------------- */
1922 if (b->bs > BS_8x8) {
1923 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1925 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1926 s->prob.p.mv_mode[c]);
1927 s->counts.mv_mode[c][b->mode[0] - 10]++;
1928 fill_mv(s, b->mv[0], b->mode[0], 0);
1930 if (b->bs != BS_8x4) {
1931 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1932 s->prob.p.mv_mode[c]);
1933 s->counts.mv_mode[c][b->mode[1] - 10]++;
1934 fill_mv(s, b->mv[1], b->mode[1], 1);
1936 b->mode[1] = b->mode[0];
1937 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1938 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1941 if (b->bs != BS_4x8) {
1942 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1943 s->prob.p.mv_mode[c]);
1944 s->counts.mv_mode[c][b->mode[2] - 10]++;
1945 fill_mv(s, b->mv[2], b->mode[2], 2);
1947 if (b->bs != BS_8x4) {
1948 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1949 s->prob.p.mv_mode[c]);
1950 s->counts.mv_mode[c][b->mode[3] - 10]++;
1951 fill_mv(s, b->mv[3], b->mode[3], 3);
1953 b->mode[3] = b->mode[2];
1954 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1955 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1958 b->mode[2] = b->mode[0];
1959 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1960 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1961 b->mode[3] = b->mode[1];
1962 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1963 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
/* >= 8x8: one MV pair, replicated to all four sub-block slots. */
1966 fill_mv(s, b->mv[0], b->mode[0], -1);
1967 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1968 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1969 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
1970 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1971 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1972 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
/* Reference to record in ref_ctx: variable ref for compound blocks. */
1975 vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
/* --- context splatting: replicate per-block state along above/left -- */
/* Fast path (64-bit): replicate val into n context bytes at once. */
1979 #define SPLAT_CTX(var, val, n) \
1981 case 1: var = val; break; \
1982 case 2: AV_WN16A(&var, val * 0x0101); break; \
1983 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1984 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
1986 uint64_t v64 = val * 0x0101010101010101ULL; \
1987 AV_WN64A( &var, v64); \
1988 AV_WN64A(&((uint8_t *) &var)[8], v64); \
/* 32-bit fallback version of the same macro. */
1993 #define SPLAT_CTX(var, val, n) \
1995 case 1: var = val; break; \
1996 case 2: AV_WN16A(&var, val * 0x0101); break; \
1997 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1999 uint32_t v32 = val * 0x01010101; \
2000 AV_WN32A( &var, v32); \
2001 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2005 uint32_t v32 = val * 0x01010101; \
2006 AV_WN32A( &var, v32); \
2007 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2008 AV_WN32A(&((uint8_t *) &var)[8], v32); \
2009 AV_WN32A(&((uint8_t *) &var)[12], v32); \
2015 switch (bwh_tab[1][b->bs][0]) {
/* Write all per-direction contexts for this block in one shot. */
2016 #define SET_CTXS(dir, off, n) \
2018 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
2019 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
2020 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2021 if (!s->keyframe && !s->intraonly) { \
2022 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
2023 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
2024 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
2026 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2027 if (s->filtermode == FILTER_SWITCHABLE) { \
2028 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2033 case 1: SET_CTXS(above, col, 1); break;
2034 case 2: SET_CTXS(above, col, 2); break;
2035 case 4: SET_CTXS(above, col, 4); break;
2036 case 8: SET_CTXS(above, col, 8); break;
2038 switch (bwh_tab[1][b->bs][1]) {
2039 case 1: SET_CTXS(left, row7, 1); break;
2040 case 2: SET_CTXS(left, row7, 2); break;
2041 case 4: SET_CTXS(left, row7, 4); break;
2042 case 8: SET_CTXS(left, row7, 8); break;
/* --- MV context and per-frame MV buffer updates --------------------- */
2047 if (!s->keyframe && !s->intraonly) {
2048 if (b->bs > BS_8x8) {
/* Sub-8x8: edge sub-block MVs feed the above/left MV contexts. */
2049 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2051 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2052 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2053 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2054 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2055 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2056 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2057 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2058 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2060 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2062 for (n = 0; n < w4 * 2; n++) {
2063 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2064 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2066 for (n = 0; n < h4 * 2; n++) {
2067 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2068 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
/* Store refs + MVs into the frame-wide buffer used by later blocks
 * and by the next frame's temporal MV prediction. */
2074 for (y = 0; y < h4; y++) {
2075 int x, o = (row + y) * s->sb_cols * 8 + col;
2076 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
2079 for (x = 0; x < w4; x++) {
2083 } else if (b->comp) {
2084 for (x = 0; x < w4; x++) {
2085 mv[x].ref[0] = b->ref[0];
2086 mv[x].ref[1] = b->ref[1];
2087 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2088 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2091 for (x = 0; x < w4; x++) {
2092 mv[x].ref[0] = b->ref[0];
2094 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2100 // FIXME merge cnt/eob arguments?
2101 static av_always_inline int
2102 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2103 int is_tx32x32, unsigned (*cnt)[6][3],
2104 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2105 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2106 const int16_t *band_counts, const int16_t *qmul)
/*
 * Decode one transform block's coefficients into coef[] (scan order given
 * by scan/nb), updating the cnt/eob entropy counters and using the band
 * probabilities in p. qmul holds DC/AC dequant factors; for 32x32 blocks
 * (is_tx32x32) dequantized values are halved. Returns the end-of-block
 * position (0 = no coefficients).
 * NOTE(review): loop/brace structure is partially elided in this chunk
 * (do/while head, rc/val declarations, cache updates); comments describe
 * only the visible statements.
 */
2108 int i = 0, band = 0, band_left = band_counts[band];
/* tp: probability triplet (+ model-filled tail) for current band/ctx. */
2109 uint8_t *tp = p[0][nnz];
/* cache[] remembers decoded token magnitudes for neighbour-based ctx. */
2110 uint8_t cache[1024];
2115 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2116 eob[band][nnz][val]++;
2121 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2122 cnt[band][nnz][0]++;
2124 band_left = band_counts[++band];
/* Next context = rounded average of the two neighbour magnitudes. */
2126 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2128 if (++i == n_coeffs)
2129 break; //invalid input; blocks should end with EOB
2134 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2135 cnt[band][nnz][1]++;
2139 // fill in p[3-10] (model fill) - only once per frame for each pos
2141 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2143 cnt[band][nnz][2]++;
2144 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2145 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2146 cache[rc] = val = 2;
2148 val = 3 + vp56_rac_get_prob(c, tp[5]);
2151 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2153 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
/* cat1: values 5-6. */
2154 val = 5 + vp56_rac_get_prob(c, 159);
/* cat2: values 7-10. */
2156 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2157 val += vp56_rac_get_prob(c, 145);
2161 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2162 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
/* cat3: values 11-18. */
2163 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2164 val += (vp56_rac_get_prob(c, 148) << 1);
2165 val += vp56_rac_get_prob(c, 140);
/* cat4: values 19-34. */
2167 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2168 val += (vp56_rac_get_prob(c, 155) << 2);
2169 val += (vp56_rac_get_prob(c, 140) << 1);
2170 val += vp56_rac_get_prob(c, 135);
2172 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
/* cat5: values 35-66. */
2173 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2174 val += (vp56_rac_get_prob(c, 157) << 3);
2175 val += (vp56_rac_get_prob(c, 141) << 2);
2176 val += (vp56_rac_get_prob(c, 134) << 1);
2177 val += vp56_rac_get_prob(c, 130);
/* cat6: 14 raw bits with fixed probabilities, values from 67 up. */
2179 val = 67 + (vp56_rac_get_prob(c, 254) << 13);
2180 val += (vp56_rac_get_prob(c, 254) << 12);
2181 val += (vp56_rac_get_prob(c, 254) << 11);
2182 val += (vp56_rac_get_prob(c, 252) << 10);
2183 val += (vp56_rac_get_prob(c, 249) << 9);
2184 val += (vp56_rac_get_prob(c, 243) << 8);
2185 val += (vp56_rac_get_prob(c, 230) << 7);
2186 val += (vp56_rac_get_prob(c, 196) << 6);
2187 val += (vp56_rac_get_prob(c, 177) << 5);
2188 val += (vp56_rac_get_prob(c, 153) << 4);
2189 val += (vp56_rac_get_prob(c, 140) << 3);
2190 val += (vp56_rac_get_prob(c, 133) << 2);
2191 val += (vp56_rac_get_prob(c, 130) << 1);
2192 val += vp56_rac_get_prob(c, 129);
2197 band_left = band_counts[++band];
/* Sign bit, dequant (DC uses qmul[0] via !!i == 0); 32x32 halves. */
2199 coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
2201 coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
2202 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2204 } while (++i < n_coeffs);
2209 static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2210 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2211 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2212 const int16_t (*nb)[2], const int16_t *band_counts,
2213 const int16_t *qmul)
2215 return decode_coeffs_b_generic(c, coef, n_coeffs, 0, cnt, eob, p,
2216 nnz, scan, nb, band_counts, qmul);
2219 static int decode_coeffs_b32(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2220 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2221 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2222 const int16_t (*nb)[2], const int16_t *band_counts,
2223 const int16_t *qmul)
2225 return decode_coeffs_b_generic(c, coef, n_coeffs, 1, cnt, eob, p,
2226 nnz, scan, nb, band_counts, qmul);
2229 static void decode_coeffs(AVCodecContext *ctx)
/*
 * Decode all residual coefficients for the current block: luma first (with
 * per-sub-block intra txfm type selecting the scan), then both chroma
 * planes (always DCT_DCT scan). Non-zero contexts are merged down and
 * splatted back to match the transform-unit granularity.
 * NOTE(review): parts of the switch bodies and closing braces are elided
 * in this chunk; comments annotate only visible code.
 */
2231 VP9Context *s = ctx->priv_data;
2233 int row = s->row, col = s->col;
/* Luma probability/count tables for this txfm size and intra/inter. */
2234 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2235 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2236 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
/* Block extent in 4x4 units, clipped at the frame edge. */
2237 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2238 int end_x = FFMIN(2 * (s->cols - col), w4);
2239 int end_y = FFMIN(2 * (s->rows - row), h4);
2240 int n, pl, x, y, res;
2241 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
/* Lossless uses a separate (WHT) scan set, offset by 4. */
2242 int tx = 4 * s->lossless + b->tx;
2243 const int16_t * const *yscans = vp9_scans[tx];
2244 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2245 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2246 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2247 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2248 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
/* Coefficient counts per band for each txfm size (4x4..32x32). */
2249 static const int16_t band_counts[4][8] = {
2250 { 1, 2, 3, 4, 3, 16 - 13 },
2251 { 1, 2, 3, 4, 11, 64 - 21 },
2252 { 1, 2, 3, 4, 11, 256 - 21 },
2253 { 1, 2, 3, 4, 11, 1024 - 21 },
2255 const int16_t *y_band_counts = band_counts[b->tx];
2256 const int16_t *uv_band_counts = band_counts[b->uvtx];
/* Collapse nnz contexts to one flag per transform unit before decoding. */
2258 #define MERGE(la, end, step, rd) \
2259 for (n = 0; n < end; n += step) \
2260 la[n] = !!rd(&la[n])
2261 #define MERGE_CTX(step, rd) \
2263 MERGE(l, end_y, step, rd); \
2264 MERGE(a, end_x, step, rd); \
/* Decode luma TUs in raster order; scan chosen by sub-block intra mode. */
2267 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2268 for (n = 0, y = 0; y < end_y; y += step) { \
2269 for (x = 0; x < end_x; x += step, n += step * step) { \
2270 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2271 res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
2272 c, e, p, a[x] + l[y], yscans[txtp], \
2273 ynbs[txtp], y_band_counts, qmul[0]); \
2274 a[x] = l[y] = !!res; \
2276 AV_WN16A(&s->eob[n], res); \
/* Re-expand the merged per-TU nnz flags back to 4x4 granularity. */
2283 #define SPLAT(la, end, step, cond) \
2285 for (n = 1; n < end; n += step) \
2286 la[n] = la[n - 1]; \
2287 } else if (step == 4) { \
2289 for (n = 0; n < end; n += step) \
2290 AV_WN32A(&la[n], la[n] * 0x01010101); \
2292 for (n = 0; n < end; n += step) \
2293 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2295 } else /* step == 8 */ { \
2297 if (HAVE_FAST_64BIT) { \
2298 for (n = 0; n < end; n += step) \
2299 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2301 for (n = 0; n < end; n += step) { \
2302 uint32_t v32 = la[n] * 0x01010101; \
2303 AV_WN32A(&la[n], v32); \
2304 AV_WN32A(&la[n + 4], v32); \
2308 for (n = 0; n < end; n += step) \
2309 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2312 #define SPLAT_CTX(step) \
2314 SPLAT(a, end_x, step, end_x == w4); \
2315 SPLAT(l, end_y, step, end_y == h4); \
/* Luma: dispatch on b->tx (4x4 / 8x8 / 16x16 / 32x32). */
2321 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2324 MERGE_CTX(2, AV_RN16A);
2325 DECODE_Y_COEF_LOOP(2, 0,);
2329 MERGE_CTX(4, AV_RN32A);
2330 DECODE_Y_COEF_LOOP(4, 0,);
2334 MERGE_CTX(8, AV_RN64A);
2335 DECODE_Y_COEF_LOOP(8, 0, 32);
/* Chroma: same structure, always DCT scan, qmul[1]. */
2340 #define DECODE_UV_COEF_LOOP(step, decode_coeffs_fn) \
2341 for (n = 0, y = 0; y < end_y; y += step) { \
2342 for (x = 0; x < end_x; x += step, n += step * step) { \
2343 res = decode_coeffs_fn(&s->c, s->uvblock[pl] + 16 * n, \
2344 16 * step * step, c, e, p, a[x] + l[y], \
2345 uvscan, uvnb, uv_band_counts, qmul[1]); \
2346 a[x] = l[y] = !!res; \
2348 AV_WN16A(&s->uveob[pl][n], res); \
2350 s->uveob[pl][n] = res; \
2355 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2356 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2357 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2362 for (pl = 0; pl < 2; pl++) {
2363 a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2364 l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2367 DECODE_UV_COEF_LOOP(1, decode_coeffs_b);
2370 MERGE_CTX(2, AV_RN16A);
2371 DECODE_UV_COEF_LOOP(2, decode_coeffs_b);
2375 MERGE_CTX(4, AV_RN32A);
2376 DECODE_UV_COEF_LOOP(4, decode_coeffs_b);
2380 MERGE_CTX(8, AV_RN64A);
2381 DECODE_UV_COEF_LOOP(8, decode_coeffs_b32);
// Fix up an intra prediction mode based on edge availability and build
// the top (*a) and left (l) prediction-edge pixel arrays, substituting
// the appropriate DC_12x_PRED fallback or replicated/constant pixels
// where neighbours are unavailable. Returns the (possibly converted) mode.
2388 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2389 uint8_t *dst_edge, ptrdiff_t stride_edge,
2390 uint8_t *dst_inner, ptrdiff_t stride_inner,
2391 uint8_t *l, int col, int x, int w,
2392 int row, int y, enum TxfmMode tx,
2393 int p, int ss_h, int ss_v)
2395 int have_top = row > 0 || y > 0;
// left is available inside the tile, not across the tile boundary
2396 int have_left = col > s->tiling.tile_col_start || x > 0;
2397 int have_right = x < w - 1;
// mode remap table: what each mode degrades to when top/left are missing
2398 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2399 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2400 { DC_127_PRED, VERT_PRED } },
2401 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2402 { HOR_PRED, HOR_PRED } },
2403 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2404 { LEFT_DC_PRED, DC_PRED } },
2405 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2406 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2407 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2408 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2409 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2410 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2411 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2412 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2413 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2414 { DC_127_PRED, VERT_LEFT_PRED } },
2415 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2416 { HOR_UP_PRED, HOR_UP_PRED } },
2417 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2418 { HOR_PRED, TM_VP8_PRED } },
// per-mode description of which edge pixels the predictor reads
2420 static const struct {
2421 uint8_t needs_left:1;
2422 uint8_t needs_top:1;
2423 uint8_t needs_topleft:1;
2424 uint8_t needs_topright:1;
2425 uint8_t invert_left:1;
2426 } edges[N_INTRA_PRED_MODES] = {
2427 [VERT_PRED] = { .needs_top = 1 },
2428 [HOR_PRED] = { .needs_left = 1 },
2429 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2430 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2431 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2432 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2433 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2434 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2435 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2436 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2437 [LEFT_DC_PRED] = { .needs_left = 1 },
2438 [TOP_DC_PRED] = { .needs_top = 1 },
2439 [DC_128_PRED] = { 0 },
2440 [DC_127_PRED] = { 0 },
2441 [DC_129_PRED] = { 0 }
2444 av_assert2(mode >= 0 && mode < 10);
2445 mode = mode_conv[mode][have_left][have_top];
2446 if (edges[mode].needs_top) {
2447 uint8_t *top, *topleft;
2448 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2449 int n_px_need_tr = 0;
2451 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2454 // if top of sb64-row, use s->intra_pred_data[] instead of
2455 // dst[-stride] for intra prediction (it contains pre- instead of
2456 // post-loopfilter data)
2458 top = !(row & 7) && !y ?
2459 s->intra_pred_data[p] + col * (8 >> ss_h) + x * 4 :
2460 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2462 topleft = !(row & 7) && !y ?
2463 s->intra_pred_data[p] + col * (8 >> ss_h) + x * 4 :
2464 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2465 &dst_inner[-stride_inner];
// fast path: all needed top/topright pixels exist in the frame
2469 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2470 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2471 n_px_need + n_px_need_tr <= n_px_have) {
2475 if (n_px_need <= n_px_have) {
2476 memcpy(*a, top, n_px_need);
// partial top row: replicate the last available pixel rightwards
2478 memcpy(*a, top, n_px_have);
2479 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2480 n_px_need - n_px_have);
// no top at all: fill with the spec-mandated constant 127
2483 memset(*a, 127, n_px_need);
2485 if (edges[mode].needs_topleft) {
2486 if (have_left && have_top) {
2487 (*a)[-1] = topleft[-1];
2489 (*a)[-1] = have_top ? 129 : 127;
2492 if (tx == TX_4X4 && edges[mode].needs_topright) {
2493 if (have_top && have_right &&
2494 n_px_need + n_px_need_tr <= n_px_have) {
2495 memcpy(&(*a)[4], &top[4], 4);
// missing topright: replicate the rightmost top pixel
2497 memset(&(*a)[4], (*a)[3], 4);
2502 if (edges[mode].needs_left) {
2504 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2505 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2506 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
// HOR_UP_PRED reads the left column bottom-up, hence the inverted copy
2508 if (edges[mode].invert_left) {
2509 if (n_px_need <= n_px_have) {
2510 for (i = 0; i < n_px_need; i++)
2511 l[i] = dst[i * stride - 1];
2513 for (i = 0; i < n_px_have; i++)
2514 l[i] = dst[i * stride - 1];
2515 memset(&l[n_px_have], l[n_px_have - 1], n_px_need - n_px_have);
2518 if (n_px_need <= n_px_have) {
2519 for (i = 0; i < n_px_need; i++)
2520 l[n_px_need - 1 - i] = dst[i * stride - 1];
2522 for (i = 0; i < n_px_have; i++)
2523 l[n_px_need - 1 - i] = dst[i * stride - 1];
2524 memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
// no left edge available: constant 129 per the VP9 spec
2528 memset(l, 129, 4 << tx);
// Reconstruct an intra-coded block: for each transform sub-block of the
// luma plane and then both chroma planes, build prediction edges
// (check_intra_mode), run the intra predictor, and add the inverse
// transform of the decoded residual (unless the block is skipped).
2535 static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2537 VP9Context *s = ctx->priv_data;
2539 int row = s->row, col = s->col;
2540 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2541 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2542 int end_x = FFMIN(2 * (s->cols - col), w4);
2543 int end_y = FFMIN(2 * (s->rows - row), h4);
2544 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2545 int uvstep1d = 1 << b->uvtx, p;
// dst writes into the (possibly emulated) working buffer, dst_r into the
// real reference frame; both are predicted from as appropriate
2546 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2547 LOCAL_ALIGNED_32(uint8_t, a_buf, [64]);
2548 LOCAL_ALIGNED_32(uint8_t, l, [32]);
// luma plane: iterate txfm-sized sub-blocks in raster order
2550 for (n = 0, y = 0; y < end_y; y += step1d) {
2551 uint8_t *ptr = dst, *ptr_r = dst_r;
2552 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2553 ptr_r += 4 * step1d, n += step) {
// sub-8x8 blocks carry a per-4x4 mode; larger blocks share mode[0]
2554 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2556 uint8_t *a = &a_buf[32];
2557 enum TxfmType txtp = vp9_intra_txfm_type[mode];
// eob is 16-bit-packed for txfm sizes above 8x8
2558 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2560 mode = check_intra_mode(s, mode, &a, ptr_r,
2561 s->frames[CUR_FRAME].tf.f->linesize[0],
2562 ptr, s->y_stride, l,
2563 col, x, w4, row, y, b->tx, 0, 0, 0);
2564 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2566 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2567 s->block + 16 * n, eob);
2569 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2570 dst += 4 * step1d * s->y_stride;
// chroma planes: same structure, single uvmode, always DCT_DCT
2577 step = 1 << (b->uvtx * 2);
2578 for (p = 0; p < 2; p++) {
2579 dst = s->dst[1 + p];
2580 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2581 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2582 uint8_t *ptr = dst, *ptr_r = dst_r;
2583 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2584 ptr_r += 4 * uvstep1d, n += step) {
2585 int mode = b->uvmode;
2586 uint8_t *a = &a_buf[32];
2587 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2589 mode = check_intra_mode(s, mode, &a, ptr_r,
2590 s->frames[CUR_FRAME].tf.f->linesize[1],
2591 ptr, s->uv_stride, l, col, x, w4, row, y,
2592 b->uvtx, p + 1, s->ss_h, s->ss_v);
2593 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2595 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2596 s->uvblock[p] + 16 * n, eob);
2598 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2599 dst += 4 * uvstep1d * s->uv_stride;
// Luma motion compensation when the reference frame has a different
// resolution than the current frame (scaled prediction). Scales the MV
// and block position into reference coordinates, waits for the needed
// reference rows to be decoded, and handles edge emulation.
2604 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2605 uint8_t *dst, ptrdiff_t dst_stride,
2606 const uint8_t *ref, ptrdiff_t ref_stride,
2607 ThreadFrame *ref_frame,
2608 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2609 int bw, int bh, int w, int h,
2610 const uint16_t *scale, const uint8_t *step)
// 14-bit fixed-point scale factor, per dimension
2612 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2613 // BUG libvpx seems to scale the two components separately. This introduces
2614 // rounding errors but we have to reproduce them to be exactly compatible
2615 // with the output from libvpx...
2616 int mx = scale_mv(mv->x * 2, 0) + scale_mv(x * 16, 0);
2617 int my = scale_mv(mv->y * 2, 1) + scale_mv(y * 16, 1);
2618 int refbw_m1, refbh_m1;
2623 ref += y * ref_stride + x;
// last reference sample touched, given subpel position and scale step
2626 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2627 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2628 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2629 // we use +7 because the last 7 pixels of each sbrow can be changed in
2630 // the longest loopfilter of the next sbrow
// block until the reference thread has decoded up to sb64-row 'th'
2631 th = (y + refbh_m1 + 4 + 7) >> 6;
2632 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
// 8-tap filter reads 3 pixels before / 4 after; emulate edges if needed
2633 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2634 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2635 ref - 3 * ref_stride - 3,
2637 refbw_m1 + 8, refbh_m1 + 8,
2638 x - 3, y - 3, w, h);
// 144 is the emu buffer stride for scaled MC (see chroma variant below)
2639 ref = s->edge_emu_buffer + 3 * 144 + 3;
2642 smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
// Chroma counterpart of mc_luma_scaled(): both chroma planes share one
// MV; x/y/mx/my are in chroma (subsampled) coordinates. Reproduces a
// known libvpx rounding quirk for bit-exact output.
2645 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2646 uint8_t *dst_u, uint8_t *dst_v,
2647 ptrdiff_t dst_stride,
2648 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2649 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2650 ThreadFrame *ref_frame,
2651 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2652 int bw, int bh, int w, int h,
2653 const uint16_t *scale, const uint8_t *step)
2655 // BUG https://code.google.com/p/webm/issues/detail?id=820
2656 int mx = scale_mv(mv->x << !s->ss_h, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2657 int my = scale_mv(mv->y << !s->ss_v, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2659 int refbw_m1, refbh_m1;
2664 ref_u += y * src_stride_u + x;
2665 ref_v += y * src_stride_v + x;
2668 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2669 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2670 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2671 // we use +7 because the last 7 pixels of each sbrow can be changed in
2672 // the longest loopfilter of the next sbrow
// progress is tracked in luma sb64 rows, hence the (6 - ss_v) shift
2673 th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2674 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
// near a frame edge: emulate both planes through the shared emu buffer
2675 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2676 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2677 ref_u - 3 * src_stride_u - 3,
2679 refbw_m1 + 8, refbh_m1 + 8,
2680 x - 3, y - 3, w, h);
2681 ref_u = s->edge_emu_buffer + 3 * 144 + 3;
// note: U must be filtered before the buffer is reused for V
2682 smc(dst_u, dst_stride, ref_u, 144, bh, mx, my, step[0], step[1]);
2684 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2685 ref_v - 3 * src_stride_v - 3,
2687 refbw_m1 + 8, refbh_m1 + 8,
2688 x - 3, y - 3, w, h);
2689 ref_v = s->edge_emu_buffer + 3 * 144 + 3;
2690 smc(dst_v, dst_stride, ref_v, 144, bh, mx, my, step[0], step[1]);
2692 smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2693 smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
// Instantiate the shared inter-prediction template with the *scaled*
// motion-compensation helpers (produces inter_pred_scaled() et al.).
2697 #define FN(x) x##_scaled
2698 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2699 mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
2700 mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2701 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2702 row, col, mv, bw, bh, w, h, i) \
2703 mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2704 row, col, mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2705 #include "vp9_mc_template.c"
// tear the template macros back down before the unscaled instantiation
2707 #undef mc_chroma_dir
// Luma motion compensation for same-resolution references. mx/my are
// the 1/8-pel MV components; !!mx / !!my select the subpel filter taps
// and whether horizontal/vertical edge extension is needed at all.
2710 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2711 uint8_t *dst, ptrdiff_t dst_stride,
2712 const uint8_t *ref, ptrdiff_t ref_stride,
2713 ThreadFrame *ref_frame,
2714 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2715 int bw, int bh, int w, int h)
2717 int mx = mv->x, my = mv->y, th;
2721 ref += y * ref_stride + x;
2724 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2725 // we use +7 because the last 7 pixels of each sbrow can be changed in
2726 // the longest loopfilter of the next sbrow
// wait for the reference decoder thread to reach sb64-row 'th'
2727 th = (y + bh + 4 * !!my + 7) >> 6;
2728 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
// subpel filtering reads 3 before / 4 after, only in filtered directions
2729 if (x < !!mx * 3 || y < !!my * 3 ||
2730 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2731 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2732 ref - !!my * 3 * ref_stride - !!mx * 3,
2734 bw + !!mx * 7, bh + !!my * 7,
2735 x - !!mx * 3, y - !!my * 3, w, h);
// 80 is the emu buffer stride used by the unscaled MC paths
2736 ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
// mx/my are in 1/8-pel here; the dsp function expects 1/16-pel, hence << 1
2739 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
// Chroma counterpart of mc_luma_unscaled(): applies the same MV (scaled
// up by the subsampling shift) to both chroma planes at once.
2742 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2743 uint8_t *dst_u, uint8_t *dst_v,
2744 ptrdiff_t dst_stride,
2745 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2746 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2747 ThreadFrame *ref_frame,
2748 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2749 int bw, int bh, int w, int h)
// convert the luma MV to chroma precision for non-subsampled axes
2751 int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
2755 ref_u += y * src_stride_u + x;
2756 ref_v += y * src_stride_v + x;
2759 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2760 // we use +7 because the last 7 pixels of each sbrow can be changed in
2761 // the longest loopfilter of the next sbrow
// progress counter is in luma rows; adjust shift for vertical subsampling
2762 th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2763 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2764 if (x < !!mx * 3 || y < !!my * 3 ||
2765 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
// emulate U, filter it, then reuse the emu buffer for V
2766 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2767 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2769 bw + !!mx * 7, bh + !!my * 7,
2770 x - !!mx * 3, y - !!my * 3, w, h);
2771 ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2772 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2774 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2775 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2777 bw + !!mx * 7, bh + !!my * 7,
2778 x - !!mx * 3, y - !!my * 3, w, h);
2779 ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2780 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
// fast path: no edge emulation needed for either plane
2782 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2783 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
// Instantiate the shared inter-prediction template a second time with
// the *unscaled* motion-compensation helpers.
2788 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2789 mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2791 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2792 row, col, mv, bw, bh, w, h, i) \
2793 mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2794 row, col, mv, bw, bh, w, h)
2795 #include "vp9_mc_template.c"
// Fix: the previous "#undef mc_luma_dir_dir" / "#undef mc_chroma_dir_dir"
// undefined macros that were never defined, so mc_luma_dir/mc_chroma_dir
// stayed defined past the template and could silently leak into later code.
2796 #undef mc_luma_dir
2797 #undef mc_chroma_dir
// Reconstruct an inter-coded block: run motion-compensated prediction
// (scaled path if any used reference has a different resolution), then
// add the inverse-transformed residual for luma and both chroma planes.
2800 static void inter_recon(AVCodecContext *ctx)
2802 VP9Context *s = ctx->priv_data;
2804 int row = s->row, col = s->col;
// a non-zero mvscale[..][0] marks a reference needing scaled prediction
2806 if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
2807 inter_pred_scaled(ctx);
2812 /* mostly copied intra_recon() */
2814 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2815 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2816 int end_x = FFMIN(2 * (s->cols - col), w4);
2817 int end_y = FFMIN(2 * (s->rows - row), h4);
2818 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2819 int uvstep1d = 1 << b->uvtx, p;
2820 uint8_t *dst = s->dst[0];
// luma residual add; inter residuals always use DCT_DCT
2823 for (n = 0, y = 0; y < end_y; y += step1d) {
2825 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2826 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2829 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2830 s->block + 16 * n, eob);
2832 dst += 4 * s->y_stride * step1d;
// chroma residual add
2838 step = 1 << (b->uvtx * 2);
2839 for (p = 0; p < 2; p++) {
2840 dst = s->dst[p + 1];
2841 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2843 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2844 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2847 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2848 s->uvblock[p] + 16 * n, eob);
2850 dst += 4 * uvstep1d * s->uv_stride;
// Build the loop-filter edge masks for one block: records, per 8-pixel
// row within the sb64, which column (mask[0]) and row (mask[1]) edges
// must be filtered with the 16-wide / 8-wide / 4-wide / inner-4 filters.
2856 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
2857 int row_and_7, int col_and_7,
2858 int w, int h, int col_end, int row_end,
2859 enum TxfmMode tx, int skip_inter)
2861 static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
2862 static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
2864 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2865 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2866 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2867 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2869 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2870 // edges. This means that for UV, we work on two subsampled blocks at
2871 // a time, and we only use the topleft block's mode information to set
2872 // things like block strength. Thus, for any block size smaller than
2873 // 16x16, ignore the odd portion of the block.
2874 if (tx == TX_4X4 && (ss_v | ss_h)) {
// coded (non-skipped) 4x4 blocks: filter every internal edge too
2889 if (tx == TX_4X4 && !skip_inter) {
2890 int t = 1 << col_and_7, m_col = (t << w) - t, y;
2891 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
2892 int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
2894 for (y = row_and_7; y < h + row_and_7; y++) {
2895 int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
2897 mask[0][y][1] |= m_row_8;
2898 mask[0][y][2] |= m_row_4;
2899 // for odd lines, if the odd col is not being filtered,
2900 // skip odd row also:
2907 // if a/c are even row/col and b/d are odd, and d is skipped,
2908 // e.g. right edge of size-66x66.webm, then skip b also (bug)
2909 if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
2910 mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
2912 mask[1][y][col_mask_id] |= m_col;
// inner 4x4 edges (mask index 3)
2915 mask[0][y][3] |= m_col;
2917 mask[1][y][3] |= m_col;
// larger transforms and/or skipped inter blocks: only block-boundary edges
2920 int y, t = 1 << col_and_7, m_col = (t << w) - t;
2923 int mask_id = (tx == TX_8X8);
2924 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2925 int l2 = tx + ss_h - 1, step1d;
2926 int m_row = m_col & masks[l2];
2928 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
2929 // 8wd loopfilter to prevent going off the visible edge.
2930 if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2931 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2932 int m_row_8 = m_row - m_row_16;
2934 for (y = row_and_7; y < h + row_and_7; y++) {
2935 mask[0][y][0] |= m_row_16;
2936 mask[0][y][1] |= m_row_8;
2939 for (y = row_and_7; y < h + row_and_7; y++)
2940 mask[0][y][mask_id] |= m_row;
// same trick for the bottom row edge when vertically subsampled
2945 if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2946 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2947 mask[1][y][0] |= m_col;
2948 if (y - row_and_7 == h - 1)
2949 mask[1][y][1] |= m_col;
2951 for (y = row_and_7; y < h + row_and_7; y += step1d)
2952 mask[1][y][mask_id] |= m_col;
2954 } else if (tx != TX_4X4) {
// skipped 8x8+ blocks: single edge along top row / left column
2957 mask_id = (tx == TX_8X8) || (h == ss_v);
2958 mask[1][row_and_7][mask_id] |= m_col;
2959 mask_id = (tx == TX_8X8) || (w == ss_h);
2960 for (y = row_and_7; y < h + row_and_7; y++)
2961 mask[0][y][mask_id] |= t;
// skipped 4x4 blocks: outer edges only, 8-wide on 32-px boundaries
2963 int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
2965 for (y = row_and_7; y < h + row_and_7; y++) {
2966 mask[0][y][2] |= t4;
2967 mask[0][y][1] |= t8;
2969 mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
// Decode and reconstruct one coding block: parse its mode info and
// coefficients, run intra or inter reconstruction (with edge-emulation
// buffers for blocks overhanging the frame edge), and record loop-filter
// levels/masks. Advances the per-tile coefficient buffer pointers.
2974 static void decode_b(AVCodecContext *ctx, int row, int col,
2975 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2976 enum BlockLevel bl, enum BlockPartition bp)
2978 VP9Context *s = ctx->priv_data;
2980 enum BlockSize bs = bl * 3 + bp;
2981 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
2983 AVFrame *f = s->frames[CUR_FRAME].tf.f;
// MV clamping range for this block position (spec-defined 128 margin)
2989 s->min_mv.x = -(128 + col * 64);
2990 s->min_mv.y = -(128 + row * 64);
2991 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
2992 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
// shrink the chroma transform if it would exceed the subsampled block
2998 b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
2999 (s->ss_v && h4 * 2 == (1 << b->tx)));
// helpers to clear nnz contexts for skipped blocks, sized to the block
3006 #define SPLAT_ZERO_CTX(v, n) \
3008 case 1: v = 0; break; \
3009 case 2: AV_ZERO16(&v); break; \
3010 case 4: AV_ZERO32(&v); break; \
3011 case 8: AV_ZERO64(&v); break; \
3012 case 16: AV_ZERO128(&v); break; \
3014 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3016 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3017 if (s->ss_##dir2) { \
3018 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3019 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3021 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3022 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3027 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
3028 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
3029 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
3030 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
3033 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
3034 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
3035 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
3036 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
// advance coefficient/eob buffer pointers past this (skipped) block
3041 s->block += w4 * h4 * 64;
3042 s->uvblock[0] += w4 * h4 * 64 >> (s->ss_h + s->ss_v);
3043 s->uvblock[1] += w4 * h4 * 64 >> (s->ss_h + s->ss_v);
3044 s->eob += 4 * w4 * h4;
3045 s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3046 s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3052 // emulated overhangs if the stride of the target buffer can't hold. This
3053 // allows to support emu-edge and so on even if we have large block
// reconstruct into tmp_y/tmp_uv when the block overhangs the frame
3055 emu[0] = (col + w4) * 8 > f->linesize[0] ||
3056 (row + h4) > s->rows;
3057 emu[1] = (col + w4) * 4 > f->linesize[1] ||
3058 (row + h4) > s->rows;
3060 s->dst[0] = s->tmp_y;
3063 s->dst[0] = f->data[0] + yoff;
3064 s->y_stride = f->linesize[0];
3067 s->dst[1] = s->tmp_uv[0];
3068 s->dst[2] = s->tmp_uv[1];
3071 s->dst[1] = f->data[1] + uvoff;
3072 s->dst[2] = f->data[2] + uvoff;
3073 s->uv_stride = f->linesize[1];
3076 intra_recon(ctx, yoff, uvoff);
// copy the emulated reconstruction back into the real frame planes
3081 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3083 for (n = 0; o < w; n++) {
3088 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3089 s->tmp_y + o, 64, h, 0, 0);
3095 int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
3097 for (n = 1; o < w; n++) {
3102 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3103 s->tmp_uv[0] + o, 32, h, 0, 0);
3104 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3105 s->tmp_uv[1] + o, 32, h, 0, 0);
3111 // pick filter level and find edges to apply filter to
3112 if (s->filter.level &&
3113 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3114 [b->mode[3] != ZEROMV]) > 0) {
3115 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3116 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3118 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3119 mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3120 if (s->ss_h || s->ss_v)
3121 mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
3122 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3123 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3124 b->uvtx, skip_inter);
// lazily fill the limit/mblim lookup tables for this filter level
3126 if (!s->filter.lim_lut[lvl]) {
3127 int sharp = s->filter.sharpness;
3131 limit >>= (sharp + 3) >> 2;
3132 limit = FFMIN(limit, 9 - sharp);
3134 limit = FFMAX(limit, 1);
3136 s->filter.lim_lut[lvl] = limit;
3137 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
// advance coefficient/eob buffer pointers past this block
3143 s->block += w4 * h4 * 64;
3144 s->uvblock[0] += w4 * h4 * 64 >> (s->ss_v + s->ss_h);
3145 s->uvblock[1] += w4 * h4 * 64 >> (s->ss_v + s->ss_h);
3146 s->eob += 4 * w4 * h4;
3147 s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3148 s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
// Recursively parse the partition tree of a superblock and decode each
// leaf via decode_b(). Partition symbols near the right/bottom frame
// edge are decoded with fewer choices (split/implicit), matching the
// VP9 bitstream rules. Updates the partition adaptation counters.
3152 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3153 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3155 VP9Context *s = ctx->priv_data;
// partition probability context from above/left partition history
3156 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3157 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
3158 const uint8_t *p = s->keyframe || s->intraonly ? vp9_default_kf_partition_probs[bl][c] :
3159 s->prob.p.partition[bl][c];
3160 enum BlockPartition bp;
// half-block size in 8x8 units at this level (4, 2, 1, ...)
3161 ptrdiff_t hbs = 4 >> bl;
3162 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3163 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3166 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3167 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3168 } else if (col + hbs < s->cols) { // FIXME why not <=?
3169 if (row + hbs < s->rows) { // FIXME why not <=?
// fully inside the frame: all four partition types possible
3170 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3172 case PARTITION_NONE:
3173 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3176 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3177 yoff += hbs * 8 * y_stride;
3178 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3179 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3182 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3184 uvoff += hbs * 8 >> s->ss_h;
3185 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3187 case PARTITION_SPLIT:
3188 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3189 decode_sb(ctx, row, col + hbs, lflvl,
3190 yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
3191 yoff += hbs * 8 * y_stride;
3192 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3193 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3194 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3195 yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
// bottom edge: only split vs. horizontal is signalled
3200 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3201 bp = PARTITION_SPLIT;
3202 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3203 decode_sb(ctx, row, col + hbs, lflvl,
3204 yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
3207 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// right edge: only split vs. vertical is signalled
3209 } else if (row + hbs < s->rows) { // FIXME why not <=?
3210 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3211 bp = PARTITION_SPLIT;
3212 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3213 yoff += hbs * 8 * y_stride;
3214 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3215 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3218 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// bottom-right corner: split is implicit, nothing to read
3221 bp = PARTITION_SPLIT;
3222 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3224 s->counts.partition[bl][c][bp]++;
// Re-walk an already-parsed superblock using the stored per-block
// bl/bp info (no bitstream reads) — used for the second pass when
// reconstruction is deferred. Mirrors decode_sb()'s recursion shape.
3227 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3228 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3230 VP9Context *s = ctx->priv_data;
3232 ptrdiff_t hbs = 4 >> bl;
3233 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3234 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
// leaf at the smallest level: must be an 8x8 block
3237 av_assert2(b->bl == BL_8X8);
3238 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3239 } else if (s->b->bl == bl) {
3240 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3241 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3242 yoff += hbs * 8 * y_stride;
3243 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3244 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3245 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3247 uvoff += hbs * 8 >> s->ss_h;
3248 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// stored level is deeper: recurse into the four quadrants that exist
3251 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3252 if (col + hbs < s->cols) { // FIXME why not <=?
3253 if (row + hbs < s->rows) {
3254 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
3255 uvoff + (8 * hbs >> s->ss_h), bl + 1);
3256 yoff += hbs * 8 * y_stride;
3257 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3258 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3259 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3260 yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
3263 uvoff += hbs * 8 >> s->ss_h;
3264 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3266 } else if (row + hbs < s->rows) {
3267 yoff += hbs * 8 * y_stride;
3268 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3269 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
// Apply the loop filter to vertical edges (edges *between columns*) of
// one plane within an sb64, walking the precomputed masks: hm1/hm2 are
// top/bottom-half 8px-row masks, hm13/hm23 the inner-4x4-edge masks.
3274 static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
3275 uint8_t *lvl, uint8_t (*mask)[4],
3276 uint8_t *dst, ptrdiff_t ls)
3280 // filter edges between columns (e.g. block1 | block2)
3281 for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
3282 uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
3283 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3284 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3285 unsigned hm = hm1 | hm2 | hm13 | hm23;
// x is a one-hot column bit; stop once no mask bit at/after x remains
3287 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 >> ss_h) {
// L holds the filter level, H the high (sharpness-derived) nibble
3290 int L = *l, H = L >> 4;
3291 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3293 if (hmask1[0] & x) {
3294 if (hmask2[0] & x) {
// both 8px halves want the widest filter -> one 16-pixel call
3295 av_assert2(l[8 << ss_v] == L);
3296 s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
3298 s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
3300 } else if (hm2 & x) {
// different levels above/below: pack both into E/I and use mix2
3303 E |= s->filter.mblim_lut[L] << 8;
3304 I |= s->filter.lim_lut[L] << 8;
3305 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3307 [0](ptr, ls, E, I, H);
3309 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3310 [0](ptr, ls, E, I, H);
3312 } else if (hm2 & x) {
3313 int L = l[8 << ss_v], H = L >> 4;
3314 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3316 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3317 [0](ptr + 8 * ls, ls, E, I, H);
// inner 4x4 edges at x+4 within the 8px column
3325 int L = *l, H = L >> 4;
3326 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3331 E |= s->filter.mblim_lut[L] << 8;
3332 I |= s->filter.lim_lut[L] << 8;
3333 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls, E, I, H);
3335 s->dsp.loop_filter_8[0][0](ptr + 4, ls, E, I, H);
3337 } else if (hm23 & x) {
3338 int L = l[8 << ss_v], H = L >> 4;
3339 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3341 s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4, ls, E, I, H);
3349 static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
3350                                                uint8_t *lvl, uint8_t (*mask)[4],
3351                                                uint8_t *dst, ptrdiff_t ls)
     // Apply the loop filter to the horizontal edges ("rows") of one plane of
     // a 64x64 superblock; mirror of filter_plane_cols() using the
     // lflvl->mask[plane][1 /* row */] masks and the [..][1] (row-oriented)
     // dsp loop-filter entry points.
3356     // filter edges between rows (e.g. ------)
3358     for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
3359         uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
3360         unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
         // Bits are scanned two (luma) columns at a time so horizontally
         // adjacent 8px edges can be merged into one 16px call.
3362         for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16, l += 2 << ss_h) {
3365                 int L = *l, H = L >> 4;
3366                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3369                 if (vmask[0] & (x << (1 + ss_h))) {
                     // Neighboring column flagged too: both halves of the
                     // 16px edge must share one filter level.
3370                     av_assert2(l[1 + ss_h] == L);
3371                     s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
3373                     s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
3375                 } else if (vm & (x << (1 + ss_h))) {
                     // Edge also in the neighboring column: pack its limits
                     // into the high byte and use a mix2 filter.
3378                     E |= s->filter.mblim_lut[L] << 8;
3379                     I |= s->filter.lim_lut[L] << 8;
3380                     s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3381                                            [!!(vmask[1] & (x << (1 + ss_h)))]
3382                                            [1](ptr, ls, E, I, H);
3384                     s->dsp.loop_filter_8[!!(vmask[1] & x)]
3385                                         [1](ptr, ls, E, I, H);
3387             } else if (vm & (x << (1 + ss_h))) {
                 // Edge only in the right-hand of the two columns.
3388                 int L = l[1 + ss_h], H = L >> 4;
3389                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3391                 s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
3392                                     [1](ptr + 8, ls, E, I, H);
             // Inner 4px edges (mask[3]), 4 lines into the 8px row.
3397                 int L = *l, H = L >> 4;
3398                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3400                 if (vm3 & (x << (1 + ss_h))) {
3403                     E |= s->filter.mblim_lut[L] << 8;
3404                     I |= s->filter.lim_lut[L] << 8;
3405                     s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
3407                     s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
3409             } else if (vm3 & (x << (1 + ss_h))) {
3410                 int L = l[1 + ss_h], H = L >> 4;
3411                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3413                 s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8, ls, E, I, H);
3426 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3427                           int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
     // Run the loop filter over one 64x64 superblock: vertical edges (cols)
     // first, then horizontal edges (rows), for luma and both chroma planes.
3429     VP9Context *s = ctx->priv_data;
3430     AVFrame *f = s->frames[CUR_FRAME].tf.f;
3431     uint8_t *dst = f->data[0] + yoff;
3432     ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
     // Chroma uses the uv masks (index 1) when either axis is subsampled,
     // otherwise it shares the luma masks (index 0).
3433     uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
3436     // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3437     // if you think of them as acting on a 8x8 block max, we can interleave
3438     // each v/h within the single x loop, but that only works if we work on
3439     // 8 pixel blocks, and we won't always do that (we want at least 16px
3440     // to use SSE2 optimizations, perhaps 32 for AVX2)
3442     filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
3443     filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);
     // Both chroma planes share the same masks, levels and stride.
3445     for (p = 0; p < 2; p++) {
3446         dst = f->data[1 + p] + uvoff;
3447         filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
3448         filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    // Compute the pixel range [*start, *end) covered by tile 'idx' out of
    // 2^log2_n tiles over n superblocks (each superblock is 8 pixels of
    // 8x8-block units, hence the << 3). The clamp against n handles tile
    // indices whose raw superblock range would exceed the frame.
    int first_sb = (idx * n) >> log2_n;
    int last_sb  = ((idx + 1) * n) >> log2_n;

    if (first_sb > n)
        first_sb = n;
    if (last_sb > n)
        last_sb = n;

    *start = first_sb << 3;
    *end   = last_sb << 3;
}
3460 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3461 int max_count, int update_factor)
3463 unsigned ct = ct0 + ct1, p2, p1;
3469 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3470 p2 = av_clip(p2, 1, 255);
3471 ct = FFMIN(ct, max_count);
3472 update_factor = FASTDIV(update_factor * ct, max_count);
3474 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3475 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
3478 static void adapt_probs(VP9Context *s)
     // Backward probability adaptation: after a frame is decoded, fold the
     // per-frame symbol counts (s->counts.*) into the stored probability
     // context s->prob_ctx[s->framectxid] via adapt_prob().
3481     prob_context *p = &s->prob_ctx[s->framectxid].p;
     // Weaker coef update factor (112 vs 128, out of 256) right after a
     // keyframe-like frame.
3482     int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
     // coefficient probabilities: eob node + first 3 model nodes, per
     // txsize/plane/inter/band/context
3485     for (i = 0; i < 4; i++)
3486         for (j = 0; j < 2; j++)
3487             for (k = 0; k < 2; k++)
3488                 for (l = 0; l < 6; l++)
3489                     for (m = 0; m < 6; m++) {
3490                         uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3491                         unsigned *e = s->counts.eob[i][j][k][l][m];
3492                         unsigned *c = s->counts.coef[i][j][k][l][m];
3494                         if (l == 0 && m >= 3) // dc only has 3 pt
3497                         adapt_prob(&pp[0], e[0], e[1], 24, uf);
3498                         adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3499                         adapt_prob(&pp[2], c[1], c[2], 24, uf);
     // Intra-only frames code no inter symbols: take the raw frame probs
     // for skip/tx as-is instead of count-based adaptation.
3502     if (s->keyframe || s->intraonly) {
3503         memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3504         memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3505         memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3506         memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
     // skip flag
3511     for (i = 0; i < 3; i++)
3512         adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
     // intra/inter flag
3515     for (i = 0; i < 4; i++)
3516         adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
     // comppred flag (only coded when the frame-level mode is switchable)
3519     if (s->comppredmode == PRED_SWITCHABLE) {
3520         for (i = 0; i < 5; i++)
3521             adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
     // reference frames
3525     if (s->comppredmode != PRED_SINGLEREF) {
3526         for (i = 0; i < 5; i++)
3527             adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3528                        s->counts.comp_ref[i][1], 20, 128);
3531     if (s->comppredmode != PRED_COMPREF) {
3532         for (i = 0; i < 5; i++) {
3533             uint8_t *pp = p->single_ref[i];
3534             unsigned (*c)[2] = s->counts.single_ref[i];
3536             adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3537             adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3541     // block partitioning
3542     for (i = 0; i < 4; i++)
3543         for (j = 0; j < 4; j++) {
3544             uint8_t *pp = p->partition[i][j];
3545             unsigned *c = s->counts.partition[i][j];
3547             adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3548             adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3549             adapt_prob(&pp[2], c[2], c[3], 20, 128);
     // tx size (tree nodes for the 8/16/32 trees)
3553     if (s->txfmmode == TX_SWITCHABLE) {
3554         for (i = 0; i < 2; i++) {
3555             unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3557             adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3558             adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3559             adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3560             adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3561             adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3562             adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3566     // interpolation filter
3567     if (s->filtermode == FILTER_SWITCHABLE) {
3568         for (i = 0; i < 4; i++) {
3569             uint8_t *pp = p->filter[i];
3570             unsigned *c = s->counts.filter[i];
3572             adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3573             adapt_prob(&pp[1], c[1], c[2], 20, 128);
     // inter prediction modes
3578     for (i = 0; i < 7; i++) {
3579         uint8_t *pp = p->mv_mode[i];
3580         unsigned *c = s->counts.mv_mode[i];
3582         adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3583         adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3584         adapt_prob(&pp[2], c[1], c[3], 20, 128);
     // mv joint distribution
3589         uint8_t *pp = p->mv_joint;
3590         unsigned *c = s->counts.mv_joint;
3592         adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3593         adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3594         adapt_prob(&pp[2], c[2], c[3], 20, 128);
     // the two mv components: sign, class tree, class0, bits, fractional
     // and (optionally) high-precision bits
3598     for (i = 0; i < 2; i++) {
3600         unsigned *c, (*c2)[2], sum;
3602         adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3603                    s->counts.mv_comp[i].sign[1], 20, 128);
3605         pp = p->mv_comp[i].classes;
3606         c = s->counts.mv_comp[i].classes;
3607         sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3608         adapt_prob(&pp[0], c[0], sum, 20, 128);
3610         adapt_prob(&pp[1], c[1], sum, 20, 128);
3612         adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3613         adapt_prob(&pp[3], c[2], c[3], 20, 128);
3615         adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3616         adapt_prob(&pp[5], c[4], c[5], 20, 128);
3618         adapt_prob(&pp[6], c[6], sum, 20, 128);
3619         adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3620         adapt_prob(&pp[8], c[7], c[8], 20, 128);
3621         adapt_prob(&pp[9], c[9], c[10], 20, 128);
3623         adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3624                    s->counts.mv_comp[i].class0[1], 20, 128);
3625         pp = p->mv_comp[i].bits;
3626         c2 = s->counts.mv_comp[i].bits;
3627         for (j = 0; j < 10; j++)
3628             adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3630         for (j = 0; j < 2; j++) {
3631             pp = p->mv_comp[i].class0_fp[j];
3632             c = s->counts.mv_comp[i].class0_fp[j];
3633             adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3634             adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3635             adapt_prob(&pp[2], c[2], c[3], 20, 128);
3637         pp = p->mv_comp[i].fp;
3638         c = s->counts.mv_comp[i].fp;
3639         adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3640         adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3641         adapt_prob(&pp[2], c[2], c[3], 20, 128);
         // hp bits are only coded (hence only adapted) with high-precision
         // MVs enabled
3643         if (s->highprecisionmvs) {
3644             adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3645                        s->counts.mv_comp[i].class0_hp[1], 20, 128);
3646             adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3647                        s->counts.mv_comp[i].hp[1], 20, 128);
     // y intra modes: walk the mode tree, removing each resolved leaf from
     // the running 'sum' of remaining counts
3652     for (i = 0; i < 4; i++) {
3653         uint8_t *pp = p->y_mode[i];
3654         unsigned *c = s->counts.y_mode[i], sum, s2;
3656         sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3657         adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3658         sum -= c[TM_VP8_PRED];
3659         adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3660         sum -= c[VERT_PRED];
3661         adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3662         s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3664         adapt_prob(&pp[3], s2, sum, 20, 128);
3666         adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3667         adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3668         sum -= c[DIAG_DOWN_LEFT_PRED];
3669         adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3670         sum -= c[VERT_LEFT_PRED];
3671         adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3672         adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
     // uv intra modes: same tree walk, one set per context index
     // (presumably conditioned on the y mode — 10 contexts)
3676     for (i = 0; i < 10; i++) {
3677         uint8_t *pp = p->uv_mode[i];
3678         unsigned *c = s->counts.uv_mode[i], sum, s2;
3680         sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3681         adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3682         sum -= c[TM_VP8_PRED];
3683         adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3684         sum -= c[VERT_PRED];
3685         adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3686         s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3688         adapt_prob(&pp[3], s2, sum, 20, 128);
3690         adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3691         adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3692         sum -= c[DIAG_DOWN_LEFT_PRED];
3693         adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3694         sum -= c[VERT_LEFT_PRED];
3695         adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3696         adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3700 static void free_buffers(VP9Context *s)
     // Release the decoder's per-frame scratch allocations (intra prediction
     // data buffer, block context array, coefficient block base); av_freep()
     // also NULLs the pointers so a later re-allocation is safe.
3702     av_freep(&s->intra_pred_data[0]);
3703     av_freep(&s->b_base);
3704     av_freep(&s->block_base);
3707 static av_cold int vp9_decode_free(AVCodecContext *ctx)
     // Decoder close: unref and free the 3 internal frames (current frame
     // plus the MVPAIR/SEGMAP reference copies) and all 8 slots of both the
     // active (refs) and pending (next_refs) reference sets.
3709     VP9Context *s = ctx->priv_data;
3712     for (i = 0; i < 3; i++) {
3713         if (s->frames[i].tf.f->data[0])
3714             vp9_unref_frame(ctx, &s->frames[i]);
3715         av_frame_free(&s->frames[i].tf.f);
3717     for (i = 0; i < 8; i++) {
3718         if (s->refs[i].f->data[0])
3719             ff_thread_release_buffer(ctx, &s->refs[i]);
3720         av_frame_free(&s->refs[i].f);
3721         if (s->next_refs[i].f->data[0])
3722             ff_thread_release_buffer(ctx, &s->next_refs[i]);
3723         av_frame_free(&s->next_refs[i].f);
3733 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3734                             int *got_frame, AVPacket *pkt)
     // Decode one VP9 packet: parse the frame header, rotate the internal
     // frame buffers, decode all tiles (optionally in two passes for frame
     // threading), run the loop filter per superblock row, then commit the
     // updated reference set and output the frame if it is visible.
3736     const uint8_t *data = pkt->data;
3737     int size = pkt->size;
3738     VP9Context *s = ctx->priv_data;
3739     int res, tile_row, tile_col, i, ref, row, col;
     // Keep the previous segmentation map when the header does not update it.
3740     int retain_segmap_ref = s->segmentation.enabled && !s->segmentation.update_map;
3741     ptrdiff_t yoff, uvoff, ls_y, ls_uv;
3744     if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3746     } else if (res == 0) {
         // res == 0: no coded frame in this packet — re-output reference
         // frame 'ref' directly.
3747         if (!s->refs[ref].f->data[0]) {
3748             av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3749             return AVERROR_INVALIDDATA;
3751         if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
3753         ((AVFrame *)frame)->pkt_pts = pkt->pts;
3754         ((AVFrame *)frame)->pkt_dts = pkt->dts;
         // The reference set is carried over unchanged into next_refs.
3755         for (i = 0; i < 8; i++) {
3756             if (s->next_refs[i].f->data[0])
3757                 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3758             if (s->refs[i].f->data[0] &&
3759                 (res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i])) < 0)
     // Rotate internal frames: the previous CUR_FRAME becomes the source of
     // last-frame MVs (REF_FRAME_MVPAIR) and — unless the segmentation map
     // is retained — of the segmentation map (REF_FRAME_SEGMAP); then a
     // fresh CUR_FRAME is allocated.
3768     if (!retain_segmap_ref) {
3769         if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
3770             vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
3771         if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
3772             (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
3775     if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
3776         vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);
3777     if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
3778         (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
3780     if (s->frames[CUR_FRAME].tf.f->data[0])
3781         vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
3782     if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
3784     f = s->frames[CUR_FRAME].tf.f;
3785     f->key_frame = s->keyframe;
3786     f->pict_type = (s->keyframe || s->intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3787     ls_y = f->linesize[0];
3788     ls_uv =f->linesize[1];
     // Prepare next_refs up front: slots flagged in refreshrefmask point at
     // the new frame, the rest keep their current reference.
3791     for (i = 0; i < 8; i++) {
3792         if (s->next_refs[i].f->data[0])
3793             ff_thread_release_buffer(ctx, &s->next_refs[i]);
3794         if (s->refreshrefmask & (1 << i)) {
3795             res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
3796         } else if (s->refs[i].f->data[0]) {
3797             res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
3803     // main tile decode loop
     // Reset the "above" entropy contexts for the whole frame width.
3804     memset(s->above_partition_ctx, 0, s->cols);
3805     memset(s->above_skip_ctx, 0, s->cols);
3806     if (s->keyframe || s->intraonly) {
3807         memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
3809         memset(s->above_mode_ctx, NEARESTMV, s->cols);
3811     memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
3812     memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
3813     memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
3814     memset(s->above_segpred_ctx, 0, s->cols);
     // Two-pass decoding (pass 1 parse, pass 2 reconstruct) is used under
     // frame threading when this frame refreshes its probability context
     // and is not in parallel mode.
3815     s->pass = s->frames[CUR_FRAME].uses_2pass =
3816         ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
3817     if ((res = update_block_buffers(ctx)) < 0) {
3818         av_log(ctx, AV_LOG_ERROR,
3819                "Failed to allocate block buffers\n");
     // Parallel mode: refresh the stored context from the raw (unadapted)
     // frame probabilities immediately and unblock other frame threads.
3822     if (s->refreshctx && s->parallelmode) {
3825         for (i = 0; i < 4; i++) {
3826             for (j = 0; j < 2; j++)
3827                 for (k = 0; k < 2; k++)
3828                     for (l = 0; l < 6; l++)
3829                         for (m = 0; m < 6; m++)
3830                             memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
3831                                    s->prob.coef[i][j][k][l][m], 3);
3832             if (s->txfmmode == i)
3835         s->prob_ctx[s->framectxid].p = s->prob.p;
3836         ff_thread_finish_setup(ctx);
3837     } else if (!s->refreshctx) {
3838         ff_thread_finish_setup(ctx);
         // Per-pass reset of the coefficient/eob write pointers.
3844         s->block = s->block_base;
3845         s->uvblock[0] = s->uvblock_base[0];
3846         s->uvblock[1] = s->uvblock_base[1];
3847         s->eob = s->eob_base;
3848         s->uveob[0] = s->uveob_base[0];
3849         s->uveob[1] = s->uveob_base[1];
3851         for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
3852             set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
3853                             tile_row, s->tiling.log2_tile_rows, s->sb_rows);
             // Initialize one range decoder per tile column; all tiles but
             // the last are prefixed by a 32-bit size field.
3855             for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3858                 if (tile_col == s->tiling.tile_cols - 1 &&
3859                     tile_row == s->tiling.tile_rows - 1) {
3862                     tile_size = AV_RB32(data);
3866                 if (tile_size > size) {
3867                     ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3868                     return AVERROR_INVALIDDATA;
3870                 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
3871                 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
3872                     ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3873                     return AVERROR_INVALIDDATA;
             // Decode superblock rows; rows advance in units of 8 8x8 blocks
             // (64 luma pixels).
3880             for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
3881                  row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
3882                 struct VP9Filter *lflvl_ptr = s->lflvl;
3883                 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
3885                 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3886                     set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
3887                                     tile_col, s->tiling.log2_tile_cols, s->sb_cols);
                     // Reset the "left" entropy contexts at each tile's
                     // left border.
3890                     memset(s->left_partition_ctx, 0, 8);
3891                     memset(s->left_skip_ctx, 0, 8);
3892                     if (s->keyframe || s->intraonly) {
3893                         memset(s->left_mode_ctx, DC_PRED, 16);
3895                         memset(s->left_mode_ctx, NEARESTMV, 8);
3897                     memset(s->left_y_nnz_ctx, 0, 16);
3898                     memset(s->left_uv_nnz_ctx, 0, 32);
3899                     memset(s->left_segpred_ctx, 0, 8);
                     // Swap this tile column's range decoder state in...
3901                     memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
3904                     for (col = s->tiling.tile_col_start;
3905                          col < s->tiling.tile_col_end;
3906                          col += 8, yoff2 += 64, uvoff2 += 64 >> s->ss_h, lflvl_ptr++) {
3907                         // FIXME integrate with lf code (i.e. zero after each
3908                         // use, similar to invtxfm coefficients, or similar)
3910                         memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
3914                             decode_sb_mem(ctx, row, col, lflvl_ptr,
3915                                           yoff2, uvoff2, BL_64X64);
3917                             decode_sb(ctx, row, col, lflvl_ptr,
3918                                       yoff2, uvoff2, BL_64X64);
                     // ...and back out for the next superblock row.
3922                     memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
3930                 // backup pre-loopfilter reconstruction data for intra
3931                 // prediction of next row of sb64s
3932                 if (row + 8 < s->rows) {
3933                     memcpy(s->intra_pred_data[0],
3934                            f->data[0] + yoff + 63 * ls_y,
3936                     memcpy(s->intra_pred_data[1],
3937                            f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
3938                            8 * s->cols >> s->ss_h);
3939                     memcpy(s->intra_pred_data[2],
3940                            f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
3941                            8 * s->cols >> s->ss_h);
3944                 // loopfilter one row
3945                 if (s->filter.level) {
3948                     lflvl_ptr = s->lflvl;
3949                     for (col = 0; col < s->cols;
3950                          col += 8, yoff2 += 64, uvoff2 += 64 >> s->ss_h, lflvl_ptr++) {
3951                         loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
3955                 // FIXME maybe we can make this more finegrained by running the
3956                 // loopfilter per-block instead of after each sbrow
3957                 // In fact that would also make intra pred left preparation easier?
3958                 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
         // After pass 1 of a two-pass decode: adapt the probabilities and
         // release waiting frame threads before reconstruction.
3962         if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
3964             ff_thread_finish_setup(ctx);
3966     } while (s->pass++ == 1);
3967     ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
     // Commit next_refs into refs for the following frame.
3970     for (i = 0; i < 8; i++) {
3971         if (s->refs[i].f->data[0])
3972             ff_thread_release_buffer(ctx, &s->refs[i]);
3973         ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
     // Invisible (non-displayed) frames update state but output nothing.
3976     if (!s->invisible) {
3977         if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
3985 static void vp9_decode_flush(AVCodecContext *ctx)
     // Flush (e.g. on seek): drop all internal frames and reference slots so
     // decoding restarts from a clean state.
3987     VP9Context *s = ctx->priv_data;
3990     for (i = 0; i < 3; i++)
3991         vp9_unref_frame(ctx, &s->frames[i]);
3992     for (i = 0; i < 8; i++)
3993         ff_thread_release_buffer(ctx, &s->refs[i]);
3996 static int init_frames(AVCodecContext *ctx)
     // Allocate the AVFrame shells for the 3 internal frames and the 8
     // reference slots in both refs[] and next_refs[]. On any failure the
     // already-allocated parts are torn down via vp9_decode_free() and
     // ENOMEM is returned.
3998     VP9Context *s = ctx->priv_data;
4001     for (i = 0; i < 3; i++) {
4002         s->frames[i].tf.f = av_frame_alloc();
4003         if (!s->frames[i].tf.f) {
4004             vp9_decode_free(ctx);
4005             av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4006             return AVERROR(ENOMEM);
4009     for (i = 0; i < 8; i++) {
4010         s->refs[i].f = av_frame_alloc();
4011         s->next_refs[i].f = av_frame_alloc();
4012         if (!s->refs[i].f || !s->next_refs[i].f) {
4013             vp9_decode_free(ctx);
4014             av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4015             return AVERROR(ENOMEM);
4022 static av_cold int vp9_decode_init(AVCodecContext *ctx)
     // One-time decoder init: enable frame-threading progress allocation,
     // initialize the VP9 and generic video DSP contexts, and mark the loop
     // filter sharpness as unset (-1) — presumably so the first frame header
     // forces the limit LUTs to be (re)built; confirm in the header parser.
4024     VP9Context *s = ctx->priv_data;
4026     ctx->internal->allocate_progress = 1;
4027     ff_vp9dsp_init(&s->dsp);
4028     ff_videodsp_init(&s->vdsp, 8);
4029     s->filter.sharpness = -1;
4031     return init_frames(ctx);
4034 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
     // Frame-threading worker init: each thread copy only needs its own
     // frame/reference AVFrame shells.
4036     return init_frames(avctx);
4039 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
     // Frame-threading state sync: copy the source thread's decoder state
     // (frames, reference set, and the header-relevant scalars) into 'dst'.
4042     VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4044     // detect size changes in other threads
4045     if (s->intra_pred_data[0] &&
4046         (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
     // Re-reference the source's internal frames.
4050     for (i = 0; i < 3; i++) {
4051         if (s->frames[i].tf.f->data[0])
4052             vp9_unref_frame(dst, &s->frames[i]);
4053         if (ssrc->frames[i].tf.f->data[0]) {
4054             if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
     // Adopt the source's post-frame reference set (its next_refs become
     // our refs).
4058     for (i = 0; i < 8; i++) {
4059         if (s->refs[i].f->data[0])
4060             ff_thread_release_buffer(dst, &s->refs[i]);
4061         if (ssrc->next_refs[i].f->data[0]) {
4062             if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
     // Scalar/header state the next frame's header parse depends on.
4067     s->invisible = ssrc->invisible;
4068     s->keyframe = ssrc->keyframe;
4069     s->ss_v = ssrc->ss_v;
4070     s->ss_h = ssrc->ss_h;
4071     s->segmentation.enabled = ssrc->segmentation.enabled;
4072     s->segmentation.update_map = ssrc->segmentation.update_map;
4073     memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4074     memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
4075     if (ssrc->segmentation.enabled) {
4076         memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4077                sizeof(s->segmentation.feat));
4083 AVCodec ff_vp9_decoder = {
4085 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4086 .type = AVMEDIA_TYPE_VIDEO,
4087 .id = AV_CODEC_ID_VP9,
4088 .priv_data_size = sizeof(VP9Context),
4089 .init = vp9_decode_init,
4090 .close = vp9_decode_free,
4091 .decode = vp9_decode_frame,
4092 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4093 .flush = vp9_decode_flush,
4094 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4095 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),