2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
34 #include "libavutil/pixdesc.h"
36 #define VP9_SYNCCODE 0x498342
// Per-frame storage: the decoded picture plus side data (segmentation map and
// per-block mv/ref pairs) kept in a single refcounted buffer so frame-threading
// can share it cheaply.
73 typedef struct VP9Frame {
75 AVBufferRef *extradata;          // single allocation backing segmentation_map + mv
76 uint8_t *segmentation_map;       // one segment id per 8x8 block, points into extradata
77 struct VP9mvrefPair *mv;         // per-8x8-block mv/ref pairs, points into extradata after the segmap
// NOTE(review): the two lines below belong to a different struct (loop-filter
// mask storage, struct header not visible in this excerpt) — confirm against
// the full file before relying on this grouping.
83 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
84 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
// Per-block decode state for the block currently being reconstructed:
// mode/reference decisions, motion vectors and transform sizes.
87 typedef struct VP9Block {
88 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
89 enum FilterMode filter;          // interpolation filter chosen for this block
90 VP56mv mv[4 /* b_idx */][2 /* ref */];
92 enum TxfmMode tx, uvtx;          // luma / chroma transform sizes
94 enum BlockPartition bp;          // how this block was split from its superblock
// Decoder-global state: frame-header fields, probability models, per-frame
// symbol counts for backward adaptation, and the left/above context caches
// used during block decoding. (Excerpt is sampled; several members are not
// visible here.)
97 typedef struct VP9Context {
104 VP9Block *b_base, *b;            // block array base and current block pointer
106 int row, row7, col, col7;        // current position in 8x8 units; *7 = masked low bits
108 ptrdiff_t y_stride, uv_stride;
// frame-header flags parsed in decode_frame_header()
111 uint8_t keyframe, last_keyframe;
113 uint8_t use_last_frame_mvs;      // temporal mv prediction allowed this frame
118 uint8_t refreshrefmask;          // which of the 8 ref slots this frame overwrites
119 uint8_t highprecisionmvs;
120 enum FilterMode filtermode;
121 uint8_t allowcompinter;          // compound prediction permitted (mixed sign biases)
124 uint8_t parallelmode;
128 uint8_t varcompref[2];           // the two variable refs used for compound prediction
129 ThreadFrame refs[8], next_refs[8];
131 #define REF_FRAME_MVPAIR 1
132 #define REF_FRAME_SEGMAP 2
139 uint8_t mblim_lut[64];           // loop-filter limit LUT, derived from sharpness
147 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
149 #define MAX_SEGMENT 8
153 uint8_t absolute_vals;           // segment features are absolute, not deltas
159 uint8_t skip_enabled;
// tiling layout for the current frame
168 unsigned log2_tile_cols, log2_tile_rows;
169 unsigned tile_cols, tile_rows;
170 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
172 unsigned sb_cols, sb_rows, rows, cols;   // frame size in 64x64 superblocks / 8x8 blocks
175 uint8_t coef[4][2][2][6][6][3];
179 uint8_t coef[4][2][2][6][6][11];
// symbol counts gathered during decode, used for backward probability adaptation
184 unsigned y_mode[4][10];
185 unsigned uv_mode[10][10];
186 unsigned filter[4][3];
187 unsigned mv_mode[7][4];
188 unsigned intra[4][2];
190 unsigned single_ref[5][2][2];
191 unsigned comp_ref[5][2];
192 unsigned tx32p[2][4];
193 unsigned tx16p[2][3];
196 unsigned mv_joint[4];
199 unsigned classes[11];
201 unsigned bits[10][2];
202 unsigned class0_fp[2][4];
204 unsigned class0_hp[2];
207 unsigned partition[4][4][4];
208 unsigned coef[4][2][2][6][6][3];
209 unsigned eob[4][2][2][6][6][2];
211 enum TxfmMode txfmmode;
212 enum CompPredMode comppredmode;
214 // contextual (left/above) cache
215 DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
216 DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
217 DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
218 DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
219 DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
220 DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
221 DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
222 DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
223 DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
224 DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
225 DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
226 DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
// "above" contexts are whole-row arrays, allocated in update_size()
227 uint8_t *above_partition_ctx;
228 uint8_t *above_mode_ctx;
229 // FIXME maybe merge some of the below in a flags field?
230 uint8_t *above_y_nnz_ctx;
231 uint8_t *above_uv_nnz_ctx[2];
232 uint8_t *above_skip_ctx; // 1bit
233 uint8_t *above_txfm_ctx; // 2bit
234 uint8_t *above_segpred_ctx; // 1bit
235 uint8_t *above_intra_ctx; // 1bit
236 uint8_t *above_comp_ctx; // 1bit
237 uint8_t *above_ref_ctx; // 2bit
238 uint8_t *above_filter_ctx;
239 VP56mv (*above_mv_ctx)[2];
242 uint8_t *intra_pred_data[3];
243 struct VP9Filter *lflvl;
244 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135*144];
246 // block reconstruction intermediates
247 int block_alloc_using_2pass;
248 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
249 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
250 struct { int x, y; } min_mv, max_mv;   // clamping range for predicted MVs
251 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64];
252 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64];
253 uint16_t mvscale[3][2];          // 14-bit fixed-point ref->cur scale per ref (0 = unscaled)
254 uint8_t mvstep[3][2];
// Block width/height per block size, indexed [unit][bs][0=w,1=h].
// NOTE(review): first row appears to be in 4-pixel units (64/4 = 16) and the
// second in 8-pixel units — confirm against the N_BS_SIZES enum order.
257 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
259 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
260 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
262 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
263 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
// Allocate the picture buffer for f plus one refcounted side buffer holding
// both the segmentation map and the mv/ref pairs (segmap first, mv after).
// Returns 0 on success or a negative AVERROR; on side-buffer failure the
// picture buffer is released again so no partial state leaks.
267 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
269 VP9Context *s = ctx->priv_data;
272 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
274 sz = 64 * s->sb_cols * s->sb_rows;   // 64 8x8 blocks per 64x64 superblock
275 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
276 ff_thread_release_buffer(ctx, &f->tf);
277 return AVERROR(ENOMEM);
280 f->segmentation_map = f->extradata->data;
281 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
// Release both halves of a VP9Frame: the threaded picture buffer and the
// refcounted segmap/mv side buffer. Safe on an already-empty frame.
286 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
288 ff_thread_release_buffer(ctx, &f->tf);
289 av_buffer_unref(&f->extradata);
// Make dst a new reference to src (picture + side buffer). On any failure the
// partially-created dst is unreferenced again and a negative AVERROR returned.
// The raw segmentation_map pointer can be copied because it aliases the
// refcounted extradata buffer.
292 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
296 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
298 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
299 vp9_unref_frame(ctx, dst);
300 return AVERROR(ENOMEM);
303 dst->segmentation_map = src->segmentation_map;
305 dst->uses_2pass = src->uses_2pass;
// (Re)initialize all frame-size-dependent state: superblock/block counts and
// the single slab that backs the per-row "above" context arrays, intra edge
// pixels and loop-filter levels. No-op if size and pixel format are unchanged.
// Returns 0 or AVERROR(ENOMEM).
310 static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
312 VP9Context *s = ctx->priv_data;
315 av_assert0(w > 0 && h > 0);
317 if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
323 s->sb_cols = (w + 63) >> 6;   // 64x64 superblocks
324 s->sb_rows = (h + 63) >> 6;
325 s->cols = (w + 7) >> 3;       // 8x8 blocks
326 s->rows = (h + 7) >> 3;
// carve n*sb_cols elements for each array out of one malloc'd slab
328 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
329 av_freep(&s->intra_pred_data[0]);
330 // FIXME we slightly over-allocate here for subsampled chroma, but a little
331 // bit of padding shouldn't affect performance...
332 p = av_malloc(s->sb_cols * (320 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
334 return AVERROR(ENOMEM);
335 assign(s->intra_pred_data[0], uint8_t *, 64);
336 assign(s->intra_pred_data[1], uint8_t *, 64);
337 assign(s->intra_pred_data[2], uint8_t *, 64);
338 assign(s->above_y_nnz_ctx, uint8_t *, 16);
339 assign(s->above_mode_ctx, uint8_t *, 16);
340 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
341 assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
342 assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
343 assign(s->above_partition_ctx, uint8_t *, 8);
344 assign(s->above_skip_ctx, uint8_t *, 8);
345 assign(s->above_txfm_ctx, uint8_t *, 8);
346 assign(s->above_segpred_ctx, uint8_t *, 8);
347 assign(s->above_intra_ctx, uint8_t *, 8);
348 assign(s->above_comp_ctx, uint8_t *, 8);
349 assign(s->above_ref_ctx, uint8_t *, 8);
350 assign(s->above_filter_ctx, uint8_t *, 8);
351 assign(s->lflvl, struct VP9Filter *, 1);
354 // these will be re-allocated a little later
355 av_freep(&s->b_base);
356 av_freep(&s->block_base);
// (Re)allocate the block structs and coefficient/EOB buffers. In 2-pass
// (frame-threaded) mode every superblock needs its own storage because pass 2
// runs after pass 1 finished the whole frame; in 1-pass mode a single shared
// buffer is reused per block. No-op when already allocated for the right mode.
// Returns 0 or AVERROR(ENOMEM).
361 static int update_block_buffers(AVCodecContext *ctx)
363 VP9Context *s = ctx->priv_data;
364 int chroma_blocks, chroma_eobs;
366 if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
370 av_free(s->block_base);
// chroma sizes shrink with each subsampled dimension
371 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
372 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
373 if (s->frames[CUR_FRAME].uses_2pass) {
374 int sbs = s->sb_cols * s->sb_rows;
376 s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
// one allocation: luma coeffs, 2x chroma coeffs, luma EOBs, 2x chroma EOBs
377 s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * sizeof(int16_t) +
378 16 * 16 + 2 * chroma_eobs) * sbs);
379 if (!s->b_base || !s->block_base)
380 return AVERROR(ENOMEM);
381 s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
382 s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks;
383 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks);
384 s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
385 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
387 s->b_base = av_malloc(sizeof(VP9Block));
388 s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * sizeof(int16_t) +
389 16 * 16 + 2 * chroma_eobs);
390 if (!s->b_base || !s->block_base)
391 return AVERROR(ENOMEM);
392 s->uvblock_base[0] = s->block_base + 64 * 64;
393 s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks;
394 s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks);
395 s->uveob_base[0] = s->eob_base + 16 * 16;
396 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
398 s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
403 // for some reason the sign bit is at the end, not the start, of a bit sequence
// Read an n-bit magnitude followed by a 1-bit sign; returns -v if the sign
// bit is set, otherwise v.
404 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
406 int v = get_bits(gb, n);
407 return get_bits1(gb) ? -v : v;
// Inverse of the "recenter nonneg" mapping: small coded values v alternate
// around the predictor m (m+1, m-1, m+2, ...); once v exceeds 2*m the coded
// value is used directly. Used by the subexponential probability update.
410 static av_always_inline int inv_recenter_nonneg(int v, int m)
412 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
415 // differential forward probability updates
// Decode a subexponentially-coded delta from the range coder and apply it to
// the current probability p (in [1,255]), returning the new probability.
// The delta is read as one of four VLC size classes (4/4/5/7-bit payloads),
// remapped through inv_map_table[] and recentered around p.
416 static int update_prob(VP56RangeCoder *c, int p)
418 static const int inv_map_table[254] = {
419 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
420 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
421 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
422 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
423 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
424 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
425 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
426 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
427 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
428 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
429 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
430 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
431 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
432 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
433 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
434 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
435 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
436 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
441 /* This code is trying to do a differential probability update. For a
442 * current probability A in the range [1, 255], the difference to a new
443 * probability of any value can be expressed differentially as 1-A,255-A
444 * where some part of this (absolute range) exists both in positive as
445 * well as the negative part, whereas another part only exists in one
446 * half. We're trying to code this shared part differentially, i.e.
447 * times two where the value of the lowest bit specifies the sign, and
448 * the single part is then coded on top of this. This absolute difference
449 * then again has a value of [0,254], but a bigger value in this range
450 * indicates that we're further away from the original value A, so we
451 * can code this as a VLC code, since higher values are increasingly
452 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
453 * updates vs. the 'fine, exact' updates further down the range, which
454 * adds one extra dimension to this differential update model. */
// choose a size class: 0-15, 16-31, 32-63, then the two largest ranges
456 if (!vp8_rac_get(c)) {
457 d = vp8_rac_get_uint(c, 4) + 0;
458 } else if (!vp8_rac_get(c)) {
459 d = vp8_rac_get_uint(c, 4) + 16;
460 } else if (!vp8_rac_get(c)) {
461 d = vp8_rac_get_uint(c, 5) + 32;
463 d = vp8_rac_get_uint(c, 7);
465 d = (d << 1) - 65 + vp8_rac_get(c);
// recenter around p, mirroring for the upper half so the result stays in range
469 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
470 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
// Parse the colorspace / color-range / subsampling bits of the frame header.
// Sets ctx->colorspace, ctx->color_range and s->ss_h/ss_v, and returns the
// resulting pixel format, or AVERROR_INVALIDDATA on reserved/illegal
// combinations (RGB in profile 0, 4:2:0 in profile 1, reserved bit set).
473 static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
475 static const enum AVColorSpace colorspaces[8] = {
476 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
477 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
479 VP9Context *s = ctx->priv_data;
480 enum AVPixelFormat res;
482 ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
483 if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
484 if (ctx->profile == 1) {
485 s->ss_h = s->ss_v = 1;
486 res = AV_PIX_FMT_GBRP;
487 ctx->color_range = AVCOL_RANGE_JPEG;   // RGB is always full range
489 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
490 return AVERROR_INVALIDDATA;
493 static const enum AVPixelFormat pix_fmt_for_ss[2 /* v */][2 /* h */] = {
494 { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
495 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P },
497 ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
498 if (ctx->profile == 1) {
// profile 1 signals subsampling explicitly; 4:2:0 is reserved for profile 0
499 s->ss_h = get_bits1(&s->gb);
500 s->ss_v = get_bits1(&s->gb);
501 if ((res = pix_fmt_for_ss[s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
502 av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile 1\n");
503 return AVERROR_INVALIDDATA;
504 } else if (get_bits1(&s->gb)) {
505 av_log(ctx, AV_LOG_ERROR, "Profile 1 color details reserved bit set\n");
506 return AVERROR_INVALIDDATA;
509 s->ss_h = s->ss_v = 1;   // profile 0 is always 4:2:0
510 res = AV_PIX_FMT_YUV420P;
// Parse a complete VP9 frame header: the uncompressed part (via GetBitContext)
// followed by the arithmetic-coded ("compressed") part (via VP56RangeCoder),
// applying all forward probability updates into s->prob.
// On success returns the total header size in bytes (uncompressed + size2);
// for a superframe-index "show existing frame" packet, *ref is set instead.
// Negative AVERROR on any bitstream violation.
517 static int decode_frame_header(AVCodecContext *ctx,
518 const uint8_t *data, int size, int *ref)
520 VP9Context *s = ctx->priv_data;
521 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
522 enum AVPixelFormat fmt = ctx->pix_fmt;
524 const uint8_t *data2;
527 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
528 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
531 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
532 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
533 return AVERROR_INVALIDDATA;
535 ctx->profile = get_bits1(&s->gb);
536 ctx->profile |= get_bits1(&s->gb) << 1;
537 if (ctx->profile > 1) {
538 av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
539 return AVERROR_INVALIDDATA;
// "show existing frame": just report which ref slot to display
541 if (get_bits1(&s->gb)) {
542 *ref = get_bits(&s->gb, 3);
545 s->last_keyframe = s->keyframe;
546 s->keyframe = !get_bits1(&s->gb);
547 last_invisible = s->invisible;
548 s->invisible = !get_bits1(&s->gb);
549 s->errorres = get_bits1(&s->gb);
550 s->use_last_frame_mvs = !s->errorres && !last_invisible;
// --- keyframe path ---
552 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
553 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
554 return AVERROR_INVALIDDATA;
556 if ((fmt = read_colorspace_details(ctx)) < 0)
558 // for profile 1, here follows the subsampling bits
559 s->refreshrefmask = 0xff;   // keyframes refresh every reference slot
560 w = get_bits(&s->gb, 16) + 1;
561 h = get_bits(&s->gb, 16) + 1;
562 if (get_bits1(&s->gb)) // display size
563 skip_bits(&s->gb, 32);
// --- inter / intra-only path ---
565 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
566 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
568 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
569 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
570 return AVERROR_INVALIDDATA;
572 if (ctx->profile == 1) {
573 if ((fmt = read_colorspace_details(ctx)) < 0)
576 s->ss_h = s->ss_v = 1;
577 fmt = AV_PIX_FMT_YUV420P;
578 ctx->colorspace = AVCOL_SPC_BT470BG;
579 ctx->color_range = AVCOL_RANGE_JPEG;
581 s->refreshrefmask = get_bits(&s->gb, 8);
582 w = get_bits(&s->gb, 16) + 1;
583 h = get_bits(&s->gb, 16) + 1;
584 if (get_bits1(&s->gb)) // display size
585 skip_bits(&s->gb, 32);
// inter frame: read the three active reference indices and their sign biases
587 s->refreshrefmask = get_bits(&s->gb, 8);
588 s->refidx[0] = get_bits(&s->gb, 3);
589 s->signbias[0] = get_bits1(&s->gb);
590 s->refidx[1] = get_bits(&s->gb, 3);
591 s->signbias[1] = get_bits1(&s->gb);
592 s->refidx[2] = get_bits(&s->gb, 3);
593 s->signbias[2] = get_bits1(&s->gb);
594 if (!s->refs[s->refidx[0]].f->data[0] ||
595 !s->refs[s->refidx[1]].f->data[0] ||
596 !s->refs[s->refidx[2]].f->data[0]) {
597 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
598 return AVERROR_INVALIDDATA;
// frame size: either inherited from one of the refs or coded explicitly
600 if (get_bits1(&s->gb)) {
601 w = s->refs[s->refidx[0]].f->width;
602 h = s->refs[s->refidx[0]].f->height;
603 } else if (get_bits1(&s->gb)) {
604 w = s->refs[s->refidx[1]].f->width;
605 h = s->refs[s->refidx[1]].f->height;
606 } else if (get_bits1(&s->gb)) {
607 w = s->refs[s->refidx[2]].f->width;
608 h = s->refs[s->refidx[2]].f->height;
610 w = get_bits(&s->gb, 16) + 1;
611 h = get_bits(&s->gb, 16) + 1;
613 // Note that in this code, "CUR_FRAME" is actually before we
614 // have formally allocated a frame, and thus actually represents
616 s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
617 s->frames[CUR_FRAME].tf.f->height == h;
618 if (get_bits1(&s->gb)) // display size
619 skip_bits(&s->gb, 32);
620 s->highprecisionmvs = get_bits1(&s->gb);
621 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
// compound prediction is allowed only when the refs disagree in sign bias;
// the ref with the minority bias becomes the fixed ref, the others variable
623 s->allowcompinter = s->signbias[0] != s->signbias[1] ||
624 s->signbias[0] != s->signbias[2];
625 if (s->allowcompinter) {
626 if (s->signbias[0] == s->signbias[1]) {
628 s->varcompref[0] = 0;
629 s->varcompref[1] = 1;
630 } else if (s->signbias[0] == s->signbias[2]) {
632 s->varcompref[0] = 0;
633 s->varcompref[1] = 2;
636 s->varcompref[0] = 1;
637 s->varcompref[1] = 2;
// validate each ref against the new frame size and precompute mv scaling
641 for (i = 0; i < 3; i++) {
642 AVFrame *ref = s->refs[s->refidx[i]].f;
643 int refw = ref->width, refh = ref->height;
645 if (ref->format != fmt) {
646 av_log(ctx, AV_LOG_ERROR,
647 "Ref pixfmt (%s) did not match current frame (%s)",
648 av_get_pix_fmt_name(ref->format),
649 av_get_pix_fmt_name(fmt));
650 return AVERROR_INVALIDDATA;
651 } else if (refw == w && refh == h) {
652 s->mvscale[i][0] = s->mvscale[i][1] = 0;
// spec limits: ref at most 2x larger, at most 16x smaller than the frame
654 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
655 av_log(ctx, AV_LOG_ERROR,
656 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
658 return AVERROR_INVALIDDATA;
660 s->mvscale[i][0] = (refw << 14) / w;   // 14-bit fixed-point ratio
661 s->mvscale[i][1] = (refh << 14) / h;
662 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
663 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
668 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
669 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
670 s->framectxid = c = get_bits(&s->gb, 2);
672 /* loopfilter header data */
673 s->filter.level = get_bits(&s->gb, 6);
674 sharp = get_bits(&s->gb, 3);
675 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
676 // the old cache values since they are still valid
677 if (s->filter.sharpness != sharp)
678 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
679 s->filter.sharpness = sharp;
680 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
681 if (get_bits1(&s->gb)) {
682 for (i = 0; i < 4; i++)
683 if (get_bits1(&s->gb))
684 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
685 for (i = 0; i < 2; i++)
686 if (get_bits1(&s->gb))
687 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
691 /* quantization header data */
692 s->yac_qi = get_bits(&s->gb, 8);
693 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
694 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
695 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
696 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
697 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
699 /* segmentation header info */
700 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
701 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
702 for (i = 0; i < 7; i++)
703 s->prob.seg[i] = get_bits1(&s->gb) ?
704 get_bits(&s->gb, 8) : 255;
705 if ((s->segmentation.temporal = get_bits1(&s->gb))) {
706 for (i = 0; i < 3; i++)
707 s->prob.segpred[i] = get_bits1(&s->gb) ?
708 get_bits(&s->gb, 8) : 255;
// a segmap carried over from the previous frame is invalid across a resize
711 if ((!s->segmentation.update_map || s->segmentation.temporal) &&
712 (w != s->frames[CUR_FRAME].tf.f->width ||
713 h != s->frames[CUR_FRAME].tf.f->height)) {
714 av_log(ctx, AV_LOG_ERROR,
715 "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
716 s->segmentation.temporal, s->segmentation.update_map);
717 return AVERROR_INVALIDDATA;
720 if (get_bits1(&s->gb)) {
721 s->segmentation.absolute_vals = get_bits1(&s->gb);
722 for (i = 0; i < 8; i++) {
723 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
724 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
725 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
726 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
727 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
728 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
729 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
733 s->segmentation.feat[0].q_enabled = 0;
734 s->segmentation.feat[0].lf_enabled = 0;
735 s->segmentation.feat[0].skip_enabled = 0;
736 s->segmentation.feat[0].ref_enabled = 0;
739 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
740 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
741 int qyac, qydc, quvac, quvdc, lflvl, sh;
743 if (s->segmentation.feat[i].q_enabled) {
744 if (s->segmentation.absolute_vals)
745 qyac = s->segmentation.feat[i].q_val;
747 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
751 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
752 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
753 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
754 qyac = av_clip_uintp2(qyac, 8);
756 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
757 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
758 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
759 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];
761 sh = s->filter.level >= 32;
762 if (s->segmentation.feat[i].lf_enabled) {
763 if (s->segmentation.absolute_vals)
764 lflvl = s->segmentation.feat[i].lf_val;
766 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
768 lflvl = s->filter.level;
770 if (s->lf_delta.enabled) {
771 s->segmentation.feat[i].lflvl[0][0] =
772 s->segmentation.feat[i].lflvl[0][1] =
773 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
774 for (j = 1; j < 4; j++) {
775 s->segmentation.feat[i].lflvl[j][0] =
776 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
777 s->lf_delta.mode[0]) * (1 << sh)), 6);
778 s->segmentation.feat[i].lflvl[j][1] =
779 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
780 s->lf_delta.mode[1]) * (1 << sh)), 6);
783 memset(s->segmentation.feat[i].lflvl, lflvl,
784 sizeof(s->segmentation.feat[i].lflvl));
// allocate/resize all size-dependent buffers now that w/h/fmt are final
789 if ((res = update_size(ctx, w, h, fmt)) < 0) {
790 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
// tile columns: spec caps a tile at 64 superblocks wide and requires >= 4
793 for (s->tiling.log2_tile_cols = 0;
794 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
795 s->tiling.log2_tile_cols++) ;
796 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
797 max = FFMAX(0, max - 1);
798 while (max > s->tiling.log2_tile_cols) {
799 if (get_bits1(&s->gb))
800 s->tiling.log2_tile_cols++;
804 s->tiling.log2_tile_rows = decode012(&s->gb);
805 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
806 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
807 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
// one range coder per tile column so columns can be decoded independently
808 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
809 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
811 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
812 return AVERROR(ENOMEM);
// keyframes / error-resilient / intra-only frames reset all prob contexts
816 if (s->keyframe || s->errorres || s->intraonly) {
817 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
818 s->prob_ctx[3].p = vp9_default_probs;
819 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
820 sizeof(vp9_default_coef_probs));
821 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
822 sizeof(vp9_default_coef_probs));
823 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
824 sizeof(vp9_default_coef_probs));
825 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
826 sizeof(vp9_default_coef_probs));
829 // next 16 bits is size of the rest of the header (arith-coded)
830 size2 = get_bits(&s->gb, 16);
831 data2 = align_get_bits(&s->gb);
832 if (size2 > size - (data2 - data)) {
833 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
834 return AVERROR_INVALIDDATA;
836 ff_vp56_init_range_decoder(&s->c, data2, size2);
837 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
838 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
839 return AVERROR_INVALIDDATA;
// reset adaptation counts (coef/eob only for intra frames)
842 if (s->keyframe || s->intraonly) {
843 memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
845 memset(&s->counts, 0, sizeof(s->counts));
847 // FIXME is it faster to not copy here, but do it down in the fw updates
848 // as explicit copies if the fw update is missing (and skip the copy upon
850 s->prob.p = s->prob_ctx[c].p;
// --- compressed header: forward probability updates follow ---
854 s->txfmmode = TX_4X4;
856 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
857 if (s->txfmmode == 3)
858 s->txfmmode += vp8_rac_get(&s->c);
860 if (s->txfmmode == TX_SWITCHABLE) {
861 for (i = 0; i < 2; i++)
862 if (vp56_rac_get_prob_branchy(&s->c, 252))
863 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
864 for (i = 0; i < 2; i++)
865 for (j = 0; j < 2; j++)
866 if (vp56_rac_get_prob_branchy(&s->c, 252))
867 s->prob.p.tx16p[i][j] =
868 update_prob(&s->c, s->prob.p.tx16p[i][j]);
869 for (i = 0; i < 2; i++)
870 for (j = 0; j < 3; j++)
871 if (vp56_rac_get_prob_branchy(&s->c, 252))
872 s->prob.p.tx32p[i][j] =
873 update_prob(&s->c, s->prob.p.tx32p[i][j]);
// coefficient probability updates, per transform size
878 for (i = 0; i < 4; i++) {
879 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
880 if (vp8_rac_get(&s->c)) {
881 for (j = 0; j < 2; j++)
882 for (k = 0; k < 2; k++)
883 for (l = 0; l < 6; l++)
884 for (m = 0; m < 6; m++) {
885 uint8_t *p = s->prob.coef[i][j][k][l][m];
886 uint8_t *r = ref[j][k][l][m];
887 if (m >= 3 && l == 0) // dc only has 3 pt
889 for (n = 0; n < 3; n++) {
890 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
891 p[n] = update_prob(&s->c, r[n]);
899 for (j = 0; j < 2; j++)
900 for (k = 0; k < 2; k++)
901 for (l = 0; l < 6; l++)
902 for (m = 0; m < 6; m++) {
903 uint8_t *p = s->prob.coef[i][j][k][l][m];
904 uint8_t *r = ref[j][k][l][m];
905 if (m > 3 && l == 0) // dc only has 3 pt
911 if (s->txfmmode == i)
// skip-flag probabilities (all frame types)
916 for (i = 0; i < 3; i++)
917 if (vp56_rac_get_prob_branchy(&s->c, 252))
918 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
// the remaining updates only exist for inter frames
919 if (!s->keyframe && !s->intraonly) {
920 for (i = 0; i < 7; i++)
921 for (j = 0; j < 3; j++)
922 if (vp56_rac_get_prob_branchy(&s->c, 252))
923 s->prob.p.mv_mode[i][j] =
924 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
926 if (s->filtermode == FILTER_SWITCHABLE)
927 for (i = 0; i < 4; i++)
928 for (j = 0; j < 2; j++)
929 if (vp56_rac_get_prob_branchy(&s->c, 252))
930 s->prob.p.filter[i][j] =
931 update_prob(&s->c, s->prob.p.filter[i][j]);
933 for (i = 0; i < 4; i++)
934 if (vp56_rac_get_prob_branchy(&s->c, 252))
935 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
937 if (s->allowcompinter) {
938 s->comppredmode = vp8_rac_get(&s->c);
940 s->comppredmode += vp8_rac_get(&s->c);
941 if (s->comppredmode == PRED_SWITCHABLE)
942 for (i = 0; i < 5; i++)
943 if (vp56_rac_get_prob_branchy(&s->c, 252))
945 update_prob(&s->c, s->prob.p.comp[i]);
947 s->comppredmode = PRED_SINGLEREF;
950 if (s->comppredmode != PRED_COMPREF) {
951 for (i = 0; i < 5; i++) {
952 if (vp56_rac_get_prob_branchy(&s->c, 252))
953 s->prob.p.single_ref[i][0] =
954 update_prob(&s->c, s->prob.p.single_ref[i][0]);
955 if (vp56_rac_get_prob_branchy(&s->c, 252))
956 s->prob.p.single_ref[i][1] =
957 update_prob(&s->c, s->prob.p.single_ref[i][1]);
961 if (s->comppredmode != PRED_SINGLEREF) {
962 for (i = 0; i < 5; i++)
963 if (vp56_rac_get_prob_branchy(&s->c, 252))
964 s->prob.p.comp_ref[i] =
965 update_prob(&s->c, s->prob.p.comp_ref[i]);
968 for (i = 0; i < 4; i++)
969 for (j = 0; j < 9; j++)
970 if (vp56_rac_get_prob_branchy(&s->c, 252))
971 s->prob.p.y_mode[i][j] =
972 update_prob(&s->c, s->prob.p.y_mode[i][j]);
974 for (i = 0; i < 4; i++)
975 for (j = 0; j < 4; j++)
976 for (k = 0; k < 3; k++)
977 if (vp56_rac_get_prob_branchy(&s->c, 252))
978 s->prob.p.partition[3 - i][j][k] =
979 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
981 // mv fields don't use the update_prob subexp model for some reason
982 for (i = 0; i < 3; i++)
983 if (vp56_rac_get_prob_branchy(&s->c, 252))
984 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
986 for (i = 0; i < 2; i++) {
987 if (vp56_rac_get_prob_branchy(&s->c, 252))
988 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
990 for (j = 0; j < 10; j++)
991 if (vp56_rac_get_prob_branchy(&s->c, 252))
992 s->prob.p.mv_comp[i].classes[j] =
993 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
995 if (vp56_rac_get_prob_branchy(&s->c, 252))
996 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
998 for (j = 0; j < 10; j++)
999 if (vp56_rac_get_prob_branchy(&s->c, 252))
1000 s->prob.p.mv_comp[i].bits[j] =
1001 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1004 for (i = 0; i < 2; i++) {
1005 for (j = 0; j < 2; j++)
1006 for (k = 0; k < 3; k++)
1007 if (vp56_rac_get_prob_branchy(&s->c, 252))
1008 s->prob.p.mv_comp[i].class0_fp[j][k] =
1009 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1011 for (j = 0; j < 3; j++)
1012 if (vp56_rac_get_prob_branchy(&s->c, 252))
1013 s->prob.p.mv_comp[i].fp[j] =
1014 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1017 if (s->highprecisionmvs) {
1018 for (i = 0; i < 2; i++) {
1019 if (vp56_rac_get_prob_branchy(&s->c, 252))
1020 s->prob.p.mv_comp[i].class0_hp =
1021 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1023 if (vp56_rac_get_prob_branchy(&s->c, 252))
1024 s->prob.p.mv_comp[i].hp =
1025 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// total bytes consumed by both header parts
1030 return (data2 - data) + size2;
// Clamp a candidate motion vector into the valid range for the current block
// (s->min_mv / s->max_mv, set up by the caller). src and dst may alias.
1033 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
1036 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
1037 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
// Derive the MV predictor (*pmv) for reference frame 'ref' of the current
// block, scanning candidates in VP9 order: already-decoded sub-block MVs of
// this block (sb >= 0), above/left edge MV contexts, spatial neighbours from
// mv_ref_blk_off[], the co-located MV of the previous frame, and finally the
// same sources again accepting MVs that used a *different* reference frame
// (sign-flipped when the two references' sign biases differ).
// z selects which of the two MVs of a compound block to read; idx selects
// whether the first or second distinct candidate is returned (the RETURN_*
// macros below exit the function as soon as enough candidates were seen).
1040 static void find_ref_mvs(VP9Context *s,
1041 VP56mv *pmv, int ref, int z, int idx, int sb)
// Per block size: 8 spatial neighbour offsets, each stored as {col,row}
// deltas in 8x8 units relative to the block's top-left position.
1043 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
1044 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
1045 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
1046 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
1047 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
1048 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
1049 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
1050 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
1051 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1052 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
1053 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1054 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
1055 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
1056 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
1057 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1058 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
1059 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
1060 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1061 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1062 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1063 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1064 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1065 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1066 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1067 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1068 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1069 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1072 int row = s->row, col = s->col, row7 = s->row7;
1073 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
// Sentinel packed-MV value: both components 0x8000, never a legal clamped MV.
// 'mem' remembers the first candidate so a second, distinct one can be found.
1074 #define INVALID_MV 0x80008000U
1075 uint32_t mem = INVALID_MV;
// Accept an already-decoded sub-block MV of this very block without clamping.
1078 #define RETURN_DIRECT_MV(mv) \
1080 uint32_t m = AV_RN32A(&mv); \
1084 } else if (mem == INVALID_MV) { \
1086 } else if (m != mem) { \
// sb: 0..3 = sub-block index within an 8x8 block split; earlier sub-block
// MVs of the same block are the highest-priority candidates.
1093 if (sb == 2 || sb == 1) {
1094 RETURN_DIRECT_MV(b->mv[0][z]);
1095 } else if (sb == 3) {
1096 RETURN_DIRECT_MV(b->mv[2][z]);
1097 RETURN_DIRECT_MV(b->mv[1][z]);
1098 RETURN_DIRECT_MV(b->mv[0][z]);
// Accept a neighbour MV after clamping it to the allowed range (min_mv/max_mv,
// see clamp_mv above); duplicates of the first candidate are rejected.
1101 #define RETURN_MV(mv) \
1106 clamp_mv(&tmp, &mv, s); \
1107 m = AV_RN32A(&tmp); \
1111 } else if (mem == INVALID_MV) { \
1113 } else if (m != mem) { \
1118 uint32_t m = AV_RN32A(&mv); \
1120 clamp_mv(pmv, &mv, s); \
1122 } else if (mem == INVALID_MV) { \
1124 } else if (m != mem) { \
1125 clamp_mv(pmv, &mv, s); \
// Above edge: if the block directly above used 'ref', take its cached MV.
1132 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1133 if (mv->ref[0] == ref) {
1134 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1135 } else if (mv->ref[1] == ref) {
1136 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
// Left edge: only available when not at the left tile boundary.
1139 if (col > s->tiling.tile_col_start) {
1140 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1141 if (mv->ref[0] == ref) {
1142 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1143 } else if (mv->ref[1] == ref) {
1144 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1152 // previously coded MVs in this neighbourhood, using same reference frame
1153 for (; i < 8; i++) {
1154 int c = p[i][0] + col, r = p[i][1] + row;
// Candidate positions must stay inside the tile (columns) and frame (rows).
1156 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1157 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1159 if (mv->ref[0] == ref) {
1160 RETURN_MV(mv->mv[0]);
1161 } else if (mv->ref[1] == ref) {
1162 RETURN_MV(mv->mv[1]);
1167 // MV at this position in previous frame, using same reference frame
1168 if (s->use_last_frame_mvs) {
1169 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
// Frame-threading: make sure the previous frame's MVs for this row exist
// before reading them (not needed when that frame was decoded in 2-pass mode).
1171 if (!s->frames[REF_FRAME_MVPAIR].uses_2pass)
1172 ff_thread_await_progress(&s->frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
1173 if (mv->ref[0] == ref) {
1174 RETURN_MV(mv->mv[0]);
1175 } else if (mv->ref[1] == ref) {
1176 RETURN_MV(mv->mv[1]);
// Accept an MV that used a different reference; when 'scale' is set (the two
// references have opposite sign bias) the MV is negated before use.
1180 #define RETURN_SCALE_MV(mv, scale) \
1183 VP56mv mv_temp = { -mv.x, -mv.y }; \
1184 RETURN_MV(mv_temp); \
1190 // previously coded MVs in this neighbourhood, using different reference frame
1191 for (i = 0; i < 8; i++) {
1192 int c = p[i][0] + col, r = p[i][1] + row;
1194 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1195 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1197 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1198 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1200 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1201 // BUG - libvpx has this condition regardless of whether
1202 // we used the first ref MV and pre-scaling
1203 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1204 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1209 // MV at this position in previous frame, using different reference frame
1210 if (s->use_last_frame_mvs) {
1211 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1213 // no need to await_progress, because we already did that above
1214 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1215 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1217 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1218 // BUG - libvpx has this condition regardless of whether
1219 // we used the first ref MV and pre-scaling
1220 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1221 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1228 #undef RETURN_SCALE_MV
// Decode one MV component (idx: 0 = vertical, 1 = horizontal) from the range
// coder: sign, magnitude class, class-dependent magnitude bits, fractional
// (quarter-pel) bits and, when 'hp' is set, an eighth-pel bit. Every symbol
// read is mirrored into s->counts for backward probability adaptation.
// Returns the signed component delta.
1231 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1233 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1234 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1235 s->prob.p.mv_comp[idx].classes);
1237 s->counts.mv_comp[idx].sign[sign]++;
1238 s->counts.mv_comp[idx].classes[c]++;
// Class >= 1: read c raw magnitude bits (one per class level), then the
// fractional and high-precision bits.
1242 for (n = 0, m = 0; m < c; m++) {
1243 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1245 s->counts.mv_comp[idx].bits[m][bit]++;
1248 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1250 s->counts.mv_comp[idx].fp[bit]++;
1252 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1253 s->counts.mv_comp[idx].hp[bit]++;
1257 // bug in libvpx - we count for bw entropy purposes even if the
// hp bit was not actually coded (counted as 1) - kept for bit-exactness.
1259 s->counts.mv_comp[idx].hp[1]++;
// Class 0: single class0 bit plus class0-specific fractional/hp bits.
1263 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1264 s->counts.mv_comp[idx].class0[n]++;
1265 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1266 s->prob.p.mv_comp[idx].class0_fp[n]);
1267 s->counts.mv_comp[idx].class0_fp[n][bit]++;
1268 n = (n << 3) | (bit << 1);
1270 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1271 s->counts.mv_comp[idx].class0_hp[bit]++;
1275 // bug in libvpx - we count for bw entropy purposes even if the
// class0 hp bit was not actually coded - kept for bit-exactness.
1277 s->counts.mv_comp[idx].class0_hp[1]++;
// Magnitude is stored biased by 1 (n == 0 encodes delta magnitude 1).
1281 return sign ? -(n + 1) : (n + 1);
// Fill mv[0] (and mv[1] for compound blocks) for one (sub-)block: ZEROMV gets
// zero vectors; otherwise the predictor comes from find_ref_mvs(), and for
// NEWMV a coded MV delta (joint + per-component) is added on top.
// sb is the sub-block index, or -1 when the whole block is coded at once.
1284 static void fill_mv(VP9Context *s,
1285 VP56mv *mv, int mode, int sb)
1289 if (mode == ZEROMV) {
1294 // FIXME cache this value and reuse for other subblocks
1295 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1296 mode == NEWMV ? -1 : sb);
1297 // FIXME maybe move this code into find_ref_mvs()
// hp (use of 1/8-pel precision) is only allowed when enabled in the header
// and the predictor is small enough (|component| < 64).
1298 if ((mode == NEWMV || sb == -1) &&
1299 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1313 if (mode == NEWMV) {
// The joint symbol says which components carry a coded delta.
1314 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1315 s->prob.p.mv_joint);
1317 s->counts.mv_joint[j]++;
1318 if (j >= MV_JOINT_V)
1319 mv[0].y += read_mv_component(s, 0, hp);
1321 mv[0].x += read_mv_component(s, 1, hp);
// Second reference of a compound block: same procedure with ref[1].
1325 // FIXME cache this value and reuse for other subblocks
1326 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1327 mode == NEWMV ? -1 : sb);
1328 if ((mode == NEWMV || sb == -1) &&
1329 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1343 if (mode == NEWMV) {
1344 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1345 s->prob.p.mv_joint);
1347 s->counts.mv_joint[j]++;
1348 if (j >= MV_JOINT_V)
1349 mv[1].y += read_mv_component(s, 0, hp);
1351 mv[1].x += read_mv_component(s, 1, hp);
// Fill a w x h byte rectangle at 'ptr' (row pitch 'stride') with value 'v',
// using the widest aligned store available for each width class.
1357 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1358 ptrdiff_t stride, int v)
// Replicate v across 2/4/8 bytes so one store covers a whole row segment.
1368 int v16 = v * 0x0101;
1376 uint32_t v32 = v * 0x01010101;
1385 uint64_t v64 = v * 0x0101010101010101ULL;
// Fallback without fast 64-bit stores: two 32-bit stores per 8-byte row.
1391 uint32_t v32 = v * 0x01010101;
1394 AV_WN32A(ptr + 4, v32);
// Decode all mode information for the current block: segment id, skip flag,
// intra/inter flag, transform size, then either intra prediction modes or
// (for inter blocks) reference frame(s), interpolation filter, inter modes
// and motion vectors. Finally the above/left context arrays and the
// per-frame MV/reference buffers are updated for later blocks and frames.
1403 static void decode_mode(AVCodecContext *ctx)
// Partition contexts written to the above/left context rows, per block size.
1405 static const uint8_t left_ctx[N_BS_SIZES] = {
1406 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1408 static const uint8_t above_ctx[N_BS_SIZES] = {
1409 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
// Largest transform size permitted for each block size.
1411 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1412 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1413 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1415 VP9Context *s = ctx->priv_data;
1417 int row = s->row, col = s->col, row7 = s->row7;
1418 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
// w4/h4: block size in 4x4 units, clipped against the frame edge.
1419 int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1420 int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
// Above/left neighbours exist inside the frame resp. inside the current tile.
1421 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1422 int vref, filter_id;
// --- segment id ----------------------------------------------------------
1424 if (!s->segmentation.enabled) {
1426 } else if (s->keyframe || s->intraonly) {
1427 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
// Temporal prediction: either the map is not updated, or a coded flag says
// the segment id is predicted from the previous frame's segmentation map.
1428 } else if (!s->segmentation.update_map ||
1429 (s->segmentation.temporal &&
1430 vp56_rac_get_prob_branchy(&s->c,
1431 s->prob.segpred[s->above_segpred_ctx[col] +
1432 s->left_segpred_ctx[row7]]))) {
1435 uint8_t *refsegmap = s->frames[REF_FRAME_SEGMAP].segmentation_map;
// Frame-threading: wait for the reference segmentation map rows.
1437 if (!s->frames[REF_FRAME_SEGMAP].uses_2pass)
1438 ff_thread_await_progress(&s->frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
// Predicted id = minimum id over the co-located area in the reference map.
1439 for (y = 0; y < h4; y++) {
1440 int idx_base = (y + row) * 8 * s->sb_cols + col;
1441 for (x = 0; x < w4; x++)
1442 pred = FFMIN(pred, refsegmap[idx_base + x]);
1444 av_assert1(pred < 8);
1450 memset(&s->above_segpred_ctx[col], 1, w4);
1451 memset(&s->left_segpred_ctx[row7], 1, h4);
1453 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1456 memset(&s->above_segpred_ctx[col], 0, w4);
1457 memset(&s->left_segpred_ctx[row7], 0, h4);
// Store the decoded segment id into this frame's segmentation map.
1459 if (s->segmentation.enabled &&
1460 (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1461 setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1462 bw4, bh4, 8 * s->sb_cols, b->seg_id);
// --- skip flag -----------------------------------------------------------
1465 b->skip = s->segmentation.enabled &&
1466 s->segmentation.feat[b->seg_id].skip_enabled;
1468 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1469 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1470 s->counts.skip[c][b->skip]++;
// --- intra/inter flag ----------------------------------------------------
1473 if (s->keyframe || s->intraonly) {
1475 } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
// A segment-level reference of 0 means intra.
1476 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1480 if (have_a && have_l) {
1481 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1484 c = have_a ? 2 * s->above_intra_ctx[col] :
1485 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1487 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1488 s->counts.intra[c][bit]++;
// --- transform size ------------------------------------------------------
1492 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
// Context derived from neighbour tx sizes (skip blocks count as max_tx).
1496 c = (s->above_skip_ctx[col] ? max_tx :
1497 s->above_txfm_ctx[col]) +
1498 (s->left_skip_ctx[row7] ? max_tx :
1499 s->left_txfm_ctx[row7]) > max_tx;
1501 c = s->above_skip_ctx[col] ? 1 :
1502 (s->above_txfm_ctx[col] * 2 > max_tx);
1504 } else if (have_l) {
1505 c = s->left_skip_ctx[row7] ? 1 :
1506 (s->left_txfm_ctx[row7] * 2 > max_tx);
// Unary-coded tx size, capped by max_tx (up to 3 bits for 32x32).
1512 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1514 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1516 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1518 s->counts.tx32p[c][b->tx]++;
1521 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1523 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1524 s->counts.tx16p[c][b->tx]++;
1527 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1528 s->counts.tx8p[c][b->tx]++;
1535 b->tx = FFMIN(max_tx, s->txfmmode);
// --- intra modes, keyframe/intra-only frames (default KF probabilities) ---
1538 if (s->keyframe || s->intraonly) {
1539 uint8_t *a = &s->above_mode_ctx[col * 2];
1540 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
// Sub-8x8 blocks code up to four separate sub-modes, each conditioned on
// its above/left neighbour modes.
1543 if (b->bs > BS_8x8) {
1544 // FIXME the memory storage intermediates here aren't really
1545 // necessary, they're just there to make the code slightly
1547 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1548 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1549 if (b->bs != BS_8x4) {
1550 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1551 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1552 l[0] = a[1] = b->mode[1];
1554 l[0] = a[1] = b->mode[1] = b->mode[0];
1556 if (b->bs != BS_4x8) {
1557 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1558 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1559 if (b->bs != BS_8x4) {
1560 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1561 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1562 l[1] = a[1] = b->mode[3];
1564 l[1] = a[1] = b->mode[3] = b->mode[2];
1567 b->mode[2] = b->mode[0];
1568 l[1] = a[1] = b->mode[3] = b->mode[1];
// 8x8 and larger: one luma mode for the whole block.
1571 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1572 vp9_default_kf_ymode_probs[*a][*l]);
1573 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1574 // FIXME this can probably be optimized
1575 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1576 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1578 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1579 vp9_default_kf_uvmode_probs[b->mode[3]]);
// --- intra modes in inter frames (adaptive probabilities + counts) --------
1580 } else if (b->intra) {
1582 if (b->bs > BS_8x8) {
1583 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1584 s->prob.p.y_mode[0]);
1585 s->counts.y_mode[0][b->mode[0]]++;
1586 if (b->bs != BS_8x4) {
1587 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1588 s->prob.p.y_mode[0]);
1589 s->counts.y_mode[0][b->mode[1]]++;
1591 b->mode[1] = b->mode[0];
1593 if (b->bs != BS_4x8) {
1594 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1595 s->prob.p.y_mode[0]);
1596 s->counts.y_mode[0][b->mode[2]]++;
1597 if (b->bs != BS_8x4) {
1598 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1599 s->prob.p.y_mode[0]);
1600 s->counts.y_mode[0][b->mode[3]]++;
1602 b->mode[3] = b->mode[2];
1605 b->mode[2] = b->mode[0];
1606 b->mode[3] = b->mode[1];
// y_mode probability set is selected by block size group.
1609 static const uint8_t size_group[10] = {
1610 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1612 int sz = size_group[b->bs];
1614 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1615 s->prob.p.y_mode[sz]);
1616 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1617 s->counts.y_mode[sz][b->mode[3]]++;
1619 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1620 s->prob.p.uv_mode[b->mode[3]]);
1621 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
// --- inter blocks --------------------------------------------------------
// Context for the inter-mode symbol, indexed by above x left neighbour
// mode contexts (values >= 10 are inter modes, below are intra).
1623 static const uint8_t inter_mode_ctx_lut[14][14] = {
1624 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1625 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1626 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1627 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1628 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1629 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1630 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1631 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1632 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1633 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1634 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1635 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1636 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1637 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
// Segment-level fixed reference overrides any coded reference syntax.
1640 if (s->segmentation.feat[b->seg_id].ref_enabled) {
1641 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1643 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1645 // read comp_pred flag
1646 if (s->comppredmode != PRED_SWITCHABLE) {
1647 b->comp = s->comppredmode == PRED_COMPREF;
// PRED_SWITCHABLE: derive a context from the neighbours' compound/intra/
// reference state, then read the compound flag.
1651 // FIXME add intra as ref=0xff (or -1) to make these easier?
1654 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1656 } else if (s->above_comp_ctx[col]) {
1657 c = 2 + (s->left_intra_ctx[row7] ||
1658 s->left_ref_ctx[row7] == s->fixcompref);
1659 } else if (s->left_comp_ctx[row7]) {
1660 c = 2 + (s->above_intra_ctx[col] ||
1661 s->above_ref_ctx[col] == s->fixcompref);
1663 c = (!s->above_intra_ctx[col] &&
1664 s->above_ref_ctx[col] == s->fixcompref) ^
1665 (!s->left_intra_ctx[row7] &&
// NOTE(review): 'row & 7' here where every sibling expression uses the
// cached 'row7' - same value if row7 == row & 7, but verify upstream.
1666 s->left_ref_ctx[row & 7] == s->fixcompref);
1669 c = s->above_comp_ctx[col] ? 3 :
1670 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1672 } else if (have_l) {
1673 c = s->left_comp_ctx[row7] ? 3 :
1674 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1678 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1679 s->counts.comp[c][b->comp]++;
1682 // read actual references
1683 // FIXME probably cache a few variables here to prevent repetitive
1684 // memory accesses below
1685 if (b->comp) /* two references */ {
// The fixed compound reference goes in the slot given by its sign bias;
// only the variable reference (varcompref[bit]) is actually coded.
1686 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1688 b->ref[fix_idx] = s->fixcompref;
1689 // FIXME can this codeblob be replaced by some sort of LUT?
1692 if (s->above_intra_ctx[col]) {
1693 if (s->left_intra_ctx[row7]) {
1696 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1698 } else if (s->left_intra_ctx[row7]) {
1699 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1701 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1703 if (refl == refa && refa == s->varcompref[1]) {
1705 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1706 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1707 (refl == s->fixcompref && refa == s->varcompref[0])) {
1710 c = (refa == refl) ? 3 : 1;
1712 } else if (!s->left_comp_ctx[row7]) {
1713 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1716 c = (refl == s->varcompref[1] &&
1717 refa != s->varcompref[1]) ? 2 : 4;
1719 } else if (!s->above_comp_ctx[col]) {
1720 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1723 c = (refa == s->varcompref[1] &&
1724 refl != s->varcompref[1]) ? 2 : 4;
1727 c = (refl == refa) ? 4 : 2;
1731 if (s->above_intra_ctx[col]) {
1733 } else if (s->above_comp_ctx[col]) {
1734 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1736 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1739 } else if (have_l) {
1740 if (s->left_intra_ctx[row7]) {
1742 } else if (s->left_comp_ctx[row7]) {
1743 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1745 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1750 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1751 b->ref[var_idx] = s->varcompref[bit];
1752 s->counts.comp_ref[c][bit]++;
1753 } else /* single reference */ {
// First bit: LAST_FRAME vs GOLDEN/ALTREF; context from neighbour state.
1756 if (have_a && !s->above_intra_ctx[col]) {
1757 if (have_l && !s->left_intra_ctx[row7]) {
1758 if (s->left_comp_ctx[row7]) {
1759 if (s->above_comp_ctx[col]) {
1760 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1761 !s->above_ref_ctx[col]);
1763 c = (3 * !s->above_ref_ctx[col]) +
1764 (!s->fixcompref || !s->left_ref_ctx[row7]);
1766 } else if (s->above_comp_ctx[col]) {
1767 c = (3 * !s->left_ref_ctx[row7]) +
1768 (!s->fixcompref || !s->above_ref_ctx[col]);
1770 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1772 } else if (s->above_intra_ctx[col]) {
1774 } else if (s->above_comp_ctx[col]) {
1775 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1777 c = 4 * (!s->above_ref_ctx[col]);
1779 } else if (have_l && !s->left_intra_ctx[row7]) {
1780 if (s->left_intra_ctx[row7]) {
1782 } else if (s->left_comp_ctx[row7]) {
1783 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1785 c = 4 * (!s->left_ref_ctx[row7]);
1790 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1791 s->counts.single_ref[c][0][bit]++;
// Second bit (only when first bit chose the GOLDEN/ALTREF pair).
1795 // FIXME can this codeblob be replaced by some sort of LUT?
1798 if (s->left_intra_ctx[row7]) {
1799 if (s->above_intra_ctx[col]) {
1801 } else if (s->above_comp_ctx[col]) {
1802 c = 1 + 2 * (s->fixcompref == 1 ||
1803 s->above_ref_ctx[col] == 1);
1804 } else if (!s->above_ref_ctx[col]) {
1807 c = 4 * (s->above_ref_ctx[col] == 1);
1809 } else if (s->above_intra_ctx[col]) {
1810 if (s->left_intra_ctx[row7]) {
1812 } else if (s->left_comp_ctx[row7]) {
1813 c = 1 + 2 * (s->fixcompref == 1 ||
1814 s->left_ref_ctx[row7] == 1);
1815 } else if (!s->left_ref_ctx[row7]) {
1818 c = 4 * (s->left_ref_ctx[row7] == 1);
1820 } else if (s->above_comp_ctx[col]) {
1821 if (s->left_comp_ctx[row7]) {
1822 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1823 c = 3 * (s->fixcompref == 1 ||
1824 s->left_ref_ctx[row7] == 1);
1828 } else if (!s->left_ref_ctx[row7]) {
1829 c = 1 + 2 * (s->fixcompref == 1 ||
1830 s->above_ref_ctx[col] == 1);
1832 c = 3 * (s->left_ref_ctx[row7] == 1) +
1833 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1835 } else if (s->left_comp_ctx[row7]) {
1836 if (!s->above_ref_ctx[col]) {
1837 c = 1 + 2 * (s->fixcompref == 1 ||
1838 s->left_ref_ctx[row7] == 1);
1840 c = 3 * (s->above_ref_ctx[col] == 1) +
1841 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1843 } else if (!s->above_ref_ctx[col]) {
1844 if (!s->left_ref_ctx[row7]) {
1847 c = 4 * (s->left_ref_ctx[row7] == 1);
1849 } else if (!s->left_ref_ctx[row7]) {
1850 c = 4 * (s->above_ref_ctx[col] == 1);
1852 c = 2 * (s->left_ref_ctx[row7] == 1) +
1853 2 * (s->above_ref_ctx[col] == 1);
1856 if (s->above_intra_ctx[col] ||
1857 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1859 } else if (s->above_comp_ctx[col]) {
1860 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1862 c = 4 * (s->above_ref_ctx[col] == 1);
1865 } else if (have_l) {
1866 if (s->left_intra_ctx[row7] ||
1867 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1869 } else if (s->left_comp_ctx[row7]) {
1870 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1872 c = 4 * (s->left_ref_ctx[row7] == 1);
1877 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1878 s->counts.single_ref[c][1][bit]++;
1879 b->ref[0] = 1 + bit;
// --- inter mode(s), filter, and motion vectors ---------------------------
1884 if (b->bs <= BS_8x8) {
// Segment-level skip forces ZEROMV for the whole block.
1885 if (s->segmentation.feat[b->seg_id].skip_enabled) {
1886 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
// off[]: neighbour-context offset per block size for sub-8x8 blocks.
1888 static const uint8_t off[10] = {
1889 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1892 // FIXME this needs to use the LUT tables from find_ref_mvs
1893 // because not all are -1,0/0,-1
1894 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1895 [s->left_mode_ctx[row7 + off[b->bs]]];
1897 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1898 s->prob.p.mv_mode[c]);
1899 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
// Inter modes are numbered from 10; counts index is rebased to 0.
1900 s->counts.mv_mode[c][b->mode[0] - 10]++;
// Interpolation filter: either coded (SWITCHABLE) or frame-global.
1904 if (s->filtermode == FILTER_SWITCHABLE) {
1907 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1908 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1909 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1910 s->left_filter_ctx[row7] : 3;
1912 c = s->above_filter_ctx[col];
1914 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1915 c = s->left_filter_ctx[row7];
1920 filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1921 s->prob.p.filter[c]);
1922 s->counts.filter[c][filter_id]++;
1923 b->filter = vp9_filter_lut[filter_id];
1925 b->filter = s->filtermode;
// Sub-8x8: each of the up-to-4 sub-blocks codes its own mode + MV(s).
1928 if (b->bs > BS_8x8) {
1929 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1931 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1932 s->prob.p.mv_mode[c]);
1933 s->counts.mv_mode[c][b->mode[0] - 10]++;
1934 fill_mv(s, b->mv[0], b->mode[0], 0);
1936 if (b->bs != BS_8x4) {
1937 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1938 s->prob.p.mv_mode[c]);
1939 s->counts.mv_mode[c][b->mode[1] - 10]++;
1940 fill_mv(s, b->mv[1], b->mode[1], 1);
1942 b->mode[1] = b->mode[0];
1943 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1944 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1947 if (b->bs != BS_4x8) {
1948 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1949 s->prob.p.mv_mode[c]);
1950 s->counts.mv_mode[c][b->mode[2] - 10]++;
1951 fill_mv(s, b->mv[2], b->mode[2], 2);
1953 if (b->bs != BS_8x4) {
1954 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1955 s->prob.p.mv_mode[c]);
1956 s->counts.mv_mode[c][b->mode[3] - 10]++;
1957 fill_mv(s, b->mv[3], b->mode[3], 3);
1959 b->mode[3] = b->mode[2];
1960 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1961 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1964 b->mode[2] = b->mode[0];
1965 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1966 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1967 b->mode[3] = b->mode[1];
1968 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1969 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
// 8x8 and larger: one MV pair, replicated to all four sub-slots.
1972 fill_mv(s, b->mv[0], b->mode[0], -1);
1973 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1974 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1975 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
1976 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1977 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1978 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
// Reference value stored into the above/left ref contexts: for compound
// blocks, the variable reference (selected via sign bias).
1981 vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
// --- context propagation: splat per-block values into above/left arrays ---
// SPLAT_CTX writes 'val' into n consecutive context bytes; variant with
// native 64-bit stores vs 32-bit-only fallback.
1985 #define SPLAT_CTX(var, val, n) \
1987 case 1: var = val; break; \
1988 case 2: AV_WN16A(&var, val * 0x0101); break; \
1989 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1990 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
1992 uint64_t v64 = val * 0x0101010101010101ULL; \
1993 AV_WN64A( &var, v64); \
1994 AV_WN64A(&((uint8_t *) &var)[8], v64); \
1999 #define SPLAT_CTX(var, val, n) \
2001 case 1: var = val; break; \
2002 case 2: AV_WN16A(&var, val * 0x0101); break; \
2003 case 4: AV_WN32A(&var, val * 0x01010101); break; \
2005 uint32_t v32 = val * 0x01010101; \
2006 AV_WN32A( &var, v32); \
2007 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2011 uint32_t v32 = val * 0x01010101; \
2012 AV_WN32A( &var, v32); \
2013 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2014 AV_WN32A(&((uint8_t *) &var)[8], v32); \
2015 AV_WN32A(&((uint8_t *) &var)[12], v32); \
2021 switch (bwh_tab[1][b->bs][0]) {
// SET_CTXS updates all context arrays for one direction (above/left).
2022 #define SET_CTXS(dir, off, n) \
2024 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
2025 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
2026 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2027 if (!s->keyframe && !s->intraonly) { \
2028 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
2029 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
2030 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
2032 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2033 if (s->filtermode == FILTER_SWITCHABLE) { \
2034 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2039 case 1: SET_CTXS(above, col, 1); break;
2040 case 2: SET_CTXS(above, col, 2); break;
2041 case 4: SET_CTXS(above, col, 4); break;
2042 case 8: SET_CTXS(above, col, 8); break;
2044 switch (bwh_tab[1][b->bs][1]) {
2045 case 1: SET_CTXS(left, row7, 1); break;
2046 case 2: SET_CTXS(left, row7, 2); break;
2047 case 4: SET_CTXS(left, row7, 4); break;
2048 case 8: SET_CTXS(left, row7, 8); break;
// --- MV context update (inputs to find_ref_mvs for subsequent blocks) -----
2053 if (!s->keyframe && !s->intraonly) {
2054 if (b->bs > BS_8x8) {
// Sub-8x8: right column sub-MVs feed left_mv_ctx, bottom row feed
// above_mv_ctx; mv[3] is the bottom-right sub-block.
2055 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2057 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2058 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2059 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2060 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2061 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2062 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2063 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2064 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2066 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2068 for (n = 0; n < w4 * 2; n++) {
2069 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2070 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2072 for (n = 0; n < h4 * 2; n++) {
2073 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2074 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
// --- store refs/MVs into the frame-wide buffer for temporal prediction ----
2080 for (y = 0; y < h4; y++) {
2081 int x, o = (row + y) * s->sb_cols * 8 + col;
2082 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
2085 for (x = 0; x < w4; x++) {
2089 } else if (b->comp) {
2090 for (x = 0; x < w4; x++) {
2091 mv[x].ref[0] = b->ref[0];
2092 mv[x].ref[1] = b->ref[1];
2093 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2094 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2097 for (x = 0; x < w4; x++) {
2098 mv[x].ref[0] = b->ref[0];
2100 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2106 // FIXME merge cnt/eob arguments?
// Decode the coefficients of one transform block from the range coder.
// 'nnz' is the initial non-zero context (sum of above/left contexts); 'scan'
// and 'nb' give the scan order and per-position neighbour pairs; 'p' are the
// token probabilities and cnt/eob the backward-adaptation counters.
// is_tx32x32 selects the 32x32 path where dequantized values are halved.
// Returns the number of coefficients read (0 = immediate EOB).
2107 static av_always_inline int
2108 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2109 int is_tx32x32, unsigned (*cnt)[6][3],
2110 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2111 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2112 const int16_t *band_counts, const int16_t *qmul)
// band/band_left track the frequency band of the current scan position;
// tp points at the probability vector for the current (band, nnz) context.
2114 int i = 0, band = 0, band_left = band_counts[band];
2115 uint8_t *tp = p[0][nnz];
// cache[] keeps the token magnitude class per position, used to derive the
// nnz context of later positions from their two coded neighbours.
2116 uint8_t cache[1024];
2121 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2122 eob[band][nnz][val]++;
2127 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2128 cnt[band][nnz][0]++;
2130 band_left = band_counts[++band];
2132 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2134 if (++i == n_coeffs)
2135 break; //invalid input; blocks should end with EOB
2140 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2141 cnt[band][nnz][1]++;
2145 // fill in p[3-10] (model fill) - only once per frame for each pos
2147 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2149 cnt[band][nnz][2]++;
// Token magnitude decoding: small values (2-4) directly, then the DCT
// "category" escapes with fixed extra-bit probabilities.
2150 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2151 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2152 cache[rc] = val = 2;
2154 val = 3 + vp56_rac_get_prob(c, tp[5]);
2157 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2159 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2160 val = 5 + vp56_rac_get_prob(c, 159);
2162 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2163 val += vp56_rac_get_prob(c, 145);
// cat3/4: 3- and 4-bit extra magnitudes.
2167 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2168 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2169 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2170 val += (vp56_rac_get_prob(c, 148) << 1);
2171 val += vp56_rac_get_prob(c, 140);
2173 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2174 val += (vp56_rac_get_prob(c, 155) << 2);
2175 val += (vp56_rac_get_prob(c, 140) << 1);
2176 val += vp56_rac_get_prob(c, 135);
// cat5: 5 extra bits.
2178 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2179 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2180 val += (vp56_rac_get_prob(c, 157) << 3);
2181 val += (vp56_rac_get_prob(c, 141) << 2);
2182 val += (vp56_rac_get_prob(c, 134) << 1);
2183 val += vp56_rac_get_prob(c, 130);
// cat6: 14 extra bits for the largest magnitudes.
2185 val = 67 + (vp56_rac_get_prob(c, 254) << 13);
2186 val += (vp56_rac_get_prob(c, 254) << 12);
2187 val += (vp56_rac_get_prob(c, 254) << 11);
2188 val += (vp56_rac_get_prob(c, 252) << 10);
2189 val += (vp56_rac_get_prob(c, 249) << 9);
2190 val += (vp56_rac_get_prob(c, 243) << 8);
2191 val += (vp56_rac_get_prob(c, 230) << 7);
2192 val += (vp56_rac_get_prob(c, 196) << 6);
2193 val += (vp56_rac_get_prob(c, 177) << 5);
2194 val += (vp56_rac_get_prob(c, 153) << 4);
2195 val += (vp56_rac_get_prob(c, 140) << 3);
2196 val += (vp56_rac_get_prob(c, 133) << 2);
2197 val += (vp56_rac_get_prob(c, 130) << 1);
2198 val += vp56_rac_get_prob(c, 129);
2203 band_left = band_counts[++band];
// Sign bit, dequantization (qmul[0] for DC at i==0, qmul[1] for AC);
// 32x32 blocks store values at half scale.
2205 coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
2207 coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
2208 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2210 } while (++i < n_coeffs);
// Non-32x32 coefficient decoding: thin wrapper selecting the
// is_tx32x32 == 0 specialization of decode_coeffs_b_generic().
2215 static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2216 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2217 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2218 const int16_t (*nb)[2], const int16_t *band_counts,
2219 const int16_t *qmul)
2221 return decode_coeffs_b_generic(c, coef, n_coeffs, 0, cnt, eob, p,
2222 nnz, scan, nb, band_counts, qmul);
// 32x32 coefficient decoding: thin wrapper selecting the
// is_tx32x32 == 1 specialization (half-scale dequantization).
2225 static int decode_coeffs_b32(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2226 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2227 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2228 const int16_t (*nb)[2], const int16_t *band_counts,
2229 const int16_t *qmul)
2231 return decode_coeffs_b_generic(c, coef, n_coeffs, 1, cnt, eob, p,
2232 nnz, scan, nb, band_counts, qmul);
/* Decode the residual coefficients for the current block: first the luma
 * plane, then both chroma planes, updating the above/left non-zero-count
 * ("nnz") context arrays as it goes.
 * NOTE(review): this listing is elided (several original lines, including
 * the enclosing switch statements on b->tx / b->uvtx, are missing); the
 * comments below describe only what is visible. */
2235 static void decode_coeffs(AVCodecContext *ctx)
2237 VP9Context *s = ctx->priv_data;
2239 int row = s->row, col = s->col;
/* probability / counts / eob tables are indexed by tx size, plane type
 * (0 = luma here) and intra-vs-inter (!b->intra) */
2240 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2241 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2242 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2243 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
/* clip the coefficient loops to the visible part of the frame */
2244 int end_x = FFMIN(2 * (s->cols - col), w4);
2245 int end_y = FFMIN(2 * (s->rows - row), h4);
2246 int n, pl, x, y, res;
2247 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
/* lossless mode selects a separate (WHT) row of the scan/itxfm tables */
2248 int tx = 4 * s->lossless + b->tx;
2249 const int16_t * const *yscans = vp9_scans[tx];
2250 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
/* chroma always uses the DCT_DCT scan order */
2251 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2252 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2253 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2254 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
/* per-tx-size band sizes; the last entry is written as
 * total_coeffs - sum_of_previous_bands for readability */
2255 static const int16_t band_counts[4][8] = {
2256 { 1, 2, 3, 4, 3, 16 - 13 },
2257 { 1, 2, 3, 4, 11, 64 - 21 },
2258 { 1, 2, 3, 4, 11, 256 - 21 },
2259 { 1, 2, 3, 4, 11, 1024 - 21 },
2261 const int16_t *y_band_counts = band_counts[b->tx];
2262 const int16_t *uv_band_counts = band_counts[b->uvtx];
/* MERGE/MERGE_CTX collapse several 4x4 nnz context entries into one
 * boolean per larger-tx unit using an aligned multi-byte read */
2264 #define MERGE(la, end, step, rd) \
2265 for (n = 0; n < end; n += step) \
2266 la[n] = !!rd(&la[n])
2267 #define MERGE_CTX(step, rd) \
2269 MERGE(l, end_y, step, rd); \
2270 MERGE(a, end_x, step, rd); \
/* luma coefficient loop: one decode_coeffs_b call per tx-sized unit;
 * the ##v suffix selects the 32x32 variant when needed */
2273 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2274 for (n = 0, y = 0; y < end_y; y += step) { \
2275 for (x = 0; x < end_x; x += step, n += step * step) { \
2276 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2277 res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
2278 c, e, p, a[x] + l[y], yscans[txtp], \
2279 ynbs[txtp], y_band_counts, qmul[0]); \
2280 a[x] = l[y] = !!res; \
2282 AV_WN16A(&s->eob[n], res); \
/* SPLAT re-expands the merged context back to per-4x4 granularity;
 * the aligned-store fast paths need end == full block width/height */
2289 #define SPLAT(la, end, step, cond) \
2291 for (n = 1; n < end; n += step) \
2292 la[n] = la[n - 1]; \
2293 } else if (step == 4) { \
2295 for (n = 0; n < end; n += step) \
2296 AV_WN32A(&la[n], la[n] * 0x01010101); \
2298 for (n = 0; n < end; n += step) \
2299 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2301 } else /* step == 8 */ { \
2303 if (HAVE_FAST_64BIT) { \
2304 for (n = 0; n < end; n += step) \
2305 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2307 for (n = 0; n < end; n += step) { \
2308 uint32_t v32 = la[n] * 0x01010101; \
2309 AV_WN32A(&la[n], v32); \
2310 AV_WN32A(&la[n + 4], v32); \
2314 for (n = 0; n < end; n += step) \
2315 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2318 #define SPLAT_CTX(step) \
2320 SPLAT(a, end_x, step, end_x == w4); \
2321 SPLAT(l, end_y, step, end_y == h4); \
/* luma: dispatch per tx size (TX_4X4 .. TX_32X32); the 4x4 case reads a
 * per-sub-block intra mode when the block is larger than 8x8 */
2327 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2330 MERGE_CTX(2, AV_RN16A);
2331 DECODE_Y_COEF_LOOP(2, 0,);
2335 MERGE_CTX(4, AV_RN32A);
2336 DECODE_Y_COEF_LOOP(4, 0,);
2340 MERGE_CTX(8, AV_RN64A);
2341 DECODE_Y_COEF_LOOP(8, 0, 32);
/* chroma variant of the coefficient loop; eob storage width depends on
 * the (elided) tx-size condition around lines 2354/2356 */
2346 #define DECODE_UV_COEF_LOOP(step, decode_coeffs_fn) \
2347 for (n = 0, y = 0; y < end_y; y += step) { \
2348 for (x = 0; x < end_x; x += step, n += step * step) { \
2349 res = decode_coeffs_fn(&s->c, s->uvblock[pl] + 16 * n, \
2350 16 * step * step, c, e, p, a[x] + l[y], \
2351 uvscan, uvnb, uv_band_counts, qmul[1]); \
2352 a[x] = l[y] = !!res; \
2354 AV_WN16A(&s->uveob[pl][n], res); \
2356 s->uveob[pl][n] = res; \
/* switch the probability/count tables to the chroma plane */
2361 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2362 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2363 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2368 for (pl = 0; pl < 2; pl++) {
2369 a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2370 l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2373 DECODE_UV_COEF_LOOP(1, decode_coeffs_b);
2376 MERGE_CTX(2, AV_RN16A);
2377 DECODE_UV_COEF_LOOP(2, decode_coeffs_b);
2381 MERGE_CTX(4, AV_RN32A);
2382 DECODE_UV_COEF_LOOP(4, decode_coeffs_b);
2386 MERGE_CTX(8, AV_RN64A);
2387 DECODE_UV_COEF_LOOP(8, decode_coeffs_b32);
/* Fix up an intra prediction mode according to edge availability and fill
 * the above (*a) and left (l) prediction-edge buffers, extending or
 * substituting constant values (127/128/129) where real neighbours are
 * unavailable.  Returns the possibly-replaced mode.
 * NOTE(review): listing is elided; some lines (e.g. the bodies of several
 * if/else branches) are missing from this view. */
2394 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2395 uint8_t *dst_edge, ptrdiff_t stride_edge,
2396 uint8_t *dst_inner, ptrdiff_t stride_inner,
2397 uint8_t *l, int col, int x, int w,
2398 int row, int y, enum TxfmMode tx,
2399 int p, int ss_h, int ss_v)
2401 int have_top = row > 0 || y > 0;
/* left availability stops at the tile boundary, not just the frame edge */
2402 int have_left = col > s->tiling.tile_col_start || x > 0;
2403 int have_right = x < w - 1;
/* maps each directional mode to the DC-constant fallback used when the
 * required neighbour (left and/or top) is unavailable */
2404 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2405 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2406 { DC_127_PRED, VERT_PRED } },
2407 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2408 { HOR_PRED, HOR_PRED } },
2409 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2410 { LEFT_DC_PRED, DC_PRED } },
2411 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2412 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2413 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2414 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2415 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2416 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2417 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2418 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2419 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2420 { DC_127_PRED, VERT_LEFT_PRED } },
2421 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2422 { HOR_UP_PRED, HOR_UP_PRED } },
2423 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2424 { HOR_PRED, TM_VP8_PRED } },
/* which edge pixels each (post-conversion) mode actually consumes */
2426 static const struct {
2427 uint8_t needs_left:1;
2428 uint8_t needs_top:1;
2429 uint8_t needs_topleft:1;
2430 uint8_t needs_topright:1;
2431 uint8_t invert_left:1;
2432 } edges[N_INTRA_PRED_MODES] = {
2433 [VERT_PRED] = { .needs_top = 1 },
2434 [HOR_PRED] = { .needs_left = 1 },
2435 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2436 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2437 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2438 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2439 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2440 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2441 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2442 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2443 [LEFT_DC_PRED] = { .needs_left = 1 },
2444 [TOP_DC_PRED] = { .needs_top = 1 },
2445 [DC_128_PRED] = { 0 },
2446 [DC_127_PRED] = { 0 },
2447 [DC_129_PRED] = { 0 }
2450 av_assert2(mode >= 0 && mode < 10);
2451 mode = mode_conv[mode][have_left][have_top];
2452 if (edges[mode].needs_top) {
2453 uint8_t *top, *topleft;
2454 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2455 int n_px_need_tr = 0;
2457 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2460 // if top of sb64-row, use s->intra_pred_data[] instead of
2461 // dst[-stride] for intra prediction (it contains pre- instead of
2462 // post-loopfilter data)
2464 top = !(row & 7) && !y ?
2465 s->intra_pred_data[p] + col * (8 >> ss_h) + x * 4 :
2466 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2468 topleft = !(row & 7) && !y ?
2469 s->intra_pred_data[p] + col * (8 >> ss_h) + x * 4 :
2470 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2471 &dst_inner[-stride_inner];
/* fast path: the whole top edge (incl. top-left/top-right if needed)
 * is available in one contiguous row */
2475 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2476 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2477 n_px_need + n_px_need_tr <= n_px_have) {
2481 if (n_px_need <= n_px_have) {
2482 memcpy(*a, top, n_px_need);
/* partial edge: copy what exists, replicate the last pixel */
2484 memcpy(*a, top, n_px_have);
2485 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2486 n_px_need - n_px_have);
/* no top edge at all: fill with the VP9-specified constant 127 */
2489 memset(*a, 127, n_px_need);
2491 if (edges[mode].needs_topleft) {
2492 if (have_left && have_top) {
2493 (*a)[-1] = topleft[-1];
2495 (*a)[-1] = have_top ? 129 : 127;
2498 if (tx == TX_4X4 && edges[mode].needs_topright) {
2499 if (have_top && have_right &&
2500 n_px_need + n_px_need_tr <= n_px_have) {
2501 memcpy(&(*a)[4], &top[4], 4);
2503 memset(&(*a)[4], (*a)[3], 4);
2508 if (edges[mode].needs_left) {
2510 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2511 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2512 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
/* invert_left stores the column top-to-bottom instead of bottom-to-top
 * (used by HOR_UP_PRED per the edges[] table above) */
2514 if (edges[mode].invert_left) {
2515 if (n_px_need <= n_px_have) {
2516 for (i = 0; i < n_px_need; i++)
2517 l[i] = dst[i * stride - 1];
2519 for (i = 0; i < n_px_have; i++)
2520 l[i] = dst[i * stride - 1];
2521 memset(&l[n_px_have], l[n_px_have - 1], n_px_need - n_px_have);
2524 if (n_px_need <= n_px_have) {
2525 for (i = 0; i < n_px_need; i++)
2526 l[n_px_need - 1 - i] = dst[i * stride - 1];
2528 for (i = 0; i < n_px_have; i++)
2529 l[n_px_need - 1 - i] = dst[i * stride - 1];
2530 memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
/* no left edge: fill with the VP9-specified constant 129 */
2534 memset(l, 129, 4 << tx);
/* Intra reconstruction for the current block: for each tx-sized sub-unit,
 * build prediction edges via check_intra_mode(), run the intra predictor,
 * and add the inverse-transformed residual.  Luma first, then both chroma
 * planes.  NOTE(review): listing is elided; the skip/eob guard lines
 * around the itxfm_add calls are not visible here. */
2541 static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2543 VP9Context *s = ctx->priv_data;
2545 int row = s->row, col = s->col;
2546 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2547 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2548 int end_x = FFMIN(2 * (s->cols - col), w4);
2549 int end_y = FFMIN(2 * (s->rows - row), h4);
2550 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2551 int uvstep1d = 1 << b->uvtx, p;
/* s->dst[] may point at the emulated-edge temp buffer, dst_r always at
 * the real reference frame (see decode_b); both are predicted into */
2552 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2553 LOCAL_ALIGNED_32(uint8_t, a_buf, [64]);
2554 LOCAL_ALIGNED_32(uint8_t, l, [32]);
2556 for (n = 0, y = 0; y < end_y; y += step1d) {
2557 uint8_t *ptr = dst, *ptr_r = dst_r;
2558 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2559 ptr_r += 4 * step1d, n += step) {
/* sub-8x8 blocks carry one mode per 4x4 sub-block */
2560 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2562 uint8_t *a = &a_buf[32];
2563 enum TxfmType txtp = vp9_intra_txfm_type[mode];
/* eob was stored 16-bit for tx > 8x8 (see decode_coeffs) */
2564 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2566 mode = check_intra_mode(s, mode, &a, ptr_r,
2567 s->frames[CUR_FRAME].tf.f->linesize[0],
2568 ptr, s->y_stride, l,
2569 col, x, w4, row, y, b->tx, 0, 0, 0);
2570 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2572 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2573 s->block + 16 * n, eob);
2575 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2576 dst += 4 * step1d * s->y_stride;
/* chroma: same structure, single mode (b->uvmode), DCT_DCT transform */
2583 step = 1 << (b->uvtx * 2);
2584 for (p = 0; p < 2; p++) {
2585 dst = s->dst[1 + p];
2586 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2587 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2588 uint8_t *ptr = dst, *ptr_r = dst_r;
2589 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2590 ptr_r += 4 * uvstep1d, n += step) {
2591 int mode = b->uvmode;
2592 uint8_t *a = &a_buf[32];
2593 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2595 mode = check_intra_mode(s, mode, &a, ptr_r,
2596 s->frames[CUR_FRAME].tf.f->linesize[1],
2597 ptr, s->uv_stride, l, col, x, w4, row, y,
2598 b->uvtx, p + 1, s->ss_h, s->ss_v);
2599 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2601 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2602 s->uvblock[p] + 16 * n, eob);
2604 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2605 dst += 4 * uvstep1d * s->uv_stride;
/* Luma motion compensation with reference-frame scaling: scale the MV and
 * block position into reference coordinates, wait for the reference row to
 * be decoded (frame-threading), fall back to emulated edges when the
 * scaled source area leaves the frame, then run the scaled-MC DSP function.
 * NOTE(review): listing is elided; the lines clamping mx/my and declaring
 * `th` are not visible here. */
2610 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2611 uint8_t *dst, ptrdiff_t dst_stride,
2612 const uint8_t *ref, ptrdiff_t ref_stride,
2613 ThreadFrame *ref_frame,
2614 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2615 int bw, int bh, int w, int h,
2616 const uint16_t *scale, const uint8_t *step)
/* 14-bit fixed-point scaling of a motion-vector/position component */
2618 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2619 // BUG libvpx seems to scale the two components separately. This introduces
2620 // rounding errors but we have to reproduce them to be exactly compatible
2621 // with the output from libvpx...
2622 int mx = scale_mv(mv->x * 2, 0) + scale_mv(x * 16, 0);
2623 int my = scale_mv(mv->y * 2, 1) + scale_mv(y * 16, 1);
2624 int refbw_m1, refbh_m1;
2629 ref += y * ref_stride + x;
/* footprint of the scaled read, minus one, in reference pixels */
2632 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2633 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2634 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2635 // we use +7 because the last 7 pixels of each sbrow can be changed in
2636 // the longest loopfilter of the next sbrow
2637 th = (y + refbh_m1 + 4 + 7) >> 6;
2638 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
/* 3-pixel border on each side for the 8-tap subpel filter */
2639 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2640 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2641 ref - 3 * ref_stride - 3,
2643 refbw_m1 + 8, refbh_m1 + 8,
2644 x - 3, y - 3, w, h);
/* 144 is the edge_emu_buffer line stride used for scaled MC */
2645 ref = s->edge_emu_buffer + 3 * 144 + 3;
2648 smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
/* Chroma counterpart of mc_luma_scaled(): same scaling/await/emu-edge
 * logic, applied to both the U and V planes (which may have different
 * source strides).  NOTE(review): listing is elided; mx/my clamping and
 * the `th` declaration are not visible here. */
2651 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2652 uint8_t *dst_u, uint8_t *dst_v,
2653 ptrdiff_t dst_stride,
2654 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2655 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2656 ThreadFrame *ref_frame,
2657 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2658 int bw, int bh, int w, int h,
2659 const uint16_t *scale, const uint8_t *step)
2661 // BUG https://code.google.com/p/webm/issues/detail?id=820
/* reproduces libvpx's mixed-precision position scaling for subsampled
 * planes (low 4 bits taken from the double-resolution scale) */
2662 int mx = scale_mv(mv->x << !s->ss_h, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2663 int my = scale_mv(mv->y << !s->ss_v, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2665 int refbw_m1, refbh_m1;
2670 ref_u += y * src_stride_u + x;
2671 ref_v += y * src_stride_v + x;
2674 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2675 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2676 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2677 // we use +7 because the last 7 pixels of each sbrow can be changed in
2678 // the longest loopfilter of the next sbrow
/* progress rows are luma-based, hence the extra >> for subsampling */
2679 th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2680 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2681 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
/* emulate edges for U and V independently, reusing the same buffer */
2682 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2683 ref_u - 3 * src_stride_u - 3,
2685 refbw_m1 + 8, refbh_m1 + 8,
2686 x - 3, y - 3, w, h);
2687 ref_u = s->edge_emu_buffer + 3 * 144 + 3;
2688 smc(dst_u, dst_stride, ref_u, 144, bh, mx, my, step[0], step[1]);
2690 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2691 ref_v - 3 * src_stride_v - 3,
2693 refbw_m1 + 8, refbh_m1 + 8,
2694 x - 3, y - 3, w, h);
2695 ref_v = s->edge_emu_buffer + 3 * 144 + 3;
2696 smc(dst_v, dst_stride, ref_v, 144, bh, mx, my, step[0], step[1]);
/* fast path: both planes read directly from the reference frame */
2698 smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2699 smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
/* Instantiate the shared inter-prediction template (vp9_mc_template.c)
 * in its "scaled reference" variant: FN() suffixes the generated function
 * names with _scaled, and the mc_*_dir macros route each directional MC
 * call to the scaled helpers above, passing the per-reference scale/step
 * tables. */
2703 #define FN(x) x##_scaled
2704 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2705 mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
2706 mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2707 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2708 row, col, mv, bw, bh, w, h, i) \
2709 mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2710 row, col, mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2711 #include "vp9_mc_template.c"
/* undo the template parameter macros before the next instantiation */
2713 #undef mc_chroma_dir
/* Luma motion compensation without reference scaling: wait for the
 * reference row (frame-threading), use emulated edges only when the
 * (subpel-extended) source area leaves the frame, then call the selected
 * [mx!=0][my!=0] MC function.  NOTE(review): listing is elided. */
2716 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2717 uint8_t *dst, ptrdiff_t dst_stride,
2718 const uint8_t *ref, ptrdiff_t ref_stride,
2719 ThreadFrame *ref_frame,
2720 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2721 int bw, int bh, int w, int h)
2723 int mx = mv->x, my = mv->y, th;
2727 ref += y * ref_stride + x;
2730 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2731 // we use +7 because the last 7 pixels of each sbrow can be changed in
2732 // the longest loopfilter of the next sbrow
2733 th = (y + bh + 4 * !!my + 7) >> 6;
2734 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
/* !!mx / !!my: the 3/4-pixel filter border is only needed on axes with
 * a subpel component */
2735 if (x < !!mx * 3 || y < !!my * 3 ||
2736 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2737 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2738 ref - !!my * 3 * ref_stride - !!mx * 3,
2740 bw + !!mx * 7, bh + !!my * 7,
2741 x - !!mx * 3, y - !!my * 3, w, h);
/* 80 is the edge_emu_buffer line stride used for unscaled MC */
2742 ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
/* luma MVs are in 1/8-pel units here, hence the << 1 to the DSP call */
2745 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
/* Chroma counterpart of mc_luma_unscaled(): the MV is pre-shifted into
 * chroma-pel units per subsampling, then both U and V planes are motion
 * compensated, with independent emulated-edge fallbacks.
 * NOTE(review): listing is elided. */
2748 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2749 uint8_t *dst_u, uint8_t *dst_v,
2750 ptrdiff_t dst_stride,
2751 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2752 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2753 ThreadFrame *ref_frame,
2754 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2755 int bw, int bh, int w, int h)
/* << !ss_*: keep full MV precision on non-subsampled axes */
2757 int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
2761 ref_u += y * src_stride_u + x;
2762 ref_v += y * src_stride_v + x;
2765 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2766 // we use +7 because the last 7 pixels of each sbrow can be changed in
2767 // the longest loopfilter of the next sbrow
/* progress rows are luma-based, hence the extra >> for subsampling */
2768 th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2769 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2770 if (x < !!mx * 3 || y < !!my * 3 ||
2771 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2772 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2773 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2775 bw + !!mx * 7, bh + !!my * 7,
2776 x - !!mx * 3, y - !!my * 3, w, h);
2777 ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2778 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2780 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2781 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2783 bw + !!mx * 7, bh + !!my * 7,
2784 x - !!mx * 3, y - !!my * 3, w, h);
2785 ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2786 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
/* fast path: both planes read directly from the reference frame */
2788 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2789 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
/* Second instantiation of vp9_mc_template.c: the unscaled-reference
 * variant, routing directional MC calls to mc_luma_unscaled() /
 * mc_chroma_unscaled(). */
2794 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2795 mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2797 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2798 row, col, mv, bw, bh, w, h, i) \
2799 mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2800 row, col, mv, bw, bh, w, h)
2801 #include "vp9_mc_template.c"
/* NOTE(review): these #undef names ("mc_luma_dir_dir"/"mc_chroma_dir_dir")
 * do not match the macros defined above ("mc_luma_dir"/"mc_chroma_dir"),
 * so the intended macros appear to stay defined past this point — looks
 * like a typo; confirm against the full file before changing. */
2802 #undef mc_luma_dir_dir
2803 #undef mc_chroma_dir_dir
/* Inter reconstruction for the current block: run scaled or unscaled
 * inter prediction depending on whether the block's reference frame(s)
 * have a non-unit scale factor, then (unless skipped) add the inverse
 * transform residuals for luma and both chroma planes.
 * NOTE(review): listing is elided; the inter_pred() call for the unscaled
 * path and the skip/eob guards are not visible here. */
2806 static void inter_recon(AVCodecContext *ctx)
2808 VP9Context *s = ctx->priv_data;
2810 int row = s->row, col = s->col;
/* mvscale[ref][0] != 0 means this reference needs scaled prediction */
2812 if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
2813 inter_pred_scaled(ctx);
2818 /* mostly copied intra_recon() */
2820 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2821 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2822 int end_x = FFMIN(2 * (s->cols - col), w4);
2823 int end_y = FFMIN(2 * (s->rows - row), h4);
2824 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2825 int uvstep1d = 1 << b->uvtx, p;
2826 uint8_t *dst = s->dst[0];
/* luma residual add: inter blocks always use the DCT_DCT transform */
2829 for (n = 0, y = 0; y < end_y; y += step1d) {
2831 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2832 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2835 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2836 s->block + 16 * n, eob);
2838 dst += 4 * s->y_stride * step1d;
/* chroma residual add for both planes */
2844 step = 1 << (b->uvtx * 2);
2845 for (p = 0; p < 2; p++) {
2846 dst = s->dst[p + 1];
2847 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2849 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2850 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2853 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2854 s->uvblock[p] + 16 * n, eob);
2856 dst += 4 * uvstep1d * s->uv_stride;
/* Build the loopfilter edge bitmasks for one block into the per-superblock
 * VP9Filter mask array (mask[0] = column edges, mask[1] = row edges; the
 * last index selects filter width: 0=16px, 1=8px, 2=4px, 3=inner-4px, per
 * the VP9Filter declaration in the header).
 * NOTE(review): listing is elided; several branch headers and the TX_4X4
 * subsampled special-case body are missing from this view, so comments
 * stick to what is visible. */
2862 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
2863 int row_and_7, int col_and_7,
2864 int w, int h, int col_end, int row_end,
2865 enum TxfmMode tx, int skip_inter)
/* columns/rows where the wide (8px) filter applies on 32px boundaries */
2867 static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
2868 static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
2870 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2871 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2872 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2873 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2875 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2876 // edges. This means that for UV, we work on two subsampled blocks at
2877 // a time, and we only use the topleft block's mode information to set
2878 // things like block strength. Thus, for any block size smaller than
2879 // 16x16, ignore the odd portion of the block.
2880 if (tx == TX_4X4 && (ss_v | ss_h)) {
/* non-skipped 4x4: every internal edge is filtered */
2895 if (tx == TX_4X4 && !skip_inter) {
2896 int t = 1 << col_and_7, m_col = (t << w) - t, y;
2897 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
2898 int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
2900 for (y = row_and_7; y < h + row_and_7; y++) {
2901 int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
2903 mask[0][y][1] |= m_row_8;
2904 mask[0][y][2] |= m_row_4;
2905 // for odd lines, if the odd col is not being filtered,
2906 // skip odd row also:
2913 // if a/c are even row/col and b/d are odd, and d is skipped,
2914 // e.g. right edge of size-66x66.webm, then skip b also (bug)
2915 if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
2916 mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
2918 mask[1][y][col_mask_id] |= m_col;
/* inner 4x4 edges (mask index 3) */
2921 mask[0][y][3] |= m_col;
2923 mask[1][y][3] |= m_col;
/* larger transforms: only the block-boundary edges are filtered */
2926 int y, t = 1 << col_and_7, m_col = (t << w) - t;
2929 int mask_id = (tx == TX_8X8);
2930 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2931 int l2 = tx + ss_h - 1, step1d;
2932 int m_row = m_col & masks[l2];
2934 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
2935 // 8wd loopfilter to prevent going off the visible edge.
2936 if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2937 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2938 int m_row_8 = m_row - m_row_16;
2940 for (y = row_and_7; y < h + row_and_7; y++) {
2941 mask[0][y][0] |= m_row_16;
2942 mask[0][y][1] |= m_row_8;
2945 for (y = row_and_7; y < h + row_and_7; y++)
2946 mask[0][y][mask_id] |= m_row;
/* same odd-edge handling for the vertical (row) direction */
2951 if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2952 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2953 mask[1][y][0] |= m_col;
2954 if (y - row_and_7 == h - 1)
2955 mask[1][y][1] |= m_col;
2957 for (y = row_and_7; y < h + row_and_7; y += step1d)
2958 mask[1][y][mask_id] |= m_col;
2960 } else if (tx != TX_4X4) {
2963 mask_id = (tx == TX_8X8) || (h == ss_v);
2964 mask[1][row_and_7][mask_id] |= m_col;
2965 mask_id = (tx == TX_8X8) || (w == ss_h);
2966 for (y = row_and_7; y < h + row_and_7; y++)
2967 mask[0][y][mask_id] |= t;
/* skipped-inter 4x4: only the outer block edge is filtered */
2969 int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
2971 for (y = row_and_7; y < h + row_and_7; y++) {
2972 mask[0][y][2] |= t4;
2973 mask[0][y][1] |= t8;
2975 mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
/* Decode and reconstruct one block: parse block syntax, decode residual
 * coefficients (or zero the nnz contexts on skip), run intra or inter
 * reconstruction — via temp buffers when the block overhangs the frame
 * edge — and accumulate loopfilter masks and limit LUTs.
 * NOTE(review): listing is heavily elided here (the b_mode parsing call,
 * several if-headers and the compound-prediction setup are missing);
 * comments describe only the visible lines. */
2980 static void decode_b(AVCodecContext *ctx, int row, int col,
2981 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2982 enum BlockLevel bl, enum BlockPartition bp)
2984 VP9Context *s = ctx->priv_data;
2986 enum BlockSize bs = bl * 3 + bp;
2987 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
2989 AVFrame *f = s->frames[CUR_FRAME].tf.f;
/* MV clamping range for this block position (1/8-pel units) */
2995 s->min_mv.x = -(128 + col * 64);
2996 s->min_mv.y = -(128 + row * 64);
2997 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
2998 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
/* chroma tx size is reduced when subsampling makes the luma tx too wide
 * or tall for the chroma block */
3004 b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
3005 (s->ss_v && h4 * 2 == (1 << b->tx)));
/* on skip: zero the above/left nnz contexts with width-matched stores */
3012 #define SPLAT_ZERO_CTX(v, n) \
3014 case 1: v = 0; break; \
3015 case 2: AV_ZERO16(&v); break; \
3016 case 4: AV_ZERO32(&v); break; \
3017 case 8: AV_ZERO64(&v); break; \
3018 case 16: AV_ZERO128(&v); break; \
3020 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3022 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3023 if (s->ss_##dir2) { \
3024 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3025 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3027 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3028 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3033 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
3034 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
3035 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
3036 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
3039 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
3040 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
3041 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
3042 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
/* advance the coefficient/eob cursors past this (skipped) block */
3047 s->block += w4 * h4 * 64;
3048 s->uvblock[0] += w4 * h4 * 64 >> (s->ss_h + s->ss_v);
3049 s->uvblock[1] += w4 * h4 * 64 >> (s->ss_h + s->ss_v);
3050 s->eob += 4 * w4 * h4;
3051 s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3052 s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3058 // emulated overhangs if the stride of the target buffer can't hold. This
3059 // allows to support emu-edge and so on even if we have large block
/* reconstruct into s->tmp_y / s->tmp_uv when the block would overrun the
 * frame buffer's stride or its bottom edge */
3061 emu[0] = (col + w4) * 8 > f->linesize[0] ||
3062 (row + h4) > s->rows;
3063 emu[1] = (col + w4) * 4 > f->linesize[1] ||
3064 (row + h4) > s->rows;
3066 s->dst[0] = s->tmp_y;
3069 s->dst[0] = f->data[0] + yoff;
3070 s->y_stride = f->linesize[0];
3073 s->dst[1] = s->tmp_uv[0];
3074 s->dst[2] = s->tmp_uv[1];
3077 s->dst[1] = f->data[1] + uvoff;
3078 s->dst[2] = f->data[2] + uvoff;
3079 s->uv_stride = f->linesize[1];
3082 intra_recon(ctx, yoff, uvoff);
/* copy the visible part of the temp-buffer reconstruction back into the
 * frame, using progressively narrower block-copy "MC" calls */
3087 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3089 for (n = 0; o < w; n++) {
3094 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3095 s->tmp_y + o, 64, h, 0, 0);
3101 int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
3103 for (n = 1; o < w; n++) {
3108 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3109 s->tmp_uv[0] + o, 32, h, 0, 0);
3110 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3111 s->tmp_uv[1] + o, 32, h, 0, 0);
3117 // pick filter level and find edges to apply filter to
3118 if (s->filter.level &&
3119 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3120 [b->mode[3] != ZEROMV]) > 0) {
3121 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3122 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3124 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3125 mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3126 if (s->ss_h || s->ss_v)
3127 mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
3128 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3129 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3130 b->uvtx, skip_inter);
/* lazily fill the limit/mblim LUT entries for this filter level,
 * derived from the sharpness setting (see VP9 loopfilter semantics) */
3132 if (!s->filter.lim_lut[lvl]) {
3133 int sharp = s->filter.sharpness;
3137 limit >>= (sharp + 3) >> 2;
3138 limit = FFMIN(limit, 9 - sharp);
3140 limit = FFMAX(limit, 1);
3142 s->filter.lim_lut[lvl] = limit;
3143 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
/* advance the coefficient/eob cursors past this block */
3149 s->block += w4 * h4 * 64;
3150 s->uvblock[0] += w4 * h4 * 64 >> (s->ss_v + s->ss_h);
3151 s->uvblock[1] += w4 * h4 * 64 >> (s->ss_v + s->ss_h);
3152 s->eob += 4 * w4 * h4;
3153 s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3154 s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
/* Recursively decode a superblock partition tree: read the partition
 * symbol for this level (context from above/left partition state), then
 * either decode a leaf block or recurse into the 2/4 sub-partitions.
 * Near the right/bottom frame edge the partition choice is constrained
 * and decoded with single branch probabilities.
 * NOTE(review): listing is elided; some case labels and closing braces
 * are missing from this view. */
3158 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3159 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3161 VP9Context *s = ctx->priv_data;
/* partition context: one bit each from the above and left state */
3162 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3163 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
3164 const uint8_t *p = s->keyframe || s->intraonly ? vp9_default_kf_partition_probs[bl][c] :
3165 s->prob.p.partition[bl][c];
3166 enum BlockPartition bp;
/* half-block size at this level, in 8px units (4, 2, 1, …) */
3167 ptrdiff_t hbs = 4 >> bl;
3168 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3169 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3172 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3173 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3174 } else if (col + hbs < s->cols) { // FIXME why not <=?
3175 if (row + hbs < s->rows) { // FIXME why not <=?
/* fully inside the frame: all four partition types are possible */
3176 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3178 case PARTITION_NONE:
3179 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3182 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3183 yoff += hbs * 8 * y_stride;
3184 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3185 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3188 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3190 uvoff += hbs * 8 >> s->ss_h;
3191 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3193 case PARTITION_SPLIT:
3194 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3195 decode_sb(ctx, row, col + hbs, lflvl,
3196 yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
3197 yoff += hbs * 8 * y_stride;
3198 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3199 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3200 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3201 yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
/* bottom edge: only SPLIT or H are representable */
3206 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3207 bp = PARTITION_SPLIT;
3208 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3209 decode_sb(ctx, row, col + hbs, lflvl,
3210 yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
3213 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
/* right edge: only SPLIT or V are representable */
3215 } else if (row + hbs < s->rows) { // FIXME why not <=?
3216 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3217 bp = PARTITION_SPLIT;
3218 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3219 yoff += hbs * 8 * y_stride;
3220 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3221 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3224 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
/* bottom-right corner: SPLIT is the only possibility */
3227 bp = PARTITION_SPLIT;
3228 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3230 s->counts.partition[bl][c][bp]++;
/* Replay a superblock partition tree from the stored per-block structures
 * (s->b cursor) instead of re-reading the bitstream — used when a frame
 * is reconstructed a second time (e.g. invisible-frame handling).  Mirrors
 * decode_sb()'s recursion and offset arithmetic.
 * NOTE(review): listing is elided; the leading if-header and the s->b
 * cursor advance are not visible here. */
3233 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3234 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3236 VP9Context *s = ctx->priv_data;
3238 ptrdiff_t hbs = 4 >> bl;
3239 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3240 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
/* deepest level: the stored block must be an 8x8 leaf */
3243 av_assert2(b->bl == BL_8X8);
3244 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3245 } else if (s->b->bl == bl) {
/* leaf at this level; H/V partitions have a second half-block */
3246 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3247 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3248 yoff += hbs * 8 * y_stride;
3249 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3250 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3251 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3253 uvoff += hbs * 8 >> s->ss_h;
3254 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
/* split: recurse into up to four quadrants, clipped to the frame */
3257 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3258 if (col + hbs < s->cols) { // FIXME why not <=?
3259 if (row + hbs < s->rows) {
3260 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
3261 uvoff + (8 * hbs >> s->ss_h), bl + 1);
3262 yoff += hbs * 8 * y_stride;
3263 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3264 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3265 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3266 yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
3269 uvoff += hbs * 8 >> s->ss_h;
3270 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3272 } else if (row + hbs < s->rows) {
3273 yoff += hbs * 8 * y_stride;
3274 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3275 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
/* Apply the vertical-edge (between-columns) loopfilter for one plane of a
 * superblock, driven by the bitmasks built in mask_edges(): mask indices
 * 0/1/2 select 16/8/4-px filters on block edges, index 3 the inner 4x4
 * edges.  L is the packed filter level for an edge; E/I are looked up
 * from the lazily-filled mblim/lim LUTs (see decode_b).
 * NOTE(review): listing is elided; some if-headers and the second-edge
 * (hm13/hm23) loop structure are partly missing, so comments stay close
 * to the visible lines. */
3280 static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
3281 uint8_t *lvl, uint8_t (*mask)[4],
3282 uint8_t *dst, ptrdiff_t ls)
3286 // filter edges between columns (e.g. block1 | block2)
/* two mask rows are handled per iteration so vertically-adjacent edges
 * can share a widened (dual) filter call */
3287 for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
3288 uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
3289 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3290 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3291 unsigned hm = hm1 | hm2 | hm13 | hm23;
3293 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 >> ss_h) {
3296 int L = *l, H = L >> 4;
3297 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3299 if (hmask1[0] & x) {
3300 if (hmask2[0] & x) {
/* two stacked 16px edges with equal level: one 16-wide call */
3301 av_assert2(l[8 << ss_v] == L);
3302 s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
3304 s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
3306 } else if (hm2 & x) {
/* different levels above/below: pack both into E/I (high byte =
 * lower edge) and use the mix2 dual filter */
3309 E |= s->filter.mblim_lut[L] << 8;
3310 I |= s->filter.lim_lut[L] << 8;
3311 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3313 [0](ptr, ls, E, I, H);
3315 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3316 [0](ptr, ls, E, I, H);
3318 } else if (hm2 & x) {
/* only the lower of the two rows has an edge here */
3319 int L = l[8 << ss_v], H = L >> 4;
3320 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3322 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3323 [0](ptr + 8 * ls, ls, E, I, H);
/* inner 4x4 edges (offset +4 within the 8px column) */
3331 int L = *l, H = L >> 4;
3332 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3337 E |= s->filter.mblim_lut[L] << 8;
3338 I |= s->filter.lim_lut[L] << 8;
3339 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls, E, I, H);
3341 s->dsp.loop_filter_8[0][0](ptr + 4, ls, E, I, H);
3343 } else if (hm23 & x) {
3344 int L = l[8 << ss_v], H = L >> 4;
3345 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3347 s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4, ls, E, I, H);
// Apply the in-loop deblocking filter across horizontal edges (between
// vertically adjacent blocks) of one plane inside a 64x64 superblock;
// mirror of filter_plane_cols with the [1] (row) variants of the DSP
// filters and mask-bit addressing shifted by (1 + ss_h).
// NOTE(review): several original lines (declarations, else-branches,
// closing braces) are missing from this listing.
3355 static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
3356 uint8_t *lvl, uint8_t (*mask)[4],
3357 uint8_t *dst, ptrdiff_t ls)
3362 // filter edges between rows (e.g. ------)
3364 for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
3365 uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
// vm merges the 16/8/4 masks for this row; vm3 is the inner-4px mask.
3366 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
// Two horizontally adjacent blocks are handled per iteration, hence the
// stride of (2 << ss_h) mask bits / 16 pixels.
3368 for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16, l += 2 << ss_h) {
// L: level byte; H: high nibble (filter threshold); E/I: limits from LUTs.
3371 int L = *l, H = L >> 4;
3372 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3375 if (vmask[0] & (x << (1 + ss_h))) {
// Both neighbours want the widest filter: 16-pixel row filter
// (matching levels asserted).
3376 av_assert2(l[1 + ss_h] == L);
3377 s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
3379 s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
3381 } else if (vm & (x << (1 + ss_h))) {
// Pack the second block's limits into the high byte for a single
// mix2 call covering both 8px columns.
3384 E |= s->filter.mblim_lut[L] << 8;
3385 I |= s->filter.lim_lut[L] << 8;
3386 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3387 [!!(vmask[1] & (x << (1 + ss_h)))]
3388 [1](ptr, ls, E, I, H);
3390 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3391 [1](ptr, ls, E, I, H);
3393 } else if (vm & (x << (1 + ss_h))) {
// Only the right-hand block needs filtering: reload its level/limits.
3394 int L = l[1 + ss_h], H = L >> 4;
3395 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3397 s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
3398 [1](ptr + 8, ls, E, I, H);
// Inner 4px edges (vertical offset of 4 lines inside the 8px block).
3403 int L = *l, H = L >> 4;
3404 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3406 if (vm3 & (x << (1 + ss_h))) {
3409 E |= s->filter.mblim_lut[L] << 8;
3410 I |= s->filter.lim_lut[L] << 8;
3411 s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
3413 s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
3415 } else if (vm3 & (x << (1 + ss_h))) {
3416 int L = l[1 + ss_h], H = L >> 4;
3417 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3419 s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8, ls, E, I, H);
// Loop-filter one 64x64 superblock: luma first, then both chroma planes,
// columns (vertical edges) before rows (horizontal edges) per plane.
//   lflvl       per-superblock filter levels + edge masks built during decode
//   yoff/uvoff  byte offsets of this superblock into the luma/chroma planes
// NOTE(review): some original lines (declarations, closing braces) are
// missing from this listing.
3432 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3433 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3435 VP9Context *s = ctx->priv_data;
3436 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3437 uint8_t *dst = f->data[0] + yoff;
3438 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
// Chroma uses the mask set selected by the subsampling mode (444/422/420).
3439 uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
3442 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3443 // if you think of them as acting on a 8x8 block max, we can interleave
3444 // each v/h within the single x loop, but that only works if we work on
3445 // 8 pixel blocks, and we won't always do that (we want at least 16px
3446 // to use SSE2 optimizations, perhaps 32 for AVX2)
3448 filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
3449 filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);
// Both chroma planes share one level array and mask set.
3451 for (p = 0; p < 2; p++) {
3452 dst = f->data[1 + p] + uvoff;
3453 filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
3454 filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
// Compute the half-open range [*start, *end) of 8-pixel block units covered
// by tile number idx, when a frame dimension of n superblocks is split into
// 2^log2_n tiles.
//   idx     tile index (0 .. 2^log2_n - 1)
//   log2_n  log2 of the number of tiles in this dimension
//   n       frame dimension in 64x64 superblocks
// The superblock split points are clamped to n (defensive; the last tile's
// end lands exactly on n), and << 3 converts superblock units into the
// decoder's 8-pixel row/col units used by the tile decode loops.
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = ( idx      * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;

    if (sb_start > n)
        sb_start = n;
    if (sb_end > n)
        sb_end = n;
    *start = sb_start << 3;
    *end   = sb_end   << 3;
}
3466 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3467 int max_count, int update_factor)
3469 unsigned ct = ct0 + ct1, p2, p1;
3475 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3476 p2 = av_clip(p2, 1, 255);
3477 ct = FFMIN(ct, max_count);
3478 update_factor = FASTDIV(update_factor * ct, max_count);
3480 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3481 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
// End-of-frame backward adaptation: blend the frame-context probability
// model towards the symbol counts gathered while decoding this frame.
// Keyframes/intra-only frames only adapt the coefficient probabilities;
// all inter-specific models are adapted below that early section.
// NOTE(review): this listing is missing several original lines (loop
// variable declarations, some braces/else lines, an early return after the
// tx memcpy block); comments describe only the visible code.
3484 static void adapt_probs(VP9Context *s)
3487 prob_context *p = &s->prob_ctx[s->framectxid].p;
// Reduced update factor (112/256) right after a keyframe or intra frame,
// full 128/256 otherwise.
3488 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
// Coefficient probabilities: per tx-size / plane-type / ref-type / band /
// coef-context, adapt the eob flag and the 3-node token tree.
3491 for (i = 0; i < 4; i++)
3492 for (j = 0; j < 2; j++)
3493 for (k = 0; k < 2; k++)
3494 for (l = 0; l < 6; l++)
3495 for (m = 0; m < 6; m++) {
3496 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3497 unsigned *e = s->counts.eob[i][j][k][l][m];
3498 unsigned *c = s->counts.coef[i][j][k][l][m];
3500 if (l == 0 && m >= 3) // dc only has 3 pt
3503 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3504 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3505 adapt_prob(&pp[2], c[1], c[2], 24, uf);
// Intra frames keep the inter-related models as-is (copied, not adapted).
3508 if (s->keyframe || s->intraonly) {
3509 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3510 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3511 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3512 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
// skip flag
3517 for (i = 0; i < 3; i++)
3518 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
// intra/inter flag
3521 for (i = 0; i < 4; i++)
3522 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
// compound prediction mode selection
3525 if (s->comppredmode == PRED_SWITCHABLE) {
3526 for (i = 0; i < 5; i++)
3527 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
// compound reference selection
3531 if (s->comppredmode != PRED_SINGLEREF) {
3532 for (i = 0; i < 5; i++)
3533 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3534 s->counts.comp_ref[i][1], 20, 128);
// single reference selection (2-node tree)
3537 if (s->comppredmode != PRED_COMPREF) {
3538 for (i = 0; i < 5; i++) {
3539 uint8_t *pp = p->single_ref[i];
3540 unsigned (*c)[2] = s->counts.single_ref[i];
3542 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3543 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3547 // block partitioning
3548 for (i = 0; i < 4; i++)
3549 for (j = 0; j < 4; j++) {
3550 uint8_t *pp = p->partition[i][j];
3551 unsigned *c = s->counts.partition[i][j];
3553 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3554 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3555 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// tx size selection trees (only meaningful in switchable mode)
3559 if (s->txfmmode == TX_SWITCHABLE) {
3560 for (i = 0; i < 2; i++) {
3561 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3563 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3564 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3565 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3566 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3567 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3568 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3572 // interpolation filter
3573 if (s->filtermode == FILTER_SWITCHABLE) {
3574 for (i = 0; i < 4; i++) {
3575 uint8_t *pp = p->filter[i];
3576 unsigned *c = s->counts.filter[i];
3578 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3579 adapt_prob(&pp[1], c[1], c[2], 20, 128);
// inter prediction mode tree (zeromv/nearestmv/nearmv/newmv)
3584 for (i = 0; i < 7; i++) {
3585 uint8_t *pp = p->mv_mode[i];
3586 unsigned *c = s->counts.mv_mode[i];
3588 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3589 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3590 adapt_prob(&pp[2], c[1], c[3], 20, 128);
// motion vector joint distribution
3595 uint8_t *pp = p->mv_joint;
3596 unsigned *c = s->counts.mv_joint;
3598 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3599 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3600 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// motion vector components (horizontal/vertical)
3604 for (i = 0; i < 2; i++) {
3606 unsigned *c, (*c2)[2], sum;
3608 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3609 s->counts.mv_comp[i].sign[1], 20, 128);
// magnitude class tree: each node is conditioned on the remaining sum
3611 pp = p->mv_comp[i].classes;
3612 c = s->counts.mv_comp[i].classes;
3613 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3614 adapt_prob(&pp[0], c[0], sum, 20, 128);
3616 adapt_prob(&pp[1], c[1], sum, 20, 128);
3618 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3619 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3621 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3622 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3624 adapt_prob(&pp[6], c[6], sum, 20, 128);
3625 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3626 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3627 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3629 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3630 s->counts.mv_comp[i].class0[1], 20, 128);
// per-bit probabilities for the integer part of large-class MVs
3631 pp = p->mv_comp[i].bits;
3632 c2 = s->counts.mv_comp[i].bits;
3633 for (j = 0; j < 10; j++)
3634 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
// fractional-pel trees (class0-specific and generic)
3636 for (j = 0; j < 2; j++) {
3637 pp = p->mv_comp[i].class0_fp[j];
3638 c = s->counts.mv_comp[i].class0_fp[j];
3639 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3640 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3641 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3643 pp = p->mv_comp[i].fp;
3644 c = s->counts.mv_comp[i].fp;
3645 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3646 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3647 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// high-precision (1/8-pel) bits, only when enabled for this frame
3649 if (s->highprecisionmvs) {
3650 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3651 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3652 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3653 s->counts.mv_comp[i].hp[1], 20, 128);
// luma intra mode tree: 'sum' tracks the probability mass not yet
// consumed by earlier tree nodes, so each adapt sees the conditional counts
3658 for (i = 0; i < 4; i++) {
3659 uint8_t *pp = p->y_mode[i];
3660 unsigned *c = s->counts.y_mode[i], sum, s2;
3662 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3663 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3664 sum -= c[TM_VP8_PRED];
3665 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3666 sum -= c[VERT_PRED];
3667 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3668 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3670 adapt_prob(&pp[3], s2, sum, 20, 128);
3672 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3673 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3674 sum -= c[DIAG_DOWN_LEFT_PRED];
3675 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3676 sum -= c[VERT_LEFT_PRED];
3677 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3678 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// chroma intra mode tree: same structure, conditioned on the luma mode
3682 for (i = 0; i < 10; i++) {
3683 uint8_t *pp = p->uv_mode[i];
3684 unsigned *c = s->counts.uv_mode[i], sum, s2;
3686 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3687 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3688 sum -= c[TM_VP8_PRED];
3689 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3690 sum -= c[VERT_PRED];
3691 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3692 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3694 adapt_prob(&pp[3], s2, sum, 20, 128);
3696 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3697 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3698 sum -= c[DIAG_DOWN_LEFT_PRED];
3699 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3700 sum -= c[VERT_LEFT_PRED];
3701 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3702 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// Release the decoder's resolution-dependent scratch allocations: the
// intra-prediction backup row and the block/coefficient base arrays.
// av_freep() NULLs the pointers, so a later reallocation is safe.
// NOTE(review): companion pointers (e.g. intra_pred_data[1]/[2]) presumably
// alias into these allocations — confirm against the allocation site.
3706 static void free_buffers(VP9Context *s)
3708 av_freep(&s->intra_pred_data[0]);
3709 av_freep(&s->b_base);
3710 av_freep(&s->block_base);
// Codec close callback: release the three internal frames (current +
// mvpair/segmap references), all 8 reference slots in both the active and
// the pending ("next") sets, then the scratch buffers. Also used as the
// error-cleanup path of init_frames(), so it must tolerate partially
// allocated state.
3713 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3715 VP9Context *s = ctx->priv_data;
3718 for (i = 0; i < 3; i++) {
3719 if (s->frames[i].tf.f->data[0])
3720 vp9_unref_frame(ctx, &s->frames[i]);
3721 av_frame_free(&s->frames[i].tf.f);
3723 for (i = 0; i < 8; i++) {
3724 if (s->refs[i].f->data[0])
3725 ff_thread_release_buffer(ctx, &s->refs[i]);
3726 av_frame_free(&s->refs[i].f);
3727 if (s->next_refs[i].f->data[0])
3728 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3729 av_frame_free(&s->next_refs[i].f);
// Main decode entry point for one packet: parse the frame header, set up
// frame buffers and reference bookkeeping, decode all tiles (optionally in
// two passes for frame threading), run the loop filter per superblock row,
// rotate the reference slots, and output the frame unless it is invisible.
// NOTE(review): this listing is missing many original lines (declarations,
// error-path gotos, else branches, closing braces); comments describe only
// the visible code.
3739 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3740 int *got_frame, AVPacket *pkt)
3742 const uint8_t *data = pkt->data;
3743 int size = pkt->size;
3744 VP9Context *s = ctx->priv_data;
3745 int res, tile_row, tile_col, i, ref, row, col;
// Keep the previous segmentation map alive when this frame reuses it.
3746 int retain_segmap_ref = s->segmentation.enabled && !s->segmentation.update_map;
3747 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
3750 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
// res == 0: "show existing frame" — output reference 'ref' directly,
// refresh next_refs from the current refs, and decode nothing.
3752 } else if (res == 0) {
3753 if (!s->refs[ref].f->data[0]) {
3754 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3755 return AVERROR_INVALIDDATA;
3757 if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
3759 ((AVFrame *)frame)->pkt_pts = pkt->pts;
3760 ((AVFrame *)frame)->pkt_dts = pkt->dts;
3761 for (i = 0; i < 8; i++) {
3762 if (s->next_refs[i].f->data[0])
3763 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3764 if (s->refs[i].f->data[0] &&
3765 (res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i])) < 0)
// Normal decode path: demote the just-decoded frame to the segmap/mvpair
// reference slots (skipped on key/intra/error-resilient frames), then
// allocate a fresh current frame.
3774 if (!retain_segmap_ref) {
3775 if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
3776 vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
3777 if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
3778 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
3781 if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
3782 vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);
3783 if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
3784 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
3786 if (s->frames[CUR_FRAME].tf.f->data[0])
3787 vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
3788 if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
3790 f = s->frames[CUR_FRAME].tf.f;
3791 f->key_frame = s->keyframe;
3792 f->pict_type = (s->keyframe || s->intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3793 ls_y = f->linesize[0];
3794 ls_uv =f->linesize[1];
// Build the next reference set now, before tile decode, so frame threads
// can pick it up early: refreshed slots point to the current frame.
3797 for (i = 0; i < 8; i++) {
3798 if (s->next_refs[i].f->data[0])
3799 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3800 if (s->refreshrefmask & (1 << i)) {
3801 res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
3802 } else if (s->refs[i].f->data[0]) {
3803 res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
3809 // main tile decode loop
// Reset the "above" entropy-context rows for the whole frame width.
3810 memset(s->above_partition_ctx, 0, s->cols)
3811 memset(s->above_skip_ctx, 0, s->cols);
3812 if (s->keyframe || s->intraonly) {
3813 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
3815 memset(s->above_mode_ctx, NEARESTMV, s->cols);
3817 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
3818 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
3819 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
3820 memset(s->above_segpred_ctx, 0, s->cols);
// Two-pass decoding only for frame threads when the adapted context must
// be published before reconstruction finishes.
3821 s->pass = s->frames[CUR_FRAME].uses_2pass =
3822 ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
3823 if ((res = update_block_buffers(ctx)) < 0) {
3824 av_log(ctx, AV_LOG_ERROR,
3825 "Failed to allocate block buffers\n");
// Parallel mode: the spec mandates forward-only context updates, so the
// frame context can be committed (and setup finished) immediately.
3828 if (s->refreshctx && s->parallelmode) {
3831 for (i = 0; i < 4; i++) {
3832 for (j = 0; j < 2; j++)
3833 for (k = 0; k < 2; k++)
3834 for (l = 0; l < 6; l++)
3835 for (m = 0; m < 6; m++)
3836 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
3837 s->prob.coef[i][j][k][l][m], 3);
3838 if (s->txfmmode == i)
3841 s->prob_ctx[s->framectxid].p = s->prob.p;
3842 ff_thread_finish_setup(ctx);
3843 } else if (!s->refreshctx) {
3844 ff_thread_finish_setup(ctx);
// Per-pass reset of the coefficient/eob write cursors.
3850 s->block = s->block_base;
3851 s->uvblock[0] = s->uvblock_base[0];
3852 s->uvblock[1] = s->uvblock_base[1];
3853 s->eob = s->eob_base;
3854 s->uveob[0] = s->uveob_base[0];
3855 s->uveob[1] = s->uveob_base[1];
// Set up one range decoder per tile column; the last tile of the frame
// has no explicit 32-bit size prefix.
3857 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
3858 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
3859 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
3861 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3864 if (tile_col == s->tiling.tile_cols - 1 &&
3865 tile_row == s->tiling.tile_rows - 1) {
3868 tile_size = AV_RB32(data);
3872 if (tile_size > size) {
// Unblock any waiting consumer thread before erroring out.
3873 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3874 return AVERROR_INVALIDDATA;
3876 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
3877 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
3878 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3879 return AVERROR_INVALIDDATA;
// Decode superblock rows; tiles in the same row are interleaved per
// sb-row so the loop filter and progress reporting can run row by row.
3886 for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
3887 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
3888 struct VP9Filter *lflvl_ptr = s->lflvl;
3889 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
3891 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3892 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
3893 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
// Reset the "left" entropy contexts at each tile-column start.
3896 memset(s->left_partition_ctx, 0, 8);
3897 memset(s->left_skip_ctx, 0, 8);
3898 if (s->keyframe || s->intraonly) {
3899 memset(s->left_mode_ctx, DC_PRED, 16);
3901 memset(s->left_mode_ctx, NEARESTMV, 8);
3903 memset(s->left_y_nnz_ctx, 0, 16);
3904 memset(s->left_uv_nnz_ctx, 0, 32);
3905 memset(s->left_segpred_ctx, 0, 8);
// Swap in this tile's range decoder state, and save it back after the
// sb-row so the next row resumes where this one stopped.
3907 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
3910 for (col = s->tiling.tile_col_start;
3911 col < s->tiling.tile_col_end;
3912 col += 8, yoff2 += 64, uvoff2 += 64 >> s->ss_h, lflvl_ptr++) {
3913 // FIXME integrate with lf code (i.e. zero after each
3914 // use, similar to invtxfm coefficients, or similar)
3916 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
// Pass 2 reconstructs from the stored per-block data; pass 0/1 parse.
3920 decode_sb_mem(ctx, row, col, lflvl_ptr,
3921 yoff2, uvoff2, BL_64X64);
3923 decode_sb(ctx, row, col, lflvl_ptr,
3924 yoff2, uvoff2, BL_64X64);
3928 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
3936 // backup pre-loopfilter reconstruction data for intra
3937 // prediction of next row of sb64s
3938 if (row + 8 < s->rows) {
3939 memcpy(s->intra_pred_data[0],
3940 f->data[0] + yoff + 63 * ls_y,
3942 memcpy(s->intra_pred_data[1],
3943 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
3944 8 * s->cols >> s->ss_h);
3945 memcpy(s->intra_pred_data[2],
3946 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
3947 8 * s->cols >> s->ss_h);
3950 // loopfilter one row
3951 if (s->filter.level) {
3954 lflvl_ptr = s->lflvl;
3955 for (col = 0; col < s->cols;
3956 col += 8, yoff2 += 64, uvoff2 += 64 >> s->ss_h, lflvl_ptr++) {
3957 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
3961 // FIXME maybe we can make this more finegrained by running the
3962 // loopfilter per-block instead of after each sbrow
3963 // In fact that would also make intra pred left preparation easier?
3964 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
// After pass 1 of two-pass decoding: adapt probabilities and publish the
// frame context so the next frame thread can start.
3968 if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
3970 ff_thread_finish_setup(ctx);
3972 } while (s->pass++ == 1);
3973 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
// Commit the pending reference set built earlier.
3976 for (i = 0; i < 8; i++) {
3977 if (s->refs[i].f->data[0])
3978 ff_thread_release_buffer(ctx, &s->refs[i]);
3979 ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
// Invisible frames update references but produce no output.
3982 if (!s->invisible) {
3983 if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
// Flush callback (e.g. on seek): drop the three internal frames and all
// eight active reference slots. The frame/ref AVFrame structs themselves
// stay allocated for reuse; only their buffers are released.
3991 static void vp9_decode_flush(AVCodecContext *ctx)
3993 VP9Context *s = ctx->priv_data;
3996 for (i = 0; i < 3; i++)
3997 vp9_unref_frame(ctx, &s->frames[i]);
3998 for (i = 0; i < 8; i++)
3999 ff_thread_release_buffer(ctx, &s->refs[i]);
// Allocate the AVFrame shells for the 3 internal frames and the 8 active +
// 8 pending reference slots. On any allocation failure the whole decoder
// state is torn down via vp9_decode_free() (which tolerates the partially
// initialized arrays) and ENOMEM is returned.
4002 static int init_frames(AVCodecContext *ctx)
4004 VP9Context *s = ctx->priv_data;
4007 for (i = 0; i < 3; i++) {
4008 s->frames[i].tf.f = av_frame_alloc();
4009 if (!s->frames[i].tf.f) {
4010 vp9_decode_free(ctx);
4011 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4012 return AVERROR(ENOMEM);
4015 for (i = 0; i < 8; i++) {
4016 s->refs[i].f = av_frame_alloc();
4017 s->next_refs[i].f = av_frame_alloc();
4018 if (!s->refs[i].f || !s->next_refs[i].f) {
4019 vp9_decode_free(ctx);
4020 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4021 return AVERROR(ENOMEM);
// Codec init callback: enable per-frame progress allocation for frame
// threading, initialize the VP9 and generic video DSP tables (8-bit), mark
// the filter sharpness as "unset" so the first header forces a rebuild of
// the limit LUTs, and allocate the frame shells.
4028 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4030 VP9Context *s = ctx->priv_data;
4032 ctx->internal->allocate_progress = 1;
4033 ff_vp9dsp_init(&s->dsp);
4034 ff_videodsp_init(&s->vdsp, 8);
4035 s->filter.sharpness = -1;
4037 return init_frames(ctx);
// Frame-thread worker init: each thread copy only needs its own frame
// shells; everything else is pulled over in update_thread_context().
4040 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4042 return init_frames(avctx);
// Frame-threading sync: copy the decode state the next frame depends on
// from the source thread (ssrc) into this thread (s) — internal frames,
// the reference set as left by ssrc (its next_refs), and the inter-frame
// prediction state (probabilities, loop-filter deltas, segmentation).
4045 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4048 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4050 // detect size changes in other threads
4051 if (s->intra_pred_data[0] &&
4052 (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
4056 for (i = 0; i < 3; i++) {
4057 if (s->frames[i].tf.f->data[0])
4058 vp9_unref_frame(dst, &s->frames[i]);
4059 if (ssrc->frames[i].tf.f->data[0]) {
4060 if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
// Our active refs become the source thread's pending (post-frame) refs.
4064 for (i = 0; i < 8; i++) {
4065 if (s->refs[i].f->data[0])
4066 ff_thread_release_buffer(dst, &s->refs[i]);
4067 if (ssrc->next_refs[i].f->data[0]) {
4068 if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
4073 s->invisible = ssrc->invisible;
4074 s->keyframe = ssrc->keyframe;
4075 s->ss_v = ssrc->ss_v;
4076 s->ss_h = ssrc->ss_h;
4077 s->segmentation.enabled = ssrc->segmentation.enabled;
4078 s->segmentation.update_map = ssrc->segmentation.update_map;
4079 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4080 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
// Segment features are only meaningful while segmentation is enabled.
4081 if (ssrc->segmentation.enabled) {
4082 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4083 sizeof(s->segmentation.feat));
// Profiles this decoder implementation reports support for
// (FF_PROFILE_UNKNOWN terminates the list).
4089 static const AVProfile profiles[] = {
4090 { FF_PROFILE_VP9_0, "Profile 0" },
4091 { FF_PROFILE_VP9_1, "Profile 1" },
4092 { FF_PROFILE_UNKNOWN },
4095 AVCodec ff_vp9_decoder = {
4097 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4098 .type = AVMEDIA_TYPE_VIDEO,
4099 .id = AV_CODEC_ID_VP9,
4100 .priv_data_size = sizeof(VP9Context),
4101 .init = vp9_decode_init,
4102 .close = vp9_decode_free,
4103 .decode = vp9_decode_frame,
4104 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4105 .flush = vp9_decode_flush,
4106 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4107 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
4108 .profiles = NULL_IF_CONFIG_SMALL(profiles),