2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
35 #define VP9_SYNCCODE 0x498342
// Per-frame state: the decoded picture's side data. segmentation_map and mv
// both alias into the single refcounted extradata buffer so they can be
// shared cheaply between frame threads.
// NOTE(review): struct is truncated in this view -- the ThreadFrame member
// and the closing brace are not visible here.
typedef struct VP9Frame {
    AVBufferRef *extradata;          // backing buffer for the two pointers below
    uint8_t *segmentation_map;       // one segment id per 8x8 block, start of extradata
    struct VP9mvrefPair *mv;         // per-8x8 mv/ref pairs, follows the segmap in extradata
    // NOTE(review): the bitmask below belongs to a separate loopfilter
    // struct whose opening line is not visible in this view.
    uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
                              [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
// Decode state for one coded block: modes, references, motion vectors and
// transform sizes.
// NOTE(review): additional members and the closing brace are not visible
// in this view.
typedef struct VP9Block {
    uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
    enum FilterMode filter;
    VP56mv mv[4 /* b_idx */][2 /* ref */];   // up to 4 sub-block mvs, up to 2 refs each
    enum TxfmMode tx, uvtx;                  // luma and chroma transform sizes
    enum BlockPartition bp;
// Main decoder context: bitstream readers, frame-header fields, probability
// models, adaptation counts and the per-row "above" context caches.
// NOTE(review): this view of the struct is incomplete -- several members and
// the nested struct wrappers (filter/lf_delta/segmentation/tiling/prob_ctx/
// prob/counts) as well as the closing brace are not visible here.
typedef struct VP9Context {
    VP9Block *b_base, *b;                 // per-block state (base array and cursor)
    int row, row7, col, col7;             // current position; row7/col7 = modulo-8 part
    ptrdiff_t y_stride, uv_stride;
    uint8_t keyframe, last_keyframe;
    uint8_t use_last_frame_mvs;           // temporal mv prediction allowed this frame
    uint8_t refreshrefmask;               // which of the 8 reference slots get overwritten
    uint8_t highprecisionmvs;             // high-precision (1/8-pel) mvs enabled
    enum FilterMode filtermode;
    uint8_t allowcompinter;               // compound inter prediction permitted
    uint8_t parallelmode;                 // frame-parallel mode: no backward adaptation
    uint8_t varcompref[2];                // the two variable refs used for compound pred
    ThreadFrame refs[8], next_refs[8];    // reference slots, current and post-refresh
#define REF_FRAME_MVPAIR 1
#define REF_FRAME_SEGMAP 2
    uint8_t mblim_lut[64];                // loopfilter limit LUT derived from sharpness
    int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;  // per-plane/band quantizer deltas
#define MAX_SEGMENT 8
    uint8_t absolute_vals;                // segment features are absolute, not deltas
    uint8_t skip_enabled;
    unsigned log2_tile_cols, log2_tile_rows;
    unsigned tile_cols, tile_rows;
    unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    unsigned sb_cols, sb_rows, rows, cols;   // size in 64x64 superblocks / 8x8 blocks

    // NOTE(review): the two coef[] tables below belong to different nested
    // structs (saved probability contexts vs. the active model) whose
    // wrapper lines are not visible in this view.
    uint8_t coef[4][2][2][6][6][3];
    uint8_t coef[4][2][2][6][6][11];

    // symbol counts gathered while decoding, used for backward adaptation
    unsigned y_mode[4][10];
    unsigned uv_mode[10][10];
    unsigned filter[4][3];
    unsigned mv_mode[7][4];
    unsigned intra[4][2];
    unsigned single_ref[5][2][2];
    unsigned comp_ref[5][2];
    unsigned tx32p[2][4];
    unsigned tx16p[2][3];
    unsigned mv_joint[4];
    unsigned classes[11];
    unsigned bits[10][2];
    unsigned class0_fp[2][4];
    unsigned class0_hp[2];
    unsigned partition[4][4][4];
    unsigned coef[4][2][2][6][6][3];
    unsigned eob[4][2][2][6][6][2];

    enum TxfmMode txfmmode;
    enum CompPredMode comppredmode;

    // contextual (left/above) cache
    DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
    DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
    DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
    DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
    DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
    // the "above" contexts span a whole row and live in one shared
    // allocation made by update_size()
    uint8_t *above_partition_ctx;
    uint8_t *above_mode_ctx;
    // FIXME maybe merge some of the below in a flags field?
    uint8_t *above_y_nnz_ctx;
    uint8_t *above_uv_nnz_ctx[2];
    uint8_t *above_skip_ctx; // 1bit
    uint8_t *above_txfm_ctx; // 2bit
    uint8_t *above_segpred_ctx; // 1bit
    uint8_t *above_intra_ctx; // 1bit
    uint8_t *above_comp_ctx; // 1bit
    uint8_t *above_ref_ctx; // 2bit
    uint8_t *above_filter_ctx;
    VP56mv (*above_mv_ctx)[2];

    uint8_t *intra_pred_data[3];          // last pixel row of the SB row above, per plane
    struct VP9Filter *lflvl;
    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135*144];

    // block reconstruction intermediates
    int block_alloc_using_2pass;
    int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
    uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
    struct { int x, y; } min_mv, max_mv;  // mv clamping range for the current block
    DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64];
    DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64];
    uint16_t mvscale[3][2];               // 14-bit fixed-point ref->cur scale per ref
    uint8_t mvstep[3][2];
// Per-block-size {width, height} lookup. Judging by the values (e.g.
// {16,16} vs {8,8} for the largest size), the first group counts 4x4 units
// and the second group 8x8 units -- TODO confirm against the BS_* enum.
// NOTE(review): the inner group braces and the closing "};" are not
// visible in this view.
static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
    { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
    { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
    { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
    { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
// Allocate the AVFrame plus the per-frame extradata buffer that holds the
// segmentation map and the mv/ref pair array, laid out back to back.
// Returns 0 on success or a negative AVERROR code.
// NOTE(review): local declarations, some closing braces and the trailing
// returns are not visible in this view.
static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
    VP9Context *s = ctx->priv_data;

    if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
    sz = 64 * s->sb_cols * s->sb_rows;   // one entry per 8x8 block (64 per superblock)
    if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
        ff_thread_release_buffer(ctx, &f->tf);   // undo the frame ref on OOM
        return AVERROR(ENOMEM);

    f->segmentation_map = f->extradata->data;
    f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
// Release both the frame buffer and its refcounted side-data buffer.
// NOTE(review): the function braces are not visible in this view.
static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
    ff_thread_release_buffer(ctx, &f->tf);
    av_buffer_unref(&f->extradata);
// Make dst an additional reference to src: the frame buffer via
// ff_thread_ref_frame(), the side data via a new AVBufferRef. On side-data
// OOM the frame ref taken first is released again.
// NOTE(review): the local declaration, the dst->mv copy and the returns
// are not visible in this view.
static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
    if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
    } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
        vp9_unref_frame(ctx, dst);   // drops the tf ref acquired above
        return AVERROR(ENOMEM);

    // pointer aliases into the (now shared) extradata buffer
    dst->segmentation_map = src->segmentation_map;
    dst->uses_2pass = src->uses_2pass;
// (Re)allocate all frame-geometry-dependent state: the row-wide "above"
// context arrays, intra prediction edge data and loopfilter state, all
// carved out of one malloc. No-op when size and pixel format are unchanged.
// NOTE(review): several lines (braces, ctx size/pix_fmt updates, the final
// return) are not visible in this view.
static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
    VP9Context *s = ctx->priv_data;

    av_assert0(w > 0 && h > 0);

    // fast path: nothing changed since the last call
    if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)

    s->sb_cols = (w + 63) >> 6;   // 64x64 superblock grid
    s->sb_rows = (h + 63) >> 6;
    s->cols = (w + 7) >> 3;       // 8x8 block grid
    s->rows = (h + 7) >> 3;

// slice consecutive per-superblock-column regions off the single allocation
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    // FIXME we slightly over-allocate here for subsampled chroma, but a little
    // bit of padding shouldn't affect performance...
    // 320 bytes/sb_col = 3*64 (intra pred planes) + 4*16 (nnz/mode ctx)
    // + 8*8 (the 1-2 bit contexts); mv ctx and lflvl are sized separately
    p = av_malloc(s->sb_cols * (320 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0], uint8_t *, 64);
    assign(s->intra_pred_data[1], uint8_t *, 64);
    assign(s->intra_pred_data[2], uint8_t *, 64);
    assign(s->above_y_nnz_ctx, uint8_t *, 16);
    assign(s->above_mode_ctx, uint8_t *, 16);
    assign(s->above_mv_ctx, VP56mv(*)[2], 16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
    assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
    assign(s->above_partition_ctx, uint8_t *, 8);
    assign(s->above_skip_ctx, uint8_t *, 8);
    assign(s->above_txfm_ctx, uint8_t *, 8);
    assign(s->above_segpred_ctx, uint8_t *, 8);
    assign(s->above_intra_ctx, uint8_t *, 8);
    assign(s->above_comp_ctx, uint8_t *, 8);
    assign(s->above_ref_ctx, uint8_t *, 8);
    assign(s->above_filter_ctx, uint8_t *, 8);
    assign(s->lflvl, struct VP9Filter *, 1);

    // these will be re-allocated a little later
    av_freep(&s->b_base);
    av_freep(&s->block_base);
// (Re)allocate the coefficient/eob scratch buffers. In 2-pass
// (frame-threaded) decoding each superblock keeps its own storage; in
// 1-pass mode a single superblock's worth is reused for the whole frame.
// NOTE(review): the early return, an av_free() and closing braces are not
// visible in this view.
static int update_block_buffers(AVCodecContext *ctx)
    VP9Context *s = ctx->priv_data;
    int chroma_blocks, chroma_eobs;

    // already allocated for the right mode and geometry
    if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)

    av_free(s->block_base);
    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);   // chroma samples per 64x64 SB
    chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);     // chroma 4x4 blocks per 64x64 SB
    if (s->frames[CUR_FRAME].uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

        s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
        // one slab: luma coefs, 2 chroma coef planes, luma eobs, 2 chroma eob planes
        s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * sizeof(int16_t) +
                                    16 * 16 + 2 * chroma_eobs) * sbs);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
        s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks);
        s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
        s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
        s->b_base = av_malloc(sizeof(VP9Block));
        s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * sizeof(int16_t) +
                                   16 * 16 + 2 * chroma_eobs);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + 64 * 64;
        s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks);
        s->uveob_base[0] = s->eob_base + 16 * 16;
        s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
    s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
403 // for some reason the sign bit is at the end, not the start, of a bit sequence
404 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
406 int v = get_bits(gb, n);
407 return get_bits1(gb) ? -v : v;
// Undo the "recentering" used by the differential probability coder:
// values up to 2*m encode small deltas folded alternately to either side
// of m (odd = below, even = above); anything larger is stored verbatim.
static inline int inv_recenter_nonneg(int v, int m)
{
    if (v > 2 * m)
        return v;
    if (v & 1)
        return m - ((v + 1) >> 1);
    return m + (v >> 1);
}
// differential forward probability updates
// Decode a probability delta from the range coder and apply it to the
// current probability p (in [1, 255]), returning the new probability.
// NOTE(review): the opening brace, the declaration of 'd' and the closing
// braces of the if/else chain are not visible in this view.
static int update_prob(VP56RangeCoder *c, int p)
    // maps the coded VLC index back to the actual absolute delta; the
    // first 20 entries are the "cheap, rough" update values (see below)
    static const int inv_map_table[254] = {
        7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
        10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
        25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
        40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
        55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
        70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
        86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A,255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0,254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    // VLC: successively larger delta classes, each a fixed-width suffix
    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
        d = vp8_rac_get_uint(c, 7);
        d = (d << 1) - 65 + vp8_rac_get(c);

    // fold the delta back around p, keeping the result inside [1, 255]
    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                      255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
473 static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
475 static const enum AVColorSpace colorspaces[8] = {
476 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
477 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
479 VP9Context *s = ctx->priv_data;
480 enum AVPixelFormat res;
482 ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
483 if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
484 if (s->profile == 1) {
485 s->ss_h = s->ss_v = 1;
486 res = AV_PIX_FMT_GBRP;
487 ctx->color_range = AVCOL_RANGE_JPEG;
489 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
490 return AVERROR_INVALIDDATA;
493 static const enum AVPixelFormat pix_fmt_for_ss[2 /* v */][2 /* h */] = {
494 { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
495 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P },
497 ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
498 if (s->profile == 1) {
499 s->ss_h = get_bits1(&s->gb);
500 s->ss_v = get_bits1(&s->gb);
501 if ((res = pix_fmt_for_ss[s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
502 av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile 1\n");
503 return AVERROR_INVALIDDATA;
504 } else if (get_bits1(&s->gb)) {
505 av_log(ctx, AV_LOG_ERROR, "Profile 1 color details reserved bit set\n");
506 return AVERROR_INVALIDDATA;
509 s->ss_h = s->ss_v = 1;
510 res = AV_PIX_FMT_YUV420P;
// Parse the VP9 uncompressed frame header (frame type, size, references,
// loopfilter, quantizer, segmentation, tiling) followed by the arith-coded
// "compressed" header with all forward probability updates. On success the
// return value is the total header size in bytes, i.e. the offset of the
// first tile. Negative AVERROR on bitstream errors.
// NOTE(review): many lines of this function (closing braces, else/if
// lines, some statements and returns) are not visible in this view; the
// indentation below reflects the apparent nesting of the visible lines.
static int decode_frame_header(AVCodecContext *ctx,
                               const uint8_t *data, int size, int *ref)
    VP9Context *s = ctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
    enum AVPixelFormat fmt = ctx->pix_fmt;
    const uint8_t *data2;

    /* general header */
    if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    s->profile = get_bits1(&s->gb);
    s->profile |= get_bits1(&s->gb) << 1;
    if (s->profile > 1) {
        av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", s->profile);
        return AVERROR_INVALIDDATA;
    // show_existing_frame: no coded data follows, just output a stored ref
    if (get_bits1(&s->gb)) {
        *ref = get_bits(&s->gb, 3);
    s->last_keyframe = s->keyframe;
    s->keyframe = !get_bits1(&s->gb);
    last_invisible = s->invisible;
    s->invisible = !get_bits1(&s->gb);
    s->errorres = get_bits1(&s->gb);
    // temporal mv prediction requires an intact, shown previous frame
    s->use_last_frame_mvs = !s->errorres && !last_invisible;
        // keyframe path (the branch line itself is not visible here)
        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        if ((fmt = read_colorspace_details(ctx)) < 0)
        // for profile 1, here follows the subsampling bits
        s->refreshrefmask = 0xff;   // keyframes refresh all reference slots
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
        // non-keyframe path
        s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
        s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
            // intra-only sub-path
            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            if (s->profile == 1) {
                if ((fmt = read_colorspace_details(ctx)) < 0)
                // profile 0 intra-only: implicit 4:2:0, BT.470bg, full range
                s->ss_h = s->ss_v = 1;
                fmt = AV_PIX_FMT_YUV420P;
                ctx->colorspace = AVCOL_SPC_BT470BG;
                ctx->color_range = AVCOL_RANGE_JPEG;
            s->refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            // inter sub-path: three active references with per-ref sign bias
            s->refreshrefmask = get_bits(&s->gb, 8);
            s->refidx[0] = get_bits(&s->gb, 3);
            s->signbias[0] = get_bits1(&s->gb);
            s->refidx[1] = get_bits(&s->gb, 3);
            s->signbias[1] = get_bits1(&s->gb);
            s->refidx[2] = get_bits(&s->gb, 3);
            s->signbias[2] = get_bits1(&s->gb);
            if (!s->refs[s->refidx[0]].f->data[0] ||
                !s->refs[s->refidx[1]].f->data[0] ||
                !s->refs[s->refidx[2]].f->data[0]) {
                av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            // frame size: either copied from one of the refs or coded explicitly
            if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[0]].f->width;
                h = s->refs[s->refidx[0]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[1]].f->width;
                h = s->refs[s->refidx[1]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[2]].f->width;
                h = s->refs[s->refidx[2]].f->height;
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            // Note that in this code, "CUR_FRAME" is actually before we
            // have formally allocated a frame, and thus actually represents
            s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
                                     s->frames[CUR_FRAME].tf.f->height == h;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->highprecisionmvs = get_bits1(&s->gb);
            s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
            // compound prediction only possible when refs differ in sign bias
            s->allowcompinter = s->signbias[0] != s->signbias[1] ||
                                s->signbias[0] != s->signbias[2];
            if (s->allowcompinter) {
                // pick the two "variable" compound refs from the sign biases
                if (s->signbias[0] == s->signbias[1]) {
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 1;
                } else if (s->signbias[0] == s->signbias[2]) {
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 2;
                    s->varcompref[0] = 1;
                    s->varcompref[1] = 2;

            for (i = 0; i < 3; i++) {
                AVFrame *ref = s->refs[s->refidx[i]].f;
                int refw = ref->width, refh = ref->height;

                if (refw == w && refh == h) {
                    s->mvscale[i][0] = s->mvscale[i][1] = 0;   // unscaled
                    // scaled prediction: ref must be within 2x down / 16x up
                    if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
                        av_log(ctx, AV_LOG_ERROR,
                               "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
                        return AVERROR_INVALIDDATA;
                    s->mvscale[i][0] = (refw << 14) / w;   // 14-bit fixed point
                    s->mvscale[i][1] = (refh << 14) / h;
                    s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
                    s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;

    s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
    s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
    s->framectxid = c = get_bits(&s->gb, 2);

    /* loopfilter header data */
    s->filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
    // the old cache values since they are still valid
    if (s->filter.sharpness != sharp)
        memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
    s->filter.sharpness = sharp;
    if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
        if (get_bits1(&s->gb)) {
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);

    /* quantization header data */
    s->yac_qi = get_bits(&s->gb, 8);
    s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
                  s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;

    /* segmentation header info */
    if ((s->segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->prob.seg[i] = get_bits1(&s->gb) ?
                                 get_bits(&s->gb, 8) : 255;
            if ((s->segmentation.temporal = get_bits1(&s->gb))) {
                for (i = 0; i < 3; i++)
                    s->prob.segpred[i] = get_bits1(&s->gb) ?
                                         get_bits(&s->gb, 8) : 255;
        // reusing the previous frame's segmap requires matching dimensions
        if ((!s->segmentation.update_map || s->segmentation.temporal) &&
            (w != s->frames[CUR_FRAME].tf.f->width ||
             h != s->frames[CUR_FRAME].tf.f->height)) {
            av_log(ctx, AV_LOG_ERROR,
                   "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
                   s->segmentation.temporal, s->segmentation.update_map);
            return AVERROR_INVALIDDATA;

        if (get_bits1(&s->gb)) {
            s->segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
                if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
                if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
        // segmentation disabled: make segment 0 a no-op
        s->segmentation.feat[0].q_enabled = 0;
        s->segmentation.feat[0].lf_enabled = 0;
        s->segmentation.feat[0].skip_enabled = 0;
        s->segmentation.feat[0].ref_enabled = 0;

    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->segmentation.feat[i].q_enabled) {
            if (s->segmentation.absolute_vals)
                qyac = s->segmentation.feat[i].q_val;
                qyac = s->yac_qi + s->segmentation.feat[i].q_val;
        qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
        qyac = av_clip_uintp2(qyac, 8);

        s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
        s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
        s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
        s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];

        sh = s->filter.level >= 32;   // deltas are doubled at high filter levels
        if (s->segmentation.feat[i].lf_enabled) {
            if (s->segmentation.absolute_vals)
                lflvl = s->segmentation.feat[i].lf_val;
                lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
            lflvl = s->filter.level;
        if (s->lf_delta.enabled) {
            // NOTE(review): lf_delta.ref[0] can be negative (int8_t);
            // "<< sh" on a negative value is undefined behavior -- the
            // loop below uses the "* (1 << sh)" form, consider matching it.
            s->segmentation.feat[i].lflvl[0][0] =
            s->segmentation.feat[i].lflvl[0][1] =
                av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
            for (j = 1; j < 4; j++) {
                s->segmentation.feat[i].lflvl[j][0] =
                    av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                             s->lf_delta.mode[0]) * (1 << sh)), 6);
                s->segmentation.feat[i].lflvl[j][1] =
                    av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                             s->lf_delta.mode[1]) * (1 << sh)), 6);
            memset(s->segmentation.feat[i].lflvl, lflvl,
                   sizeof(s->segmentation.feat[i].lflvl));

    /* tiling info */
    if ((res = update_size(ctx, w, h, fmt)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
    // min/max tile columns derive from the superblock width
    for (s->tiling.log2_tile_cols = 0;
         (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
         s->tiling.log2_tile_cols++) ;
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->tiling.log2_tile_cols++;
    s->tiling.log2_tile_rows = decode012(&s->gb);
    s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
    if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
        s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
        // one range coder per tile column
        s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
                                 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
            av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
            return AVERROR(ENOMEM);

    // reset all probability contexts on keyframe/error-res/intra-only frames
    if (s->keyframe || s->errorres || s->intraonly) {
        s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
        s->prob_ctx[3].p = vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));

    // next 16 bits is size of the rest of the header (arith-coded)
    size2 = get_bits(&s->gb, 16);
    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;

    // clear the adaptation counts (coef/eob only on intra frames)
    if (s->keyframe || s->intraonly) {
        memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
        memset(&s->counts, 0, sizeof(s->counts));

    // FIXME is it faster to not copy here, but do it down in the fw updates
    // as explicit copies if the fw update is missing (and skip the copy upon
    s->prob.p = s->prob_ctx[c].p;

    /* txfm updates */
        s->txfmmode = TX_4X4;
        s->txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->txfmmode == 3)
            s->txfmmode += vp8_rac_get(&s->c);

        if (s->txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);

    /* coef probability updates, per transform size */
    for (i = 0; i < 4; i++) {
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252)) {
                                    p[n] = update_prob(&s->c, r[n]);
            // no update: copy the saved context probabilities
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m > 3 && l == 0) // dc only has 3 pt
        if (s->txfmmode == i)

    /* mode updates */
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->keyframe && !s->intraonly) {
        // inter-frame-only probability updates
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);

        if (s->allowcompinter) {
            s->comppredmode = vp8_rac_get(&s->c);
                s->comppredmode += vp8_rac_get(&s->c);
            if (s->comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                            update_prob(&s->c, s->prob.p.comp[i]);
            s->comppredmode = PRED_SINGLEREF;

        if (s->comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);

        if (s->comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);

        // mv fields don't use the update_prob subexp model for some reason
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        if (s->highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

    // total header size = uncompressed part + compressed part
    return (data2 - data) + size2;
// Clamp a motion vector into the legal range for the current block
// (s->min_mv / s->max_mv are set up per block by the caller).
// NOTE(review): the final parameter (presumably "VP9Context *s", given the
// body) and the function braces are not visible in this view.
static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
    dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
    dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
1034 static void find_ref_mvs(VP9Context *s,
1035 VP56mv *pmv, int ref, int z, int idx, int sb)
1037 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
1038 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
1039 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
1040 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
1041 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
1042 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
1043 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
1044 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
1045 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1046 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
1047 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1048 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
1049 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
1050 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
1051 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1052 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
1053 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
1054 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1055 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1056 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1057 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1058 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1059 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1060 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1061 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1062 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1063 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1066 int row = s->row, col = s->col, row7 = s->row7;
1067 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
1068 #define INVALID_MV 0x80008000U
1069 uint32_t mem = INVALID_MV;
1072 #define RETURN_DIRECT_MV(mv) \
1074 uint32_t m = AV_RN32A(&mv); \
1078 } else if (mem == INVALID_MV) { \
1080 } else if (m != mem) { \
1087 if (sb == 2 || sb == 1) {
1088 RETURN_DIRECT_MV(b->mv[0][z]);
1089 } else if (sb == 3) {
1090 RETURN_DIRECT_MV(b->mv[2][z]);
1091 RETURN_DIRECT_MV(b->mv[1][z]);
1092 RETURN_DIRECT_MV(b->mv[0][z]);
1095 #define RETURN_MV(mv) \
1100 clamp_mv(&tmp, &mv, s); \
1101 m = AV_RN32A(&tmp); \
1105 } else if (mem == INVALID_MV) { \
1107 } else if (m != mem) { \
1112 uint32_t m = AV_RN32A(&mv); \
1114 clamp_mv(pmv, &mv, s); \
1116 } else if (mem == INVALID_MV) { \
1118 } else if (m != mem) { \
1119 clamp_mv(pmv, &mv, s); \
1126 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1127 if (mv->ref[0] == ref) {
1128 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1129 } else if (mv->ref[1] == ref) {
1130 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
1133 if (col > s->tiling.tile_col_start) {
1134 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1135 if (mv->ref[0] == ref) {
1136 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1137 } else if (mv->ref[1] == ref) {
1138 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1146 // previously coded MVs in this neighbourhood, using same reference frame
1147 for (; i < 8; i++) {
1148 int c = p[i][0] + col, r = p[i][1] + row;
1150 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1151 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1153 if (mv->ref[0] == ref) {
1154 RETURN_MV(mv->mv[0]);
1155 } else if (mv->ref[1] == ref) {
1156 RETURN_MV(mv->mv[1]);
1161 // MV at this position in previous frame, using same reference frame
1162 if (s->use_last_frame_mvs) {
1163 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1165 if (!s->frames[REF_FRAME_MVPAIR].uses_2pass)
1166 ff_thread_await_progress(&s->frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
1167 if (mv->ref[0] == ref) {
1168 RETURN_MV(mv->mv[0]);
1169 } else if (mv->ref[1] == ref) {
1170 RETURN_MV(mv->mv[1]);
1174 #define RETURN_SCALE_MV(mv, scale) \
1177 VP56mv mv_temp = { -mv.x, -mv.y }; \
1178 RETURN_MV(mv_temp); \
1184 // previously coded MVs in this neighbourhood, using different reference frame
1185 for (i = 0; i < 8; i++) {
1186 int c = p[i][0] + col, r = p[i][1] + row;
1188 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1189 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1191 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1192 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1194 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1195 // BUG - libvpx has this condition regardless of whether
1196 // we used the first ref MV and pre-scaling
1197 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1198 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1203 // MV at this position in previous frame, using different reference frame
1204 if (s->use_last_frame_mvs) {
1205 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1207 // no need to await_progress, because we already did that above
1208 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1209 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1211 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1212 // BUG - libvpx has this condition regardless of whether
1213 // we used the first ref MV and pre-scaling
1214 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1215 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1222 #undef RETURN_SCALE_MV
/**
 * Decode one motion vector component from the range coder.
 *
 * @param idx component/context index as used by fill_mv(): 0 = vertical (y),
 *            1 = horizontal (x)
 * @param hp  nonzero to read the extra high-precision bit
 * @return the signed component value, sign ? -(n + 1) : (n + 1)
 *
 * Every decoded symbol also bumps the matching adaptation counter in
 * s->counts.mv_comp[idx] for backward probability update.
 */
1225 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
// sign bit, then the magnitude class via the mv-class tree
1227 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1228 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1229 s->prob.p.mv_comp[idx].classes);
1231 s->counts.mv_comp[idx].sign[sign]++;
1232 s->counts.mv_comp[idx].classes[c]++;
// classes >= 1: read c integer bits, then fractional (fp) and optional hp bit
1236 for (n = 0, m = 0; m < c; m++) {
1237 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1239 s->counts.mv_comp[idx].bits[m][bit]++;
1242 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1244 s->counts.mv_comp[idx].fp[bit]++;
1246 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1247 s->counts.mv_comp[idx].hp[bit]++;
1251 // bug in libvpx - we count for bw entropy purposes even if the
1253 s->counts.mv_comp[idx].hp[1]++;
// class 0: single class0 bit plus its own fp/hp probability sets
1257 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1258 s->counts.mv_comp[idx].class0[n]++;
1259 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1260 s->prob.p.mv_comp[idx].class0_fp[n]);
1261 s->counts.mv_comp[idx].class0_fp[n][bit]++;
1262 n = (n << 3) | (bit << 1);
1264 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1265 s->counts.mv_comp[idx].class0_hp[bit]++;
1269 // bug in libvpx - we count for bw entropy purposes even if the
1271 s->counts.mv_comp[idx].class0_hp[1]++;
// magnitude is n + 1; apply the sign read first
1275 return sign ? -(n + 1) : (n + 1);
/**
 * Derive the motion vector(s) for the current (sub-)block.
 *
 * ZEROMV yields zero vectors.  Otherwise a predictor is fetched via
 * find_ref_mvs() (NEARMV selects the "near" candidate; for NEWMV sb is
 * forced to -1), and for NEWMV a residual is added: an MV joint symbol
 * followed by per-component values from read_mv_component().  The same
 * sequence is then applied to mv[1]/b->ref[1] for the second reference.
 * hp (high precision) is disabled when the predictor magnitude reaches 64
 * or s->highprecisionmvs is unset.
 *
 * @param mv   output array; mv[0] is for ref[0], mv[1] for ref[1]
 * @param mode one of the inter modes (ZEROMV/NEARESTMV/NEARMV/NEWMV)
 * @param sb   sub-block index, or -1 for a whole-block prediction
 */
1278 static void fill_mv(VP9Context *s,
1279 VP56mv *mv, int mode, int sb)
1283 if (mode == ZEROMV) {
1288 // FIXME cache this value and reuse for other subblocks
1289 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1290 mode == NEWMV ? -1 : sb);
1291 // FIXME maybe move this code into find_ref_mvs()
// hp is cleared when the predictor is large (>= 64) or hp MVs are disabled
1292 if ((mode == NEWMV || sb == -1) &&
1293 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1307 if (mode == NEWMV) {
1308 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1309 s->prob.p.mv_joint);
1311 s->counts.mv_joint[j]++;
// joint decides which components carry a residual: V = y, H = x
1312 if (j >= MV_JOINT_V)
1313 mv[0].y += read_mv_component(s, 0, hp);
1315 mv[0].x += read_mv_component(s, 1, hp);
// second reference: same predictor + residual procedure for mv[1]
1319 // FIXME cache this value and reuse for other subblocks
1320 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1321 mode == NEWMV ? -1 : sb);
1322 if ((mode == NEWMV || sb == -1) &&
1323 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1337 if (mode == NEWMV) {
1338 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1339 s->prob.p.mv_joint);
1341 s->counts.mv_joint[j]++;
1342 if (j >= MV_JOINT_V)
1343 mv[1].y += read_mv_component(s, 0, hp);
1345 mv[1].x += read_mv_component(s, 1, hp);
/**
 * Splat the byte value v over a w x h region of a context array with the
 * given row stride.  Width-specialized fast paths replicate v into 16-, 32-
 * and 64-bit words (v * 0x01..01) so each row is written with a few aligned
 * stores instead of a per-byte loop.
 */
1351 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1352 ptrdiff_t stride, int v)
1362 int v16 = v * 0x0101;
1370 uint32_t v32 = v * 0x01010101;
1379 uint64_t v64 = v * 0x0101010101010101ULL;
1385 uint32_t v32 = v * 0x01010101;
1388 AV_WN32A(ptr + 4, v32);
/**
 * Decode the complete mode information for the current block:
 * segment id, skip flag, intra/inter flag, transform size, intra prediction
 * modes (keyframe and inter-frame variants), reference frame selection
 * (single or compound), interpolation filter, inter modes with motion
 * vectors (via fill_mv()), and finally the update of the above/left
 * prediction context arrays and the per-4x4 mv/reference store of the
 * current frame.
 */
1397 static void decode_mode(AVCodecContext *ctx)
1399 static const uint8_t left_ctx[N_BS_SIZES] = {
1400 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1402 static const uint8_t above_ctx[N_BS_SIZES] = {
1403 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
1405 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1406 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1407 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1409 VP9Context *s = ctx->priv_data;
1411 int row = s->row, col = s->col, row7 = s->row7;
1412 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
// w4/h4 clip the block footprint (in 8x8 units) against the frame edge
1413 int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1414 int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
1415 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1416 int vref, filter_id;
// segment id: explicitly coded on key/intra-only frames, otherwise either
// temporally predicted from the reference segmentation map or re-coded
1418 if (!s->segmentation.enabled) {
1420 } else if (s->keyframe || s->intraonly) {
1421 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
1422 } else if (!s->segmentation.update_map ||
1423 (s->segmentation.temporal &&
1424 vp56_rac_get_prob_branchy(&s->c,
1425 s->prob.segpred[s->above_segpred_ctx[col] +
1426 s->left_segpred_ctx[row7]]))) {
1429 uint8_t *refsegmap = s->frames[REF_FRAME_SEGMAP].segmentation_map;
// wait for the reference frame's rows to be decoded far enough (frame-mt)
1431 if (!s->frames[REF_FRAME_SEGMAP].uses_2pass)
1432 ff_thread_await_progress(&s->frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
// predicted seg id = minimum over the co-located reference map region
1433 for (y = 0; y < h4; y++) {
1434 int idx_base = (y + row) * 8 * s->sb_cols + col;
1435 for (x = 0; x < w4; x++)
1436 pred = FFMIN(pred, refsegmap[idx_base + x]);
1438 av_assert1(pred < 8);
1444 memset(&s->above_segpred_ctx[col], 1, w4);
1445 memset(&s->left_segpred_ctx[row7], 1, h4);
1447 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1450 memset(&s->above_segpred_ctx[col], 0, w4);
1451 memset(&s->left_segpred_ctx[row7], 0, h4);
// store the decoded seg id into the current frame's segmentation map
1453 if (s->segmentation.enabled &&
1454 (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1455 setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1456 bw4, bh4, 8 * s->sb_cols, b->seg_id);
// skip flag: forced by the segment feature, otherwise context-coded
1459 b->skip = s->segmentation.enabled &&
1460 s->segmentation.feat[b->seg_id].skip_enabled;
1462 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1463 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1464 s->counts.skip[c][b->skip]++;
// intra/inter flag: key/intra-only frames are all intra; a segment-level
// reference feature can force it; otherwise coded with above/left context
1467 if (s->keyframe || s->intraonly) {
1469 } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1470 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1474 if (have_a && have_l) {
1475 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1478 c = have_a ? 2 * s->above_intra_ctx[col] :
1479 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1481 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1482 s->counts.intra[c][bit]++;
// transform size: coded (up to max_tx for this block size) when the frame
// uses TX_SWITCHABLE, otherwise clamped from the frame-level tx mode
1486 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1490 c = (s->above_skip_ctx[col] ? max_tx :
1491 s->above_txfm_ctx[col]) +
1492 (s->left_skip_ctx[row7] ? max_tx :
1493 s->left_txfm_ctx[row7]) > max_tx;
1495 c = s->above_skip_ctx[col] ? 1 :
1496 (s->above_txfm_ctx[col] * 2 > max_tx);
1498 } else if (have_l) {
1499 c = s->left_skip_ctx[row7] ? 1 :
1500 (s->left_txfm_ctx[row7] * 2 > max_tx);
1506 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1508 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1510 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1512 s->counts.tx32p[c][b->tx]++;
1515 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1517 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1518 s->counts.tx16p[c][b->tx]++;
1521 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1522 s->counts.tx8p[c][b->tx]++;
1529 b->tx = FFMIN(max_tx, s->txfmmode);
// keyframe / intra-only: intra modes use the default kf probability tables,
// contexted on the above/left modes (a[]/l[]); sub-8x8 blocks code up to
// four per-4x4 modes
1532 if (s->keyframe || s->intraonly) {
1533 uint8_t *a = &s->above_mode_ctx[col * 2];
1534 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1537 if (b->bs > BS_8x8) {
1538 // FIXME the memory storage intermediates here aren't really
1539 // necessary, they're just there to make the code slightly
1541 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1542 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1543 if (b->bs != BS_8x4) {
1544 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1545 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1546 l[0] = a[1] = b->mode[1];
1548 l[0] = a[1] = b->mode[1] = b->mode[0];
1550 if (b->bs != BS_4x8) {
1551 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1552 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1553 if (b->bs != BS_8x4) {
1554 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1555 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1556 l[1] = a[1] = b->mode[3];
1558 l[1] = a[1] = b->mode[3] = b->mode[2];
1561 b->mode[2] = b->mode[0];
1562 l[1] = a[1] = b->mode[3] = b->mode[1];
1565 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1566 vp9_default_kf_ymode_probs[*a][*l]);
1567 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1568 // FIXME this can probably be optimized
1569 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1570 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1572 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1573 vp9_default_kf_uvmode_probs[b->mode[3]]);
// inter frame, intra block: modes use the adaptive y_mode/uv_mode tables
1574 } else if (b->intra) {
1576 if (b->bs > BS_8x8) {
1577 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1578 s->prob.p.y_mode[0]);
1579 s->counts.y_mode[0][b->mode[0]]++;
1580 if (b->bs != BS_8x4) {
1581 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1582 s->prob.p.y_mode[0]);
1583 s->counts.y_mode[0][b->mode[1]]++;
1585 b->mode[1] = b->mode[0];
1587 if (b->bs != BS_4x8) {
1588 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1589 s->prob.p.y_mode[0]);
1590 s->counts.y_mode[0][b->mode[2]]++;
1591 if (b->bs != BS_8x4) {
1592 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1593 s->prob.p.y_mode[0]);
1594 s->counts.y_mode[0][b->mode[3]]++;
1596 b->mode[3] = b->mode[2];
1599 b->mode[2] = b->mode[0];
1600 b->mode[3] = b->mode[1];
1603 static const uint8_t size_group[10] = {
1604 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1606 int sz = size_group[b->bs];
1608 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1609 s->prob.p.y_mode[sz]);
1610 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1611 s->counts.y_mode[sz][b->mode[3]]++;
1613 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1614 s->prob.p.uv_mode[b->mode[3]]);
1615 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
// inter block: LUT mapping (above mode, left mode) -> inter-mode context
1617 static const uint8_t inter_mode_ctx_lut[14][14] = {
1618 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1619 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1620 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1621 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1622 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1623 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1624 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1625 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1626 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1627 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1628 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1629 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1630 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1631 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
// reference frame selection: a segment feature can fix it outright
1634 if (s->segmentation.feat[b->seg_id].ref_enabled) {
1635 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1637 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1639 // read comp_pred flag
1640 if (s->comppredmode != PRED_SWITCHABLE) {
1641 b->comp = s->comppredmode == PRED_COMPREF;
1645 // FIXME add intra as ref=0xff (or -1) to make these easier?
1648 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1650 } else if (s->above_comp_ctx[col]) {
1651 c = 2 + (s->left_intra_ctx[row7] ||
1652 s->left_ref_ctx[row7] == s->fixcompref);
1653 } else if (s->left_comp_ctx[row7]) {
1654 c = 2 + (s->above_intra_ctx[col] ||
1655 s->above_ref_ctx[col] == s->fixcompref);
1657 c = (!s->above_intra_ctx[col] &&
1658 s->above_ref_ctx[col] == s->fixcompref) ^
1659 (!s->left_intra_ctx[row7] &&
// NOTE(review): 'row & 7' — presumably identical to row7 used everywhere
// else in this function (inconsistent spelling); confirm s->row7 == row & 7
1660 s->left_ref_ctx[row & 7] == s->fixcompref);
1663 c = s->above_comp_ctx[col] ? 3 :
1664 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1666 } else if (have_l) {
1667 c = s->left_comp_ctx[row7] ? 3 :
1668 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1672 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1673 s->counts.comp[c][b->comp]++;
1676 // read actual references
1677 // FIXME probably cache a few variables here to prevent repetitive
1678 // memory accesses below
1679 if (b->comp) /* two references */ {
// fixed ref comes from s->fixcompref; only the variable ref is coded
1680 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1682 b->ref[fix_idx] = s->fixcompref;
1683 // FIXME can this codeblob be replaced by some sort of LUT?
1686 if (s->above_intra_ctx[col]) {
1687 if (s->left_intra_ctx[row7]) {
1690 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1692 } else if (s->left_intra_ctx[row7]) {
1693 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1695 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1697 if (refl == refa && refa == s->varcompref[1]) {
1699 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1700 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1701 (refl == s->fixcompref && refa == s->varcompref[0])) {
1704 c = (refa == refl) ? 3 : 1;
1706 } else if (!s->left_comp_ctx[row7]) {
1707 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1710 c = (refl == s->varcompref[1] &&
1711 refa != s->varcompref[1]) ? 2 : 4;
1713 } else if (!s->above_comp_ctx[col]) {
1714 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1717 c = (refa == s->varcompref[1] &&
1718 refl != s->varcompref[1]) ? 2 : 4;
1721 c = (refl == refa) ? 4 : 2;
1725 if (s->above_intra_ctx[col]) {
1727 } else if (s->above_comp_ctx[col]) {
1728 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1730 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1733 } else if (have_l) {
1734 if (s->left_intra_ctx[row7]) {
1736 } else if (s->left_comp_ctx[row7]) {
1737 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1739 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1744 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1745 b->ref[var_idx] = s->varcompref[bit];
1746 s->counts.comp_ref[c][bit]++;
1747 } else /* single reference */ {
// first single_ref bit: LAST vs (GOLDEN/ALTREF), context from neighbours
1750 if (have_a && !s->above_intra_ctx[col]) {
1751 if (have_l && !s->left_intra_ctx[row7]) {
1752 if (s->left_comp_ctx[row7]) {
1753 if (s->above_comp_ctx[col]) {
1754 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1755 !s->above_ref_ctx[col]);
1757 c = (3 * !s->above_ref_ctx[col]) +
1758 (!s->fixcompref || !s->left_ref_ctx[row7]);
1760 } else if (s->above_comp_ctx[col]) {
1761 c = (3 * !s->left_ref_ctx[row7]) +
1762 (!s->fixcompref || !s->above_ref_ctx[col]);
1764 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1766 } else if (s->above_intra_ctx[col]) {
1768 } else if (s->above_comp_ctx[col]) {
1769 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1771 c = 4 * (!s->above_ref_ctx[col]);
1773 } else if (have_l && !s->left_intra_ctx[row7]) {
// NOTE(review): this inner branch is unreachable — the enclosing
// 'else if' already guarantees !s->left_intra_ctx[row7] (kept to
// mirror the libvpx reference code)
1774 if (s->left_intra_ctx[row7]) {
1776 } else if (s->left_comp_ctx[row7]) {
1777 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1779 c = 4 * (!s->left_ref_ctx[row7]);
1784 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1785 s->counts.single_ref[c][0][bit]++;
1789 // FIXME can this codeblob be replaced by some sort of LUT?
// second single_ref bit disambiguates GOLDEN vs ALTREF
1792 if (s->left_intra_ctx[row7]) {
1793 if (s->above_intra_ctx[col]) {
1795 } else if (s->above_comp_ctx[col]) {
1796 c = 1 + 2 * (s->fixcompref == 1 ||
1797 s->above_ref_ctx[col] == 1);
1798 } else if (!s->above_ref_ctx[col]) {
1801 c = 4 * (s->above_ref_ctx[col] == 1);
1803 } else if (s->above_intra_ctx[col]) {
1804 if (s->left_intra_ctx[row7]) {
1806 } else if (s->left_comp_ctx[row7]) {
1807 c = 1 + 2 * (s->fixcompref == 1 ||
1808 s->left_ref_ctx[row7] == 1);
1809 } else if (!s->left_ref_ctx[row7]) {
1812 c = 4 * (s->left_ref_ctx[row7] == 1);
1814 } else if (s->above_comp_ctx[col]) {
1815 if (s->left_comp_ctx[row7]) {
1816 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1817 c = 3 * (s->fixcompref == 1 ||
1818 s->left_ref_ctx[row7] == 1);
1822 } else if (!s->left_ref_ctx[row7]) {
1823 c = 1 + 2 * (s->fixcompref == 1 ||
1824 s->above_ref_ctx[col] == 1);
1826 c = 3 * (s->left_ref_ctx[row7] == 1) +
1827 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1829 } else if (s->left_comp_ctx[row7]) {
1830 if (!s->above_ref_ctx[col]) {
1831 c = 1 + 2 * (s->fixcompref == 1 ||
1832 s->left_ref_ctx[row7] == 1);
1834 c = 3 * (s->above_ref_ctx[col] == 1) +
1835 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1837 } else if (!s->above_ref_ctx[col]) {
1838 if (!s->left_ref_ctx[row7]) {
1841 c = 4 * (s->left_ref_ctx[row7] == 1);
1843 } else if (!s->left_ref_ctx[row7]) {
1844 c = 4 * (s->above_ref_ctx[col] == 1);
1846 c = 2 * (s->left_ref_ctx[row7] == 1) +
1847 2 * (s->above_ref_ctx[col] == 1);
1850 if (s->above_intra_ctx[col] ||
1851 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1853 } else if (s->above_comp_ctx[col]) {
1854 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1856 c = 4 * (s->above_ref_ctx[col] == 1);
1859 } else if (have_l) {
1860 if (s->left_intra_ctx[row7] ||
1861 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1863 } else if (s->left_comp_ctx[row7]) {
1864 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1866 c = 4 * (s->left_ref_ctx[row7] == 1);
1871 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1872 s->counts.single_ref[c][1][bit]++;
1873 b->ref[0] = 1 + bit;
// inter mode: for >= 8x8 a single mode; segment skip forces ZEROMV
1878 if (b->bs <= BS_8x8) {
1879 if (s->segmentation.feat[b->seg_id].skip_enabled) {
1880 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1882 static const uint8_t off[10] = {
1883 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1886 // FIXME this needs to use the LUT tables from find_ref_mvs
1887 // because not all are -1,0/0,-1
1888 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1889 [s->left_mode_ctx[row7 + off[b->bs]]];
1891 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1892 s->prob.p.mv_mode[c]);
1893 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
// '- 10' maps the inter-mode enum range onto the 0-based counter index
1894 s->counts.mv_mode[c][b->mode[0] - 10]++;
// interpolation filter: coded only in FILTER_SWITCHABLE mode, contexted
// on the above/left filters where those neighbours are inter blocks
1898 if (s->filtermode == FILTER_SWITCHABLE) {
1901 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1902 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1903 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1904 s->left_filter_ctx[row7] : 3;
1906 c = s->above_filter_ctx[col];
1908 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1909 c = s->left_filter_ctx[row7];
1914 filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1915 s->prob.p.filter[c]);
1916 s->counts.filter[c][filter_id]++;
1917 b->filter = vp9_filter_lut[filter_id];
1919 b->filter = s->filtermode;
// sub-8x8: up to four per-sub-block modes + MVs; missing sub-blocks are
// copied from the coded ones
1922 if (b->bs > BS_8x8) {
1923 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1925 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1926 s->prob.p.mv_mode[c]);
1927 s->counts.mv_mode[c][b->mode[0] - 10]++;
1928 fill_mv(s, b->mv[0], b->mode[0], 0);
1930 if (b->bs != BS_8x4) {
1931 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1932 s->prob.p.mv_mode[c]);
1933 s->counts.mv_mode[c][b->mode[1] - 10]++;
1934 fill_mv(s, b->mv[1], b->mode[1], 1);
1936 b->mode[1] = b->mode[0];
1937 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1938 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1941 if (b->bs != BS_4x8) {
1942 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1943 s->prob.p.mv_mode[c]);
1944 s->counts.mv_mode[c][b->mode[2] - 10]++;
1945 fill_mv(s, b->mv[2], b->mode[2], 2);
1947 if (b->bs != BS_8x4) {
1948 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1949 s->prob.p.mv_mode[c]);
1950 s->counts.mv_mode[c][b->mode[3] - 10]++;
1951 fill_mv(s, b->mv[3], b->mode[3], 3);
1953 b->mode[3] = b->mode[2];
1954 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1955 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1958 b->mode[2] = b->mode[0];
1959 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1960 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1961 b->mode[3] = b->mode[1];
1962 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1963 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
1966 fill_mv(s, b->mv[0], b->mode[0], -1);
1967 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1968 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1969 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
1970 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1971 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1972 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
1975 vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
// SPLAT_CTX: store a context value across n bytes with the widest
// available aligned store (two variants selected by preprocessor config)
1979 #define SPLAT_CTX(var, val, n) \
1981 case 1: var = val; break; \
1982 case 2: AV_WN16A(&var, val * 0x0101); break; \
1983 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1984 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
1986 uint64_t v64 = val * 0x0101010101010101ULL; \
1987 AV_WN64A( &var, v64); \
1988 AV_WN64A(&((uint8_t *) &var)[8], v64); \
1993 #define SPLAT_CTX(var, val, n) \
1995 case 1: var = val; break; \
1996 case 2: AV_WN16A(&var, val * 0x0101); break; \
1997 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1999 uint32_t v32 = val * 0x01010101; \
2000 AV_WN32A( &var, v32); \
2001 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2005 uint32_t v32 = val * 0x01010101; \
2006 AV_WN32A( &var, v32); \
2007 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2008 AV_WN32A(&((uint8_t *) &var)[8], v32); \
2009 AV_WN32A(&((uint8_t *) &var)[12], v32); \
// propagate this block's decisions into the above/left context arrays,
// sized by the block's width (above) and height (left) in 8x8 units
2015 switch (bwh_tab[1][b->bs][0]) {
2016 #define SET_CTXS(dir, off, n) \
2018 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
2019 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
2020 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2021 if (!s->keyframe && !s->intraonly) { \
2022 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
2023 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
2024 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
2026 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2027 if (s->filtermode == FILTER_SWITCHABLE) { \
2028 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2033 case 1: SET_CTXS(above, col, 1); break;
2034 case 2: SET_CTXS(above, col, 2); break;
2035 case 4: SET_CTXS(above, col, 4); break;
2036 case 8: SET_CTXS(above, col, 8); break;
2038 switch (bwh_tab[1][b->bs][1]) {
2039 case 1: SET_CTXS(left, row7, 1); break;
2040 case 2: SET_CTXS(left, row7, 2); break;
2041 case 4: SET_CTXS(left, row7, 4); break;
2042 case 8: SET_CTXS(left, row7, 8); break;
// inter frames: fill the above/left mv context arrays used by find_ref_mvs
2047 if (!s->keyframe && !s->intraonly) {
2048 if (b->bs > BS_8x8) {
2049 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2051 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2052 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2053 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2054 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2055 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2056 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2057 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2058 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2060 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2062 for (n = 0; n < w4 * 2; n++) {
2063 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2064 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2066 for (n = 0; n < h4 * 2; n++) {
2067 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2068 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
// persist refs + (for inter) mv[3] into the current frame's per-4x4
// mv/reference store, read back by find_ref_mvs() and the next frame
2074 for (y = 0; y < h4; y++) {
2075 int x, o = (row + y) * s->sb_cols * 8 + col;
2076 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
2079 for (x = 0; x < w4; x++) {
2083 } else if (b->comp) {
2084 for (x = 0; x < w4; x++) {
2085 mv[x].ref[0] = b->ref[0];
2086 mv[x].ref[1] = b->ref[1];
2087 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2088 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2091 for (x = 0; x < w4; x++) {
2092 mv[x].ref[0] = b->ref[0];
2094 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2100 // FIXME merge cnt/eob arguments?
/**
 * Generic coefficient token decoder for one transform block, shared by the
 * decode_coeffs_b() (4x4..16x16) and decode_coeffs_b32() (32x32) wrappers.
 *
 * Walks the given scan order: per position, an EOB check, a zero check, a
 * "one" check, and for larger magnitudes the category trees with their
 * hard-coded extra-bit probabilities.  p[3..10] are lazily filled from the
 * vp9_model_pareto8 table keyed on p[2].  cnt/eob adaptation counters are
 * updated for every decision, and the local cache[] of magnitudes feeds the
 * nnz context for the next position via the nb[] neighbour table.
 * Dequantization uses qmul[0] for DC (i == 0) and qmul[1] otherwise; the
 * /2 store variant is the is_tx32x32 path (32x32 uses doubled quantizers).
 * NOTE(review): the final return value is not visible in this excerpt;
 * callers in decode_coeffs() reduce it with !! to a coded-block flag.
 */
2101 static av_always_inline int
2102 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2103 int is_tx32x32, unsigned (*cnt)[6][3],
2104 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2105 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2106 const int16_t *band_counts, const int16_t *qmul)
2108 int i = 0, band = 0, band_left = band_counts[band];
// tp points at the 11 probabilities for the current (band, nnz) context
2109 uint8_t *tp = p[0][nnz];
2110 uint8_t cache[1024];
2115 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2116 eob[band][nnz][val]++;
2121 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2122 cnt[band][nnz][0]++;
2124 band_left = band_counts[++band];
// nnz context for the next position from the two scan neighbours
2126 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2128 if (++i == n_coeffs)
2129 break; //invalid input; blocks should end with EOB
2134 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2135 cnt[band][nnz][1]++;
2139 // fill in p[3-10] (model fill) - only once per frame for each pos
2141 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2143 cnt[band][nnz][2]++;
2144 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2145 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2146 cache[rc] = val = 2;
2148 val = 3 + vp56_rac_get_prob(c, tp[5]);
2151 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2153 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2154 val = 5 + vp56_rac_get_prob(c, 159);
2156 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2157 val += vp56_rac_get_prob(c, 145);
// higher categories: base value plus fixed-probability extra bits
2161 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2162 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2163 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2164 val += (vp56_rac_get_prob(c, 148) << 1);
2165 val += vp56_rac_get_prob(c, 140);
2167 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2168 val += (vp56_rac_get_prob(c, 155) << 2);
2169 val += (vp56_rac_get_prob(c, 140) << 1);
2170 val += vp56_rac_get_prob(c, 135);
2172 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2173 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2174 val += (vp56_rac_get_prob(c, 157) << 3);
2175 val += (vp56_rac_get_prob(c, 141) << 2);
2176 val += (vp56_rac_get_prob(c, 134) << 1);
2177 val += vp56_rac_get_prob(c, 130);
// top category: 14 extra bits, values from 67 upward
2179 val = 67 + (vp56_rac_get_prob(c, 254) << 13);
2180 val += (vp56_rac_get_prob(c, 254) << 12);
2181 val += (vp56_rac_get_prob(c, 254) << 11);
2182 val += (vp56_rac_get_prob(c, 252) << 10);
2183 val += (vp56_rac_get_prob(c, 249) << 9);
2184 val += (vp56_rac_get_prob(c, 243) << 8);
2185 val += (vp56_rac_get_prob(c, 230) << 7);
2186 val += (vp56_rac_get_prob(c, 196) << 6);
2187 val += (vp56_rac_get_prob(c, 177) << 5);
2188 val += (vp56_rac_get_prob(c, 153) << 4);
2189 val += (vp56_rac_get_prob(c, 140) << 3);
2190 val += (vp56_rac_get_prob(c, 133) << 2);
2191 val += (vp56_rac_get_prob(c, 130) << 1);
2192 val += vp56_rac_get_prob(c, 129);
2197 band_left = band_counts[++band];
// sign bit, dequantize (DC vs AC qmul), 32x32 halves the stored value
2199 coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
2201 coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
2202 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2204 } while (++i < n_coeffs);
/* Thin wrapper: token decoding for 4x4..16x16 blocks (is_tx32x32 = 0). */
2209 static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2210 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2211 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2212 const int16_t (*nb)[2], const int16_t *band_counts,
2213 const int16_t *qmul)
2215 return decode_coeffs_b_generic(c, coef, n_coeffs, 0, cnt, eob, p,
2216 nnz, scan, nb, band_counts, qmul);
/* Thin wrapper: token decoding for 32x32 blocks (is_tx32x32 = 1). */
2219 static int decode_coeffs_b32(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2220 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2221 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2222 const int16_t (*nb)[2], const int16_t *band_counts,
2223 const int16_t *qmul)
2225 return decode_coeffs_b_generic(c, coef, n_coeffs, 1, cnt, eob, p,
2226 nnz, scan, nb, band_counts, qmul);
/**
 * Decode all residual coefficients for the current block: luma first with
 * per-transform-type scan/neighbour tables, then both chroma planes with the
 * DCT scan.  At transform sizes > 4x4 the above/left non-zero-context bytes
 * are merged down to one byte per transform (MERGE_CTX) before decoding and
 * splatted back out (SPLAT/SPLAT_CTX) afterwards.  Per-transform EOB results
 * are stored into s->eob / s->uveob.
 */
2229 static void decode_coeffs(AVCodecContext *ctx)
2231 VP9Context *s = ctx->priv_data;
2233 int row = s->row, col = s->col;
// probability/counter tables keyed on tx size, plane and intra/inter
2234 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2235 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2236 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
// block extent in 4x4 units, clipped at the frame edge
2237 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2238 int end_x = FFMIN(2 * (s->cols - col), w4);
2239 int end_y = FFMIN(2 * (s->rows - row), h4);
2240 int n, pl, x, y, res;
2241 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
// lossless selects the WHT scan variants (vp9_scans index offset by 4)
2242 int tx = 4 * s->lossless + b->tx;
2243 const int16_t * const *yscans = vp9_scans[tx];
2244 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2245 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2246 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2247 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2248 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
// coefficient counts per probability band, per tx size (last band padded)
2249 static const int16_t band_counts[4][8] = {
2250 { 1, 2, 3, 4, 3, 16 - 13 },
2251 { 1, 2, 3, 4, 11, 64 - 21 },
2252 { 1, 2, 3, 4, 11, 256 - 21 },
2253 { 1, 2, 3, 4, 11, 1024 - 21 },
2255 const int16_t *y_band_counts = band_counts[b->tx];
2256 const int16_t *uv_band_counts = band_counts[b->uvtx];
// MERGE/MERGE_CTX: collapse 'step' context bytes into one !!-reduced byte
2258 #define MERGE(la, end, step, rd) \
2259 for (n = 0; n < end; n += step) \
2260 la[n] = !!rd(&la[n])
2261 #define MERGE_CTX(step, rd) \
2263 MERGE(l, end_y, step, rd); \
2264 MERGE(a, end_x, step, rd); \
// luma loop: scan table picked per 4x4 intra mode (tx type), contexts
// updated from the result; EOBs recorded per 16-coefficient unit
2267 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2268 for (n = 0, y = 0; y < end_y; y += step) { \
2269 for (x = 0; x < end_x; x += step, n += step * step) { \
2270 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2271 res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
2272 c, e, p, a[x] + l[y], yscans[txtp], \
2273 ynbs[txtp], y_band_counts, qmul[0]); \
2274 a[x] = l[y] = !!res; \
2276 AV_WN16A(&s->eob[n], res); \
// SPLAT: expand the merged context byte back across 'step' positions
2283 #define SPLAT(la, end, step, cond) \
2285 for (n = 1; n < end; n += step) \
2286 la[n] = la[n - 1]; \
2287 } else if (step == 4) { \
2289 for (n = 0; n < end; n += step) \
2290 AV_WN32A(&la[n], la[n] * 0x01010101); \
2292 for (n = 0; n < end; n += step) \
2293 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2295 } else /* step == 8 */ { \
2297 if (HAVE_FAST_64BIT) { \
2298 for (n = 0; n < end; n += step) \
2299 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2301 for (n = 0; n < end; n += step) { \
2302 uint32_t v32 = la[n] * 0x01010101; \
2303 AV_WN32A(&la[n], v32); \
2304 AV_WN32A(&la[n + 4], v32); \
2308 for (n = 0; n < end; n += step) \
2309 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2312 #define SPLAT_CTX(step) \
2314 SPLAT(a, end_x, step, end_x == w4); \
2315 SPLAT(l, end_y, step, end_y == h4); \
// luma decode, dispatched per transform size
2321 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2324 MERGE_CTX(2, AV_RN16A);
2325 DECODE_Y_COEF_LOOP(2, 0,);
2329 MERGE_CTX(4, AV_RN32A);
2330 DECODE_Y_COEF_LOOP(4, 0,);
2334 MERGE_CTX(8, AV_RN64A);
2335 DECODE_Y_COEF_LOOP(8, 0, 32);
// chroma loop: fixed DCT scan, AC quantizer set qmul[1]
2340 #define DECODE_UV_COEF_LOOP(step, decode_coeffs_fn) \
2341 for (n = 0, y = 0; y < end_y; y += step) { \
2342 for (x = 0; x < end_x; x += step, n += step * step) { \
2343 res = decode_coeffs_fn(&s->c, s->uvblock[pl] + 16 * n, \
2344 16 * step * step, c, e, p, a[x] + l[y], \
2345 uvscan, uvnb, uv_band_counts, qmul[1]); \
2346 a[x] = l[y] = !!res; \
2348 AV_WN16A(&s->uveob[pl][n], res); \
2350 s->uveob[pl][n] = res; \
// switch to the chroma probability/counter tables, then decode both planes
2355 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2356 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2357 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2362 for (pl = 0; pl < 2; pl++) {
2363 a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2364 l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2367 DECODE_UV_COEF_LOOP(1, decode_coeffs_b);
2370 MERGE_CTX(2, AV_RN16A);
2371 DECODE_UV_COEF_LOOP(2, decode_coeffs_b);
2375 MERGE_CTX(4, AV_RN32A);
2376 DECODE_UV_COEF_LOOP(4, decode_coeffs_b);
2380 MERGE_CTX(8, AV_RN64A);
2381 DECODE_UV_COEF_LOOP(8, decode_coeffs_b32);
/* Fix up an intra prediction mode for frame/tile edges and fill the
 * above (*a) and left (l) edge-pixel buffers the predictor will read.
 * Returns the (possibly remapped) prediction mode.
 * NOTE(review): listing is missing lines; code kept byte-identical. */
2388 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2389 uint8_t *dst_edge, ptrdiff_t stride_edge,
2390 uint8_t *dst_inner, ptrdiff_t stride_inner,
2391 uint8_t *l, int col, int x, int w,
2392 int row, int y, enum TxfmMode tx,
2393 int p, int ss_h, int ss_v)
2395 int have_top = row > 0 || y > 0;
2396 int have_left = col > s->tiling.tile_col_start || x > 0;
2397 int have_right = x < w - 1;
/* remap modes whose required neighbors are unavailable to a fixed-DC
 * fallback, indexed by [mode][have_left][have_top] */
2398 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2399 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2400 { DC_127_PRED, VERT_PRED } },
2401 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2402 { HOR_PRED, HOR_PRED } },
2403 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2404 { LEFT_DC_PRED, DC_PRED } },
2405 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2406 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2407 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2408 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2409 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2410 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2411 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2412 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2413 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2414 { DC_127_PRED, VERT_LEFT_PRED } },
2415 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2416 { HOR_UP_PRED, HOR_UP_PRED } },
2417 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2418 { HOR_PRED, TM_VP8_PRED } },
/* which neighbor pixels each (remapped) prediction mode actually reads */
2420 static const struct {
2421 uint8_t needs_left:1;
2422 uint8_t needs_top:1;
2423 uint8_t needs_topleft:1;
2424 uint8_t needs_topright:1;
2425 uint8_t invert_left:1;
2426 } edges[N_INTRA_PRED_MODES] = {
2427 [VERT_PRED] = { .needs_top = 1 },
2428 [HOR_PRED] = { .needs_left = 1 },
2429 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2430 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2431 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2432 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2433 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2434 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2435 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2436 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2437 [LEFT_DC_PRED] = { .needs_left = 1 },
2438 [TOP_DC_PRED] = { .needs_top = 1 },
2439 [DC_128_PRED] = { 0 },
2440 [DC_127_PRED] = { 0 },
2441 [DC_129_PRED] = { 0 }
2444 av_assert2(mode >= 0 && mode < 10);
2445 mode = mode_conv[mode][have_left][have_top];
2446 if (edges[mode].needs_top) {
2447 uint8_t *top, *topleft;
2448 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2449 int n_px_need_tr = 0;
2451 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2454 // if top of sb64-row, use s->intra_pred_data[] instead of
2455 // dst[-stride] for intra prediction (it contains pre- instead of
2456 // post-loopfilter data)
2458 top = !(row & 7) && !y ?
2459 s->intra_pred_data[p] + col * (8 >> ss_h) + x * 4 :
2460 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2462 topleft = !(row & 7) && !y ?
2463 s->intra_pred_data[p] + col * (8 >> ss_h) + x * 4 :
2464 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2465 &dst_inner[-stride_inner];
2469 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2470 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2471 n_px_need + n_px_need_tr <= n_px_have) {
/* not enough top pixels: copy what exists and extend with the last one */
2475 if (n_px_need <= n_px_have) {
2476 memcpy(*a, top, n_px_need);
2478 memcpy(*a, top, n_px_have);
2479 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2480 n_px_need - n_px_have);
2483 memset(*a, 127, n_px_need);
2485 if (edges[mode].needs_topleft) {
2486 if (have_left && have_top) {
2487 (*a)[-1] = topleft[-1];
2489 (*a)[-1] = have_top ? 129 : 127;
2492 if (tx == TX_4X4 && edges[mode].needs_topright) {
2493 if (have_top && have_right &&
2494 n_px_need + n_px_need_tr <= n_px_have) {
2495 memcpy(&(*a)[4], &top[4], 4);
2497 memset(&(*a)[4], (*a)[3], 4);
2502 if (edges[mode].needs_left) {
2504 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2505 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2506 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
/* invert_left (HOR_UP_PRED) stores the left column top-to-bottom;
 * otherwise it is stored bottom-to-top */
2508 if (edges[mode].invert_left) {
2509 if (n_px_need <= n_px_have) {
2510 for (i = 0; i < n_px_need; i++)
2511 l[i] = dst[i * stride - 1];
2513 for (i = 0; i < n_px_have; i++)
2514 l[i] = dst[i * stride - 1];
2515 memset(&l[n_px_have], l[n_px_have - 1], n_px_need - n_px_have);
2518 if (n_px_need <= n_px_have) {
2519 for (i = 0; i < n_px_need; i++)
2520 l[n_px_need - 1 - i] = dst[i * stride - 1];
2522 for (i = 0; i < n_px_have; i++)
2523 l[n_px_need - 1 - i] = dst[i * stride - 1];
2524 memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
2528 memset(l, 129, 4 << tx);
/* Intra reconstruction of the current block: for each transform sub-block,
 * fix up edge pixels (check_intra_mode), run the intra predictor, and add
 * the inverse-transformed residual. Luma first, then both chroma planes.
 * NOTE(review): listing is missing lines; code kept byte-identical. */
2535 static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2537 VP9Context *s = ctx->priv_data;
2539 int row = s->row, col = s->col;
2540 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2541 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2542 int end_x = FFMIN(2 * (s->cols - col), w4);
2543 int end_y = FFMIN(2 * (s->rows - row), h4);
2544 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2545 int uvstep1d = 1 << b->uvtx, p;
/* dst = possibly-emulated work buffer, dst_r = real frame plane */
2546 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2547 LOCAL_ALIGNED_32(uint8_t, a_buf, [64]);
2548 LOCAL_ALIGNED_32(uint8_t, l, [32]);
2550 for (n = 0, y = 0; y < end_y; y += step1d) {
2551 uint8_t *ptr = dst, *ptr_r = dst_r;
2552 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2553 ptr_r += 4 * step1d, n += step) {
/* sub-8x8 blocks carry one mode per 4x4 unit */
2554 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2556 uint8_t *a = &a_buf[32];
2557 enum TxfmType txtp = vp9_intra_txfm_type[mode];
2558 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2560 mode = check_intra_mode(s, mode, &a, ptr_r,
2561 s->frames[CUR_FRAME].tf.f->linesize[0],
2562 ptr, s->y_stride, l,
2563 col, x, w4, row, y, b->tx, 0, 0, 0);
2564 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2566 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2567 s->block + 16 * n, eob);
2569 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2570 dst += 4 * step1d * s->y_stride;
/* chroma: same structure, single uvmode, DCT_DCT only */
2577 step = 1 << (b->uvtx * 2);
2578 for (p = 0; p < 2; p++) {
2579 dst = s->dst[1 + p];
2580 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2581 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2582 uint8_t *ptr = dst, *ptr_r = dst_r;
2583 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2584 ptr_r += 4 * uvstep1d, n += step) {
2585 int mode = b->uvmode;
2586 uint8_t *a = &a_buf[32];
2587 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2589 mode = check_intra_mode(s, mode, &a, ptr_r,
2590 s->frames[CUR_FRAME].tf.f->linesize[1],
2591 ptr, s->uv_stride, l, col, x, w4, row, y,
2592 b->uvtx, p + 1, s->ss_h, s->ss_v);
2593 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2595 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2596 s->uvblock[p] + 16 * n, eob);
2598 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2599 dst += 4 * uvstep1d * s->uv_stride;
/* Scaled luma motion compensation: scale the MV by the reference/current
 * resolution ratio, wait for the reference row to be decoded (frame-MT),
 * handle out-of-frame reads via emulated_edge_mc, then run the scaled MC
 * function. NOTE(review): listing is missing lines; code kept byte-identical. */
2604 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2605 uint8_t *dst, ptrdiff_t dst_stride,
2606 const uint8_t *ref, ptrdiff_t ref_stride,
2607 ThreadFrame *ref_frame,
2608 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2609 int bw, int bh, int w, int h,
2610 const uint16_t *scale, const uint8_t *step)
2612 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2613 // BUG libvpx seems to scale the two components separately. This introduces
2614 // rounding errors but we have to reproduce them to be exactly compatible
2615 // with the output from libvpx...
2616 int mx = scale_mv(mv->x * 2, 0) + scale_mv(x * 16, 0);
2617 int my = scale_mv(mv->y * 2, 1) + scale_mv(y * 16, 1);
2618 int refbw_m1, refbh_m1;
2623 ref += y * ref_stride + x;
2626 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2627 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2628 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2629 // we use +7 because the last 7 pixels of each sbrow can be changed in
2630 // the longest loopfilter of the next sbrow
2631 th = (y + refbh_m1 + 4 + 7) >> 6;
2632 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
/* reference read would touch pixels outside the frame: go through the
 * edge-emulation buffer instead */
2633 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2634 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2635 ref - 3 * ref_stride - 3,
2637 refbw_m1 + 8, refbh_m1 + 8,
2638 x - 3, y - 3, w, h);
2639 ref = s->edge_emu_buffer + 3 * 144 + 3;
2642 smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
/* Scaled chroma motion compensation: like mc_luma_scaled but for both
 * chroma planes at once, reproducing a libvpx MV-rounding bug (see URL).
 * NOTE(review): listing is missing lines; code kept byte-identical. */
2645 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2646 uint8_t *dst_u, uint8_t *dst_v,
2647 ptrdiff_t dst_stride,
2648 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2649 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2650 ThreadFrame *ref_frame,
2651 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2652 int bw, int bh, int w, int h,
2653 const uint16_t *scale, const uint8_t *step)
2655 // BUG https://code.google.com/p/webm/issues/detail?id=820
2656 int mx = scale_mv(mv->x << !s->ss_h, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2657 int my = scale_mv(mv->y << !s->ss_v, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2659 int refbw_m1, refbh_m1;
2664 ref_u += y * src_stride_u + x;
2665 ref_v += y * src_stride_v + x;
2668 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2669 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2670 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2671 // we use +7 because the last 7 pixels of each sbrow can be changed in
2672 // the longest loopfilter of the next sbrow
2673 th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2674 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
/* out-of-frame read: route both planes through the edge-emulation buffer */
2675 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2676 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2677 ref_u - 3 * src_stride_u - 3,
2679 refbw_m1 + 8, refbh_m1 + 8,
2680 x - 3, y - 3, w, h);
2681 ref_u = s->edge_emu_buffer + 3 * 144 + 3;
2682 smc(dst_u, dst_stride, ref_u, 144, bh, mx, my, step[0], step[1]);
2684 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2685 ref_v - 3 * src_stride_v - 3,
2687 refbw_m1 + 8, refbh_m1 + 8,
2688 x - 3, y - 3, w, h);
2689 ref_v = s->edge_emu_buffer + 3 * 144 + 3;
2690 smc(dst_v, dst_stride, ref_v, 144, bh, mx, my, step[0], step[1]);
2692 smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2693 smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
/* Instantiate the inter-prediction template (vp9_mc_template.c) for the
 * scaled-reference case: mc_{luma,chroma}_dir route to the *_scaled helpers
 * above, adding the per-reference mvscale/mvstep arguments. */
2697 #define FN(x) x##_scaled
2698 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2699 mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
2700 mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2701 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2702 row, col, mv, bw, bh, w, h, i) \
2703 mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2704 row, col, mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2705 #include "vp9_mc_template.c"
2707 #undef mc_chroma_dir
/* Unscaled luma motion compensation: wait for reference-row progress,
 * emulate frame edges only when the subpel filter taps would read outside
 * the frame, then run the plain MC function.
 * NOTE(review): listing is missing lines; code kept byte-identical. */
2710 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2711 uint8_t *dst, ptrdiff_t dst_stride,
2712 const uint8_t *ref, ptrdiff_t ref_stride,
2713 ThreadFrame *ref_frame,
2714 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2715 int bw, int bh, int w, int h)
2717 int mx = mv->x, my = mv->y, th;
2721 ref += y * ref_stride + x;
2724 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2725 // we use +7 because the last 7 pixels of each sbrow can be changed in
2726 // the longest loopfilter of the next sbrow
2727 th = (y + bh + 4 * !!my + 7) >> 6;
2728 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
/* !!mx / !!my: the 3-pixel border is only needed along dimensions where a
 * subpel filter actually runs */
2729 if (x < !!mx * 3 || y < !!my * 3 ||
2730 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2731 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2732 ref - !!my * 3 * ref_stride - !!mx * 3,
2734 bw + !!mx * 7, bh + !!my * 7,
2735 x - !!mx * 3, y - !!my * 3, w, h);
2736 ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2739 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
/* Unscaled chroma motion compensation: same as mc_luma_unscaled, applied
 * to both chroma planes with the MV pre-scaled by the subsampling factors.
 * NOTE(review): listing is missing lines; code kept byte-identical. */
2742 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2743 uint8_t *dst_u, uint8_t *dst_v,
2744 ptrdiff_t dst_stride,
2745 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2746 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2747 ThreadFrame *ref_frame,
2748 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2749 int bw, int bh, int w, int h)
2751 int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
2755 ref_u += y * src_stride_u + x;
2756 ref_v += y * src_stride_v + x;
2759 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2760 // we use +7 because the last 7 pixels of each sbrow can be changed in
2761 // the longest loopfilter of the next sbrow
2762 th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2763 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
/* edge case: route both planes through the emulation buffer */
2764 if (x < !!mx * 3 || y < !!my * 3 ||
2765 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2766 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2767 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2769 bw + !!mx * 7, bh + !!my * 7,
2770 x - !!mx * 3, y - !!my * 3, w, h);
2771 ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2772 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2774 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2775 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2777 bw + !!mx * 7, bh + !!my * 7,
2778 x - !!mx * 3, y - !!my * 3, w, h);
2779 ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2780 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
2782 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2783 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
/* Second instantiation of vp9_mc_template.c, this time for unscaled
 * references (same-resolution), routing to the *_unscaled helpers above. */
2788 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2789 mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2791 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2792 row, col, mv, bw, bh, w, h, i) \
2793 mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2794 row, col, mv, bw, bh, w, h)
2795 #include "vp9_mc_template.c"
2796 #undef mc_luma_dir_dir
2797 #undef mc_chroma_dir_dir
/* Inter reconstruction: run scaled or unscaled inter prediction depending
 * on whether the reference frame's resolution differs, then (unless the
 * block is skipped) add the inverse-transformed residuals - structure
 * mirrors intra_recon() minus the edge fixups.
 * NOTE(review): listing is missing lines; code kept byte-identical. */
2800 static void inter_recon(AVCodecContext *ctx)
2802 VP9Context *s = ctx->priv_data;
2804 int row = s->row, col = s->col;
/* non-zero mvscale means the reference has a different resolution */
2806 if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
2807 inter_pred_scaled(ctx);
2812 /* mostly copied intra_recon() */
2814 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2815 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2816 int end_x = FFMIN(2 * (s->cols - col), w4);
2817 int end_y = FFMIN(2 * (s->rows - row), h4);
2818 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2819 int uvstep1d = 1 << b->uvtx, p;
2820 uint8_t *dst = s->dst[0];
2823 for (n = 0, y = 0; y < end_y; y += step1d) {
2825 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2826 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2829 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2830 s->block + 16 * n, eob);
2832 dst += 4 * s->y_stride * step1d;
/* chroma residuals, both planes */
2838 step = 1 << (b->uvtx * 2);
2839 for (p = 0; p < 2; p++) {
2840 dst = s->dst[p + 1];
2841 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2843 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2844 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2847 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2848 s->uvblock[p] + 16 * n, eob);
2850 dst += 4 * uvstep1d * s->uv_stride;
/* Build the loopfilter edge bitmasks for one block inside the current
 * superblock's VP9Filter: for every 8-pixel row, record which column/row
 * edges get the 16-, 8- or 4-wide filter, per plane and transform size.
 * NOTE(review): listing is missing lines; code kept byte-identical. */
2856 static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
2857 int row_and_7, int col_and_7,
2858 int w, int h, int col_end, int row_end,
2859 enum TxfmMode tx, int skip_inter)
2861 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2862 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2863 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2864 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2866 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2867 // edges. This means that for UV, we work on two subsampled blocks at
2868 // a time, and we only use the topleft block's mode information to set
2869 // things like block strength. Thus, for any block size smaller than
2870 // 16x16, ignore the odd portion of the block.
2871 if (tx == TX_4X4 && is_uv) {
2886 if (tx == TX_4X4 && !skip_inter) {
2887 int t = 1 << col_and_7, m_col = (t << w) - t, y;
2888 int m_col_odd = (t << (w - 1)) - t;
2890 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
2892 int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2894 for (y = row_and_7; y < h + row_and_7; y++) {
2895 int col_mask_id = 2 - !(y & 7);
2897 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2898 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2899 // for odd lines, if the odd col is not being filtered,
2900 // skip odd row also:
2907 // if a/c are even row/col and b/d are odd, and d is skipped,
2908 // e.g. right edge of size-66x66.webm, then skip b also (bug)
2909 if ((col_end & 1) && (y & 1)) {
2910 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
2912 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
2916 int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2918 for (y = row_and_7; y < h + row_and_7; y++) {
2919 int col_mask_id = 2 - !(y & 3);
2921 lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
2922 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2923 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
2924 lflvl->mask[is_uv][0][y][3] |= m_col;
2925 lflvl->mask[is_uv][1][y][3] |= m_col;
/* non-4x4, non-skipped case: one outer edge per transform block */
2929 int y, t = 1 << col_and_7, m_col = (t << w) - t;
2932 int mask_id = (tx == TX_8X8);
2933 int l2 = tx + is_uv - 1, step1d = 1 << l2;
2934 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2935 int m_row = m_col & masks[l2];
2937 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
2938 // 8wd loopfilter to prevent going off the visible edge.
2939 if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2940 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2941 int m_row_8 = m_row - m_row_16;
2943 for (y = row_and_7; y < h + row_and_7; y++) {
2944 lflvl->mask[is_uv][0][y][0] |= m_row_16;
2945 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2948 for (y = row_and_7; y < h + row_and_7; y++)
2949 lflvl->mask[is_uv][0][y][mask_id] |= m_row;
2952 if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2953 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2954 lflvl->mask[is_uv][1][y][0] |= m_col;
2955 if (y - row_and_7 == h - 1)
2956 lflvl->mask[is_uv][1][y][1] |= m_col;
2958 for (y = row_and_7; y < h + row_and_7; y += step1d)
2959 lflvl->mask[is_uv][1][y][mask_id] |= m_col;
/* skipped inter blocks only filter their outer edges */
2961 } else if (tx != TX_4X4) {
2964 mask_id = (tx == TX_8X8) || (is_uv && h == 1);
2965 lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
2966 mask_id = (tx == TX_8X8) || (is_uv && w == 1);
2967 for (y = row_and_7; y < h + row_and_7; y++)
2968 lflvl->mask[is_uv][0][y][mask_id] |= t;
2970 int t8 = t & 0x01, t4 = t - t8;
2972 for (y = row_and_7; y < h + row_and_7; y++) {
2973 lflvl->mask[is_uv][0][y][2] |= t4;
2974 lflvl->mask[is_uv][0][y][1] |= t8;
2976 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
2978 int t8 = t & 0x11, t4 = t - t8;
2980 for (y = row_and_7; y < h + row_and_7; y++) {
2981 lflvl->mask[is_uv][0][y][2] |= t4;
2982 lflvl->mask[is_uv][0][y][1] |= t8;
2984 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
/* Decode and reconstruct one block: decode mode/MV/coefficient data, clear
 * the nnz contexts for skipped blocks, run intra or inter reconstruction
 * (through temporary buffers when the block overhangs the frame edge),
 * then compute loopfilter levels/masks and advance the coefficient buffers.
 * NOTE(review): listing is missing lines; code kept byte-identical. */
2989 static void decode_b(AVCodecContext *ctx, int row, int col,
2990 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2991 enum BlockLevel bl, enum BlockPartition bp)
2993 VP9Context *s = ctx->priv_data;
2995 enum BlockSize bs = bl * 3 + bp;
2996 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
2998 AVFrame *f = s->frames[CUR_FRAME].tf.f;
/* MV clamping range for this block position (1/8-pel units) */
3004 s->min_mv.x = -(128 + col * 64);
3005 s->min_mv.y = -(128 + row * 64);
3006 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
3007 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
/* chroma tx size is one smaller when subsampling halves the block */
3013 b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
3014 (s->ss_v && h4 * 2 == (1 << b->tx)));
/* skipped block: zero the above/left nnz contexts with a width-matched
 * store (1/2/4/8/16 bytes) */
3021 #define SPLAT_ZERO_CTX(v, n) \
3023 case 1: v = 0; break; \
3024 case 2: AV_ZERO16(&v); break; \
3025 case 4: AV_ZERO32(&v); break; \
3026 case 8: AV_ZERO64(&v); break; \
3027 case 16: AV_ZERO128(&v); break; \
3029 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3031 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3032 if (s->ss_##dir2) { \
3033 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3034 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3036 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3037 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3042 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
3043 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
3044 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
3045 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
3048 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
3049 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
3050 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
3051 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
/* advance coefficient/EOB pointers past this (skipped) block */
3056 s->block += w4 * h4 * 64;
3057 s->uvblock[0] += w4 * h4 * 64 >> (s->ss_h + s->ss_v);
3058 s->uvblock[1] += w4 * h4 * 64 >> (s->ss_h + s->ss_v);
3059 s->eob += 4 * w4 * h4;
3060 s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3061 s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3067 // emulated overhangs if the stride of the target buffer can't hold. This
3068 // allows to support emu-edge and so on even if we have large block
3070 emu[0] = (col + w4) * 8 > f->linesize[0] ||
3071 (row + h4) > s->rows;
3072 emu[1] = (col + w4) * 4 > f->linesize[1] ||
3073 (row + h4) > s->rows;
3075 s->dst[0] = s->tmp_y;
3078 s->dst[0] = f->data[0] + yoff;
3079 s->y_stride = f->linesize[0];
3082 s->dst[1] = s->tmp_uv[0];
3083 s->dst[2] = s->tmp_uv[1];
3086 s->dst[1] = f->data[1] + uvoff;
3087 s->dst[2] = f->data[2] + uvoff;
3088 s->uv_stride = f->linesize[1];
3091 intra_recon(ctx, yoff, uvoff);
/* copy emulated temporary buffers back into the visible frame area */
3096 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3098 for (n = 0; o < w; n++) {
3103 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3104 s->tmp_y + o, 64, h, 0, 0);
3110 int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
3112 for (n = 1; o < w; n++) {
3117 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3118 s->tmp_uv[0] + o, 32, h, 0, 0);
3119 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3120 s->tmp_uv[1] + o, 32, h, 0, 0);
3126 // pick filter level and find edges to apply filter to
3127 if (s->filter.level &&
3128 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3129 [b->mode[3] != ZEROMV]) > 0) {
3130 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3131 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3133 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3134 mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3135 mask_edges(lflvl, 1, row7, col7, x_end, y_end,
3136 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3137 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3138 b->uvtx, skip_inter);
/* lazily fill the limit/mblim LUT entries for this filter level */
3140 if (!s->filter.lim_lut[lvl]) {
3141 int sharp = s->filter.sharpness;
3145 limit >>= (sharp + 3) >> 2;
3146 limit = FFMIN(limit, 9 - sharp);
3148 limit = FFMAX(limit, 1);
3150 s->filter.lim_lut[lvl] = limit;
3151 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
/* advance coefficient/EOB pointers past this (decoded) block */
3157 s->block += w4 * h4 * 64;
3158 s->uvblock[0] += w4 * h4 * 64 >> (s->ss_v + s->ss_h);
3159 s->uvblock[1] += w4 * h4 * 64 >> (s->ss_v + s->ss_h);
3160 s->eob += 4 * w4 * h4;
3161 s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3162 s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
/* Recursively decode a superblock subtree: read the partition symbol for
 * this level, then either decode one block or recurse into H/V/split
 * partitions; blocks partially outside the frame take the implicit-split
 * branches. NOTE(review): listing is missing lines; code kept byte-identical. */
3166 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3167 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3169 VP9Context *s = ctx->priv_data;
/* partition probability context from above/left partition history */
3170 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3171 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
3172 const uint8_t *p = s->keyframe || s->intraonly ? vp9_default_kf_partition_probs[bl][c] :
3173 s->prob.p.partition[bl][c];
3174 enum BlockPartition bp;
3175 ptrdiff_t hbs = 4 >> bl;
3176 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3177 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3180 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3181 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3182 } else if (col + hbs < s->cols) { // FIXME why not <=?
3183 if (row + hbs < s->rows) { // FIXME why not <=?
3184 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3186 case PARTITION_NONE:
3187 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3190 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3191 yoff += hbs * 8 * y_stride;
3192 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3193 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3196 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3198 uvoff += hbs * 8 >> s->ss_h;
3199 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3201 case PARTITION_SPLIT:
3202 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3203 decode_sb(ctx, row, col + hbs, lflvl,
3204 yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
3205 yoff += hbs * 8 * y_stride;
3206 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3207 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3208 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3209 yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
/* bottom half outside the frame: only none-vs-split is coded */
3214 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3215 bp = PARTITION_SPLIT;
3216 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3217 decode_sb(ctx, row, col + hbs, lflvl,
3218 yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
3221 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3223 } else if (row + hbs < s->rows) { // FIXME why not <=?
3224 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3225 bp = PARTITION_SPLIT;
3226 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3227 yoff += hbs * 8 * y_stride;
3228 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3229 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3232 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
/* both halves outside: split is forced, nothing coded */
3235 bp = PARTITION_SPLIT;
3236 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3238 s->counts.partition[bl][c][bp]++;
/* Re-walk an already-decoded superblock subtree using the stored per-block
 * bl/bp (no bitstream reads) - used when reconstruction is replayed, e.g.
 * for frame-invisible passes.
 * NOTE(review): listing is missing lines; code kept byte-identical. */
3241 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3242 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3244 VP9Context *s = ctx->priv_data;
3246 ptrdiff_t hbs = 4 >> bl;
3247 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3248 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3251 av_assert2(b->bl == BL_8X8);
3252 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3253 } else if (s->b->bl == bl) {
3254 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3255 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3256 yoff += hbs * 8 * y_stride;
3257 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3258 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3259 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3261 uvoff += hbs * 8 >> s->ss_h;
3262 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
/* stored level is deeper: recurse into the quadrants that are in-frame */
3265 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3266 if (col + hbs < s->cols) { // FIXME why not <=?
3267 if (row + hbs < s->rows) {
3268 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
3269 uvoff + (8 * hbs >> s->ss_h), bl + 1);
3270 yoff += hbs * 8 * y_stride;
3271 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3272 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3273 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3274 yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
3277 uvoff += hbs * 8 >> s->ss_h;
3278 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3280 } else if (row + hbs < s->rows) {
3281 yoff += hbs * 8 * y_stride;
3282 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3283 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
/*
 * loopfilter_sb(): apply the VP9 in-loop deblocking filter to one 64x64
 * superblock of the current frame.
 *
 * lflvl carries the per-8x8-block filter levels (lflvl->level) and the
 * edge masks (lflvl->mask[plane 0=y,1=uv][dir 0=col,1=row][row][size])
 * accumulated during block reconstruction; yoff/uvoff are the byte
 * offsets of this superblock inside the luma/chroma planes.
 *
 * Per edge: L is the 8-bit filter level, H = L >> 4 the hev threshold,
 * and E/I are taken from the precomputed mblim/lim lookup tables.
 * mask words [0]/[1]/[2] select 16/8/4-px block-edge filters, [3] the
 * inner 4-px edges; two adjacent edges with matching level are merged
 * into a single loop_filter_16 or loop_filter_mix2 call.
 *
 * NOTE(review): this is an excerpted listing — some original lines
 * (else-branches, closing braces) are not visible in this chunk.
 */
3288 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3289 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3291 VP9Context *s = ctx->priv_data;
3292 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3293 uint8_t *dst = f->data[0] + yoff, *lvl = lflvl->level;
3294 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
3297 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3298 // if you think of them as acting on a 8x8 block max, we can interleave
3299 // each v/h within the single x loop, but that only works if we work on
3300 // 8 pixel blocks, and we won't always do that (we want at least 16px
3301 // to use SSE2 optimizations, perhaps 32 for AVX2)
3303 // filter edges between columns, Y plane (e.g. block1 | block2)
// two 8px luma rows per iteration: hmask1/hmask2 are the column masks of
// the upper/lower row, so a 16-wide filter can span both when levels match
3304 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
3305 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
3306 uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
3307 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3308 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3309 unsigned hm = hm1 | hm2 | hm13 | hm23;
// walk set bits left-to-right; each bit is one 8px column edge
3311 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
3313 int L = *l, H = L >> 4;
3314 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3317 if (hmask1[0] & x) {
3318 if (hmask2[0] & x) {
// both rows filter this edge with the same level: one 16px call
3319 av_assert2(l[8] == L);
3320 s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
3322 s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
3324 } else if (hm2 & x) {
// different levels above/below: pack both into E/I (low/high byte)
3327 E |= s->filter.mblim_lut[L] << 8;
3328 I |= s->filter.lim_lut[L] << 8;
3329 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3331 [0](ptr, ls_y, E, I, H);
3333 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3334 [0](ptr, ls_y, E, I, H);
3337 } else if (hm2 & x) {
// only the lower row filters this edge
3338 int L = l[8], H = L >> 4;
3339 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3342 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3343 [0](ptr + 8 * ls_y, ls_y, E, I, H);
// inner 4px column edges (mask word [3]), offset by 4 pixels
3347 int L = *l, H = L >> 4;
3348 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3353 E |= s->filter.mblim_lut[L] << 8;
3354 I |= s->filter.lim_lut[L] << 8;
3355 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
3357 s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
3359 } else if (hm23 & x) {
3360 int L = l[8], H = L >> 4;
3361 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3363 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
3369 // filter edges between rows, Y plane (e.g. ------)
3371 dst = f->data[0] + yoff;
// one 8px luma row per iteration; bit pairs in vmask describe the edge and
// its right-hand neighbour, so x advances by 2 bits / 16 pixels
3373 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
3374 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
3375 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
3377 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
3380 int L = *l, H = L >> 4;
3381 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3384 if (vmask[0] & (x << 1)) {
// left+right 8px edges share one level: single 16px-wide call
3385 av_assert2(l[1] == L);
3386 s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
3388 s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
3390 } else if (vm & (x << 1)) {
3393 E |= s->filter.mblim_lut[L] << 8;
3394 I |= s->filter.lim_lut[L] << 8;
3395 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3396 [!!(vmask[1] & (x << 1))]
3397 [1](ptr, ls_y, E, I, H);
3399 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3400 [1](ptr, ls_y, E, I, H);
3402 } else if (vm & (x << 1)) {
3403 int L = l[1], H = L >> 4;
3404 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3406 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
3407 [1](ptr + 8, ls_y, E, I, H);
// inner 4px row edges (mask word [3]), offset by 4 lines
3411 int L = *l, H = L >> 4;
3412 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3414 if (vm3 & (x << 1)) {
3417 E |= s->filter.mblim_lut[L] << 8;
3418 I |= s->filter.lim_lut[L] << 8;
3419 s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
3421 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
3423 } else if (vm3 & (x << 1)) {
3424 int L = l[1], H = L >> 4;
3425 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3427 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
3432 // same principle but for U/V planes
3433 for (p = 0; p < 2; p++) {
// column edges, chroma: 4 subsampled rows per iteration, 4px edge step
3435 dst = f->data[1 + p] + uvoff;
3436 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
3437 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
3438 uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
3439 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
3440 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
3442 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
3445 int L = *l, H = L >> 4;
3446 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3448 if (hmask1[0] & x) {
3449 if (hmask2[0] & x) {
3450 av_assert2(l[16] == L);
3451 s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
3453 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
3455 } else if (hm2 & x) {
3458 E |= s->filter.mblim_lut[L] << 8;
3459 I |= s->filter.lim_lut[L] << 8;
3460 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3462 [0](ptr, ls_uv, E, I, H);
3464 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3465 [0](ptr, ls_uv, E, I, H);
3467 } else if (hm2 & x) {
3468 int L = l[16], H = L >> 4;
3469 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3471 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3472 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
// row edges, chroma: masks use 4-bit groups, hence x <<= 4 / l += 4
3480 dst = f->data[1 + p] + uvoff;
3481 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
3482 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
3483 unsigned vm = vmask[0] | vmask[1] | vmask[2];
3485 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
3488 int L = *l, H = L >> 4;
3489 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3492 if (vmask[0] & (x << 2)) {
3493 av_assert2(l[2] == L);
3494 s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
3496 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
3498 } else if (vm & (x << 2)) {
3501 E |= s->filter.mblim_lut[L] << 8;
3502 I |= s->filter.lim_lut[L] << 8;
3503 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3504 [!!(vmask[1] & (x << 2))]
3505 [1](ptr, ls_uv, E, I, H);
3507 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3508 [1](ptr, ls_uv, E, I, H);
3510 } else if (vm & (x << 2)) {
3511 int L = l[2], H = L >> 4;
3512 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3514 s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
3515 [1](ptr + 8, ls_uv, E, I, H);
/**
 * Compute the pixel start/end of one tile along one axis.
 *
 * @param start  out: first pixel covered by tile idx
 * @param end    out: one past the last pixel covered by tile idx
 * @param idx    tile index along this axis
 * @param log2_n log2 of the tile count along this axis
 * @param n      total size of the axis in 64px superblock units
 *
 * The tile boundary in superblocks is (idx * n) >> log2_n, clamped to n,
 * then scaled by 8 to convert superblock units into 8px block units.
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = ( idx      * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;

    /* clamp to the axis size before scaling (equivalent to FFMIN) */
    if (sb_start > n)
        sb_start = n;
    if (sb_end > n)
        sb_end = n;

    *start = sb_start << 3;
    *end   = sb_end   << 3;
}
3533 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3534 int max_count, int update_factor)
3536 unsigned ct = ct0 + ct1, p2, p1;
3542 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3543 p2 = av_clip(p2, 1, 255);
3544 ct = FFMIN(ct, max_count);
3545 update_factor = FASTDIV(update_factor * ct, max_count);
3547 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3548 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
/*
 * adapt_probs(): backward adaptation of the active frame context after a
 * frame has been decoded.  Every probability in s->prob_ctx[s->framectxid]
 * is pulled toward the symbol frequencies gathered in s->counts during
 * decoding (see adapt_prob()).  Coefficient probabilities use the
 * frame-type-dependent factor uf; all other groups use 20/128.
 *
 * NOTE(review): this is an excerpted listing — some original lines
 * (else-branches, closing braces, `continue`s) are not visible here.
 */
3551 static void adapt_probs(VP9Context *s)
3554 prob_context *p = &s->prob_ctx[s->framectxid].p;
// key/intra/post-key frames adapt coefficients more aggressively (112 < 128)
3555 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
// coefficient probabilities: [tx size][plane type][is_inter][band][context]
3558 for (i = 0; i < 4; i++)
3559 for (j = 0; j < 2; j++)
3560 for (k = 0; k < 2; k++)
3561 for (l = 0; l < 6; l++)
3562 for (m = 0; m < 6; m++) {
3563 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3564 unsigned *e = s->counts.eob[i][j][k][l][m];
3565 unsigned *c = s->counts.coef[i][j][k][l][m];
3567 if (l == 0 && m >= 3) // dc only has 3 pt
3570 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3571 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3572 adapt_prob(&pp[2], c[1], c[2], 24, uf);
// intra-only frames do not adapt the mode/skip/tx trees; carry the
// raw frame probabilities over instead
3575 if (s->keyframe || s->intraonly) {
3576 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3577 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3578 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3579 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
// skip flag
3584 for (i = 0; i < 3; i++)
3585 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
// intra/inter flag
3588 for (i = 0; i < 4; i++)
3589 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
// comppred flag
3592 if (s->comppredmode == PRED_SWITCHABLE) {
3593 for (i = 0; i < 5; i++)
3594 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
// reference frames (compound prediction)
3598 if (s->comppredmode != PRED_SINGLEREF) {
3599 for (i = 0; i < 5; i++)
3600 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3601 s->counts.comp_ref[i][1], 20, 128);
// reference frames (single prediction)
3604 if (s->comppredmode != PRED_COMPREF) {
3605 for (i = 0; i < 5; i++) {
3606 uint8_t *pp = p->single_ref[i];
3607 unsigned (*c)[2] = s->counts.single_ref[i];
3609 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3610 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3614 // block partitioning
3615 for (i = 0; i < 4; i++)
3616 for (j = 0; j < 4; j++) {
3617 uint8_t *pp = p->partition[i][j];
3618 unsigned *c = s->counts.partition[i][j];
3620 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3621 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3622 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// tx size selection
3626 if (s->txfmmode == TX_SWITCHABLE) {
3627 for (i = 0; i < 2; i++) {
3628 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3630 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3631 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3632 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3633 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3634 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3635 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3639 // interpolation filter
3640 if (s->filtermode == FILTER_SWITCHABLE) {
3641 for (i = 0; i < 4; i++) {
3642 uint8_t *pp = p->filter[i];
3643 unsigned *c = s->counts.filter[i];
3645 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3646 adapt_prob(&pp[1], c[1], c[2], 20, 128);
// inter modes (ZEROMV/NEARESTMV/NEARMV/NEWMV tree)
3651 for (i = 0; i < 7; i++) {
3652 uint8_t *pp = p->mv_mode[i];
3653 unsigned *c = s->counts.mv_mode[i];
3655 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3656 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3657 adapt_prob(&pp[2], c[1], c[3], 20, 128);
// mv joints (which of x/y has a nonzero component)
3662 uint8_t *pp = p->mv_joint;
3663 unsigned *c = s->counts.mv_joint;
3665 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3666 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3667 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// mv components: sign, class tree, class0 flag, bits, fractional parts,
// high-precision bits — per component (0 = y, 1 = x)
3671 for (i = 0; i < 2; i++) {
3673 unsigned *c, (*c2)[2], sum;
3675 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3676 s->counts.mv_comp[i].sign[1], 20, 128);
3678 pp = p->mv_comp[i].classes;
3679 c = s->counts.mv_comp[i].classes;
3680 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3681 adapt_prob(&pp[0], c[0], sum, 20, 128);
3683 adapt_prob(&pp[1], c[1], sum, 20, 128);
3685 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3686 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3688 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3689 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3691 adapt_prob(&pp[6], c[6], sum, 20, 128);
3692 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3693 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3694 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3696 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3697 s->counts.mv_comp[i].class0[1], 20, 128);
3698 pp = p->mv_comp[i].bits;
3699 c2 = s->counts.mv_comp[i].bits;
3700 for (j = 0; j < 10; j++)
3701 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3703 for (j = 0; j < 2; j++) {
3704 pp = p->mv_comp[i].class0_fp[j];
3705 c = s->counts.mv_comp[i].class0_fp[j];
3706 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3707 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3708 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3710 pp = p->mv_comp[i].fp;
3711 c = s->counts.mv_comp[i].fp;
3712 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3713 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3714 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// hp bits are only adapted when the frame actually coded them
3716 if (s->highprecisionmvs) {
3717 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3718 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3719 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3720 s->counts.mv_comp[i].hp[1], 20, 128);
// y intra modes: walk the mode tree, removing each decided mode from sum
3725 for (i = 0; i < 4; i++) {
3726 uint8_t *pp = p->y_mode[i];
3727 unsigned *c = s->counts.y_mode[i], sum, s2;
3729 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3730 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3731 sum -= c[TM_VP8_PRED];
3732 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3733 sum -= c[VERT_PRED];
3734 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3735 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3737 adapt_prob(&pp[3], s2, sum, 20, 128);
3739 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3740 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3741 sum -= c[DIAG_DOWN_LEFT_PRED];
3742 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3743 sum -= c[VERT_LEFT_PRED];
3744 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3745 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// uv intra modes: same tree, conditioned on the luma mode
3749 for (i = 0; i < 10; i++) {
3750 uint8_t *pp = p->uv_mode[i];
3751 unsigned *c = s->counts.uv_mode[i], sum, s2;
3753 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3754 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3755 sum -= c[TM_VP8_PRED];
3756 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3757 sum -= c[VERT_PRED];
3758 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3759 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3761 adapt_prob(&pp[3], s2, sum, 20, 128);
3763 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3764 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3765 sum -= c[DIAG_DOWN_LEFT_PRED];
3766 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3767 sum -= c[VERT_LEFT_PRED];
3768 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3769 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3773 static void free_buffers(VP9Context *s)
3775 av_freep(&s->intra_pred_data[0]);
3776 av_freep(&s->b_base);
3777 av_freep(&s->block_base);
/*
 * vp9_decode_free(): codec close callback — release every frame the
 * decoder holds (the 3 internal frame slots and the 8 reference /
 * 8 next-reference ThreadFrames) and free the AVFrame shells themselves.
 * Also used as the error-cleanup path of init_frames(), so it must cope
 * with partially-allocated state (hence the data[0] checks).
 */
3780 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3782 VP9Context *s = ctx->priv_data;
3785 for (i = 0; i < 3; i++) {
3786 if (s->frames[i].tf.f->data[0])
3787 vp9_unref_frame(ctx, &s->frames[i]);
3788 av_frame_free(&s->frames[i].tf.f);
3790 for (i = 0; i < 8; i++) {
3791 if (s->refs[i].f->data[0])
3792 ff_thread_release_buffer(ctx, &s->refs[i]);
3793 av_frame_free(&s->refs[i].f);
3794 if (s->next_refs[i].f->data[0])
3795 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3796 av_frame_free(&s->next_refs[i].f);
/*
 * vp9_decode_frame(): top-level per-packet decode entry point.
 *
 * Parses the frame header; for show-existing-frame packets it just
 * re-exports a stored reference.  Otherwise it rotates the internal
 * frame slots (current / mv-pair / segmentation-map references),
 * allocates the output frame, decodes all tiles superblock-row by
 * superblock-row (optionally in two passes for frame-threading),
 * loop-filters each finished row, then commits the updated reference
 * set and returns the frame if it is visible.
 *
 * NOTE(review): excerpted listing — error-path lines, else-branches and
 * closing braces between the shown lines are not visible here.
 */
3806 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3807 int *got_frame, AVPacket *pkt)
3809 const uint8_t *data = pkt->data;
3810 int size = pkt->size;
3811 VP9Context *s = ctx->priv_data;
3812 int res, tile_row, tile_col, i, ref, row, col;
// keep the previous segmentation map when the header does not update it
3813 int retain_segmap_ref = s->segmentation.enabled && !s->segmentation.update_map;
3814 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
3817 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
// res == 0: "show existing frame" — export reference `ref` directly
3819 } else if (res == 0) {
3820 if (!s->refs[ref].f->data[0]) {
3821 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3822 return AVERROR_INVALIDDATA;
3824 if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
3826 ((AVFrame *)frame)->pkt_pts = pkt->pts;
3827 ((AVFrame *)frame)->pkt_dts = pkt->dts;
3828 for (i = 0; i < 8; i++) {
3829 if (s->next_refs[i].f->data[0])
3830 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3831 if (s->refs[i].f->data[0] &&
3832 (res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i])) < 0)
// promote the previous CUR_FRAME to the segmentation-map / mv-pair
// reference slots before allocating the new current frame
3841 if (!retain_segmap_ref) {
3842 if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
3843 vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
3844 if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
3845 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
3848 if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
3849 vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);
3850 if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
3851 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
3853 if (s->frames[CUR_FRAME].tf.f->data[0])
3854 vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
3855 if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
3857 f = s->frames[CUR_FRAME].tf.f;
3858 f->key_frame = s->keyframe;
3859 f->pict_type = (s->keyframe || s->intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3860 ls_y = f->linesize[0];
3861 ls_uv =f->linesize[1];
// build the next reference set: refreshed slots point at the new frame,
// the rest carry over the old references
3864 for (i = 0; i < 8; i++) {
3865 if (s->next_refs[i].f->data[0])
3866 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3867 if (s->refreshrefmask & (1 << i)) {
3868 res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
3869 } else if (s->refs[i].f->data[0]) {
3870 res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
3876 // main tile decode loop
// reset the above-row context lines for the whole frame width
3877 memset(s->above_partition_ctx, 0, s->cols);
3878 memset(s->above_skip_ctx, 0, s->cols);
3879 if (s->keyframe || s->intraonly) {
3880 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
3882 memset(s->above_mode_ctx, NEARESTMV, s->cols);
3884 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16)
3885 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
3886 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
3887 memset(s->above_segpred_ctx, 0, s->cols);
// two-pass decoding only for frame-threading with in-frame context refresh
3888 s->pass = s->frames[CUR_FRAME].uses_2pass =
3889 ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
3890 if ((res = update_block_buffers(ctx)) < 0) {
3891 av_log(ctx, AV_LOG_ERROR,
3892 "Failed to allocate block buffers\n");
// parallelmode: commit the (unadapted) frame probabilities up front so
// other frame threads can proceed without waiting for this frame
3895 if (s->refreshctx && s->parallelmode) {
3898 for (i = 0; i < 4; i++) {
3899 for (j = 0; j < 2; j++)
3900 for (k = 0; k < 2; k++)
3901 for (l = 0; l < 6; l++)
3902 for (m = 0; m < 6; m++)
3903 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
3904 s->prob.coef[i][j][k][l][m], 3);
3905 if (s->txfmmode == i)
3908 s->prob_ctx[s->framectxid].p = s->prob.p;
3909 ff_thread_finish_setup(ctx);
3910 } else if (!s->refreshctx) {
3911 ff_thread_finish_setup(ctx);
// rewind the coefficient/eob buffers for this pass
3917 s->block = s->block_base;
3918 s->uvblock[0] = s->uvblock_base[0];
3919 s->uvblock[1] = s->uvblock_base[1];
3920 s->eob = s->eob_base;
3921 s->uveob[0] = s->uveob_base[0];
3922 s->uveob[1] = s->uveob_base[1];
// set up one range decoder per tile column; the last tile in the packet
// has no explicit size prefix
3924 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
3925 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
3926 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
3928 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3931 if (tile_col == s->tiling.tile_cols - 1 &&
3932 tile_row == s->tiling.tile_rows - 1) {
3935 tile_size = AV_RB32(data);
3939 if (tile_size > size) {
3940 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3941 return AVERROR_INVALIDDATA;
3943 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
3944 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
3945 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3946 return AVERROR_INVALIDDATA;
// decode superblock rows; each row walks all tile columns so that the
// loop filter and progress reporting can run per complete row
3953 for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
3954 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
3955 struct VP9Filter *lflvl_ptr = s->lflvl;
3956 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
3958 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3959 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
3960 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
// reset the left-edge contexts at each tile-column boundary
3963 memset(s->left_partition_ctx, 0, 8);
3964 memset(s->left_skip_ctx, 0, 8);
3965 if (s->keyframe || s->intraonly) {
3966 memset(s->left_mode_ctx, DC_PRED, 16);
3968 memset(s->left_mode_ctx, NEARESTMV, 8);
3970 memset(s->left_y_nnz_ctx, 0, 16);
3971 memset(s->left_uv_nnz_ctx, 0, 32);
3972 memset(s->left_segpred_ctx, 0, 8);
// swap in this tile column's range-decoder state
3974 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
3977 for (col = s->tiling.tile_col_start;
3978 col < s->tiling.tile_col_end;
3979 col += 8, yoff2 += 64, uvoff2 += 64 >> s->ss_h, lflvl_ptr++) {
3980 // FIXME integrate with lf code (i.e. zero after each
3981 // use, similar to invtxfm coefficients, or similar)
3983 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
// pass 2 replays stored block structures; pass 0/1 parses the bitstream
3987 decode_sb_mem(ctx, row, col, lflvl_ptr,
3988 yoff2, uvoff2, BL_64X64);
3990 decode_sb(ctx, row, col, lflvl_ptr,
3991 yoff2, uvoff2, BL_64X64);
3995 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
4003 // backup pre-loopfilter reconstruction data for intra
4004 // prediction of next row of sb64s
4005 if (row + 8 < s->rows) {
4006 memcpy(s->intra_pred_data[0],
4007 f->data[0] + yoff + 63 * ls_y,
4009 memcpy(s->intra_pred_data[1],
4010 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4011 8 * s->cols >> s->ss_h);
4012 memcpy(s->intra_pred_data[2],
4013 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4014 8 * s->cols >> s->ss_h);
4017 // loopfilter one row
4018 if (s->filter.level) {
4021 lflvl_ptr = s->lflvl;
4022 for (col = 0; col < s->cols;
4023 col += 8, yoff2 += 64, uvoff2 += 64 >> s->ss_h, lflvl_ptr++) {
4024 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
4028 // FIXME maybe we can make this more finegrained by running the
4029 // loopfilter per-block instead of after each sbrow
4030 // In fact that would also make intra pred left preparation easier?
4031 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
// after pass 1 (probability adaptation done), unblock consumers
4035 if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
4037 ff_thread_finish_setup(ctx);
4039 } while (s->pass++ == 1);
4040 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
// commit the new reference set for the next frame
4043 for (i = 0; i < 8; i++) {
4044 if (s->refs[i].f->data[0])
4045 ff_thread_release_buffer(ctx, &s->refs[i]);
4046 ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
// only visible frames are returned to the caller
4049 if (!s->invisible) {
4050 if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
/*
 * vp9_decode_flush(): flush callback — drop the 3 internal frame slots
 * and all 8 reference frames so decoding can restart cleanly (e.g. after
 * a seek).  The AVFrame shells themselves stay allocated.
 */
4058 static void vp9_decode_flush(AVCodecContext *ctx)
4060 VP9Context *s = ctx->priv_data;
4063 for (i = 0; i < 3; i++)
4064 vp9_unref_frame(ctx, &s->frames[i]);
4065 for (i = 0; i < 8; i++)
4066 ff_thread_release_buffer(ctx, &s->refs[i]);
/*
 * init_frames(): allocate the AVFrame shells for the 3 internal frame
 * slots and the 8 reference / 8 next-reference ThreadFrames.  On any
 * allocation failure the partially-built state is torn down through
 * vp9_decode_free() and AVERROR(ENOMEM) is returned.
 * Shared by vp9_decode_init() and the thread-copy init.
 */
4069 static int init_frames(AVCodecContext *ctx)
4071 VP9Context *s = ctx->priv_data;
4074 for (i = 0; i < 3; i++) {
4075 s->frames[i].tf.f = av_frame_alloc();
4076 if (!s->frames[i].tf.f) {
4077 vp9_decode_free(ctx);
4078 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4079 return AVERROR(ENOMEM);
4082 for (i = 0; i < 8; i++) {
4083 s->refs[i].f = av_frame_alloc();
4084 s->next_refs[i].f = av_frame_alloc();
4085 if (!s->refs[i].f || !s->next_refs[i].f) {
4086 vp9_decode_free(ctx);
4087 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4088 return AVERROR(ENOMEM);
/*
 * vp9_decode_init(): codec init callback — set up the VP9/video DSP
 * function tables, mark the loop-filter sharpness as "unset" (-1, so the
 * first frame header forces a limit-table rebuild), enable per-frame
 * progress allocation for frame threading, and allocate the frame shells.
 */
4095 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4097 VP9Context *s = ctx->priv_data;
4099 ctx->internal->allocate_progress = 1;
4100 ff_vp9dsp_init(&s->dsp);
4101 ff_videodsp_init(&s->vdsp, 8);
4102 s->filter.sharpness = -1;
4104 return init_frames(ctx);
/*
 * vp9_decode_init_thread_copy(): per-thread init for frame threading —
 * each worker only needs its own set of frame shells; all other state is
 * copied in vp9_decode_update_thread_context().
 */
4107 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4109 return init_frames(avctx);
/*
 * vp9_decode_update_thread_context(): frame-threading sync — copy the
 * inter-frame decoder state from the thread that just finished a frame
 * (src) into the thread about to decode the next one (dst): the internal
 * frame slots, the post-frame reference set (src's next_refs become dst's
 * refs), and the persistent header-derived fields (probability contexts,
 * loop-filter deltas, segmentation settings).
 *
 * NOTE(review): excerpted listing — the size-change handling body and
 * some closing braces/returns are not visible here.
 */
4112 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4115 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4117 // detect size changes in other threads
4118 if (s->intra_pred_data[0] &&
4119 (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
// take references to the source thread's internal frames
4123 for (i = 0; i < 3; i++) {
4124 if (s->frames[i].tf.f->data[0])
4125 vp9_unref_frame(dst, &s->frames[i]);
4126 if (ssrc->frames[i].tf.f->data[0]) {
4127 if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
// dst's refs become src's next_refs (the post-frame reference set)
4131 for (i = 0; i < 8; i++) {
4132 if (s->refs[i].f->data[0])
4133 ff_thread_release_buffer(dst, &s->refs[i]);
4134 if (ssrc->next_refs[i].f->data[0]) {
4135 if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
// carry over the persistent header state needed to parse the next frame
4140 s->invisible = ssrc->invisible;
4141 s->keyframe = ssrc->keyframe;
4142 s->ss_v = ssrc->ss_v;
4143 s->ss_h = ssrc->ss_h;
4144 s->segmentation.enabled = ssrc->segmentation.enabled;
4145 s->segmentation.update_map = ssrc->segmentation.update_map;
4146 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4147 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
4148 if (ssrc->segmentation.enabled) {
4149 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4150 sizeof(s->segmentation.feat));
4156 AVCodec ff_vp9_decoder = {
4158 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4159 .type = AVMEDIA_TYPE_VIDEO,
4160 .id = AV_CODEC_ID_VP9,
4161 .priv_data_size = sizeof(VP9Context),
4162 .init = vp9_decode_init,
4163 .close = vp9_decode_free,
4164 .decode = vp9_decode_frame,
4165 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4166 .flush = vp9_decode_flush,
4167 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4168 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),