2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
35 #define VP9_SYNCCODE 0x498342
// One decoded VP9 frame plus the refcounted side data (segmentation map and
// per-block motion-vector/reference pairs) that later frames need for
// temporal prediction. NOTE(review): lines are elided in this extract, so the
// struct is incomplete here; the trailing mask[] array presumably belongs to a
// separate loop-filter struct — confirm against the full file.
72 typedef struct VP9Frame {
// single refcounted buffer backing both segmentation_map and mv below
74 AVBufferRef *extradata;
// per-8x8-block segment ids (points into extradata->data)
75 uint8_t *segmentation_map;
// per-8x8-block {mv, ref} pairs (points into extradata->data after the map)
76 struct VP9mvrefPair *mv;
// loop-filter edge bitmasks, indexed [plane][col/row][row][tx-size class]
82 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
83 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
// Per-block decode state: segment id, intra/inter decision, reference
// indices, prediction modes, MVs and transform sizes for one coding block.
// NOTE(review): interior lines are elided in this extract.
86 typedef struct VP9Block {
87 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
88 enum FilterMode filter;
// up to 4 sub-block MVs, each with up to 2 references (compound prediction)
89 VP56mv mv[4 /* b_idx */][2 /* ref */];
// transform sizes for luma and chroma
91 enum TxfmMode tx, uvtx;
// how this block was split from its parent
93 enum BlockPartition bp;
// Main decoder context: frame-header state, probability models, adaptation
// counters and the left/above contextual caches used during block decoding.
// NOTE(review): many members and the nested struct boundaries are elided in
// this extract; the duplicated coef[] declarations below belong to different
// nested structs (prob_ctx / prob / counts) in the full file — confirm there.
96 typedef struct VP9Context {
103 VP9Block *b_base, *b;
// current position in 8x8-block units; row7/col7 are the low 3 bits
105 int row, row7, col, col7;
107 ptrdiff_t y_stride, uv_stride;
// frame flags parsed from the uncompressed header
111 uint8_t keyframe, last_keyframe;
113 uint8_t use_last_frame_mvs;
118 uint8_t refreshrefmask;
119 uint8_t highprecisionmvs;
120 enum FilterMode filtermode;
121 uint8_t allowcompinter;
124 uint8_t parallelmode;
// the two variable compound references (see allowcompinter handling)
128 uint8_t varcompref[2];
// the 8 reference-frame slots, plus the set that replaces them after decode
129 ThreadFrame refs[8], next_refs[8];
131 #define REF_FRAME_MVPAIR 1
132 #define REF_FRAME_SEGMAP 2
// loop-filter limit lookup table, derived from filter level/sharpness
139 uint8_t mblim_lut[64];
// per-plane quantizer deltas relative to yac_qi
147 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
149 #define MAX_SEGMENT 8
153 uint8_t absolute_vals;
159 uint8_t skip_enabled;
// tiling layout of the current frame
168 unsigned log2_tile_cols, log2_tile_rows;
169 unsigned tile_cols, tile_rows;
170 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
// frame size in 64x64 superblocks and in 8x8 blocks
172 unsigned sb_cols, sb_rows, rows, cols;
175 uint8_t coef[4][2][2][6][6][3];
179 uint8_t coef[4][2][2][6][6][11];
// adaptation counters gathered during decode (used for backward updates)
184 unsigned y_mode[4][10];
185 unsigned uv_mode[10][10];
186 unsigned filter[4][3];
187 unsigned mv_mode[7][4];
188 unsigned intra[4][2];
190 unsigned single_ref[5][2][2];
191 unsigned comp_ref[5][2];
192 unsigned tx32p[2][4];
193 unsigned tx16p[2][3];
196 unsigned mv_joint[4];
199 unsigned classes[11];
201 unsigned bits[10][2];
202 unsigned class0_fp[2][4];
204 unsigned class0_hp[2];
207 unsigned partition[4][4][4];
208 unsigned coef[4][2][2][6][6][3];
209 unsigned eob[4][2][2][6][6][2];
211 enum TxfmMode txfmmode;
212 enum CompPredMode comppredmode;
214 // contextual (left/above) cache
215 DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
216 DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
217 DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
218 DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
219 DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
220 DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
221 DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
222 DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
223 DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
224 DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
225 DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
226 DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
// above-row context arrays, allocated per-frame-width in update_size()
227 uint8_t *above_partition_ctx;
228 uint8_t *above_mode_ctx;
229 // FIXME maybe merge some of the below in a flags field?
230 uint8_t *above_y_nnz_ctx;
231 uint8_t *above_uv_nnz_ctx[2];
232 uint8_t *above_skip_ctx; // 1bit
233 uint8_t *above_txfm_ctx; // 2bit
234 uint8_t *above_segpred_ctx; // 1bit
235 uint8_t *above_intra_ctx; // 1bit
236 uint8_t *above_comp_ctx; // 1bit
237 uint8_t *above_ref_ctx; // 2bit
238 uint8_t *above_filter_ctx;
239 VP56mv (*above_mv_ctx)[2];
// bottom row of the previous superblock row, kept for intra prediction
242 uint8_t *intra_pred_data[3];
243 struct VP9Filter *lflvl;
244 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135*144];
246 // block reconstruction intermediates
247 int block_alloc_using_2pass;
248 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
249 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
// MV clamping window for the current block (see clamp_mv())
250 struct { int x, y; } min_mv, max_mv;
251 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64];
252 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64];
// per-reference MV scaling factors (14-bit fixed point) for scaled refs
253 uint16_t mvscale[3][2];
254 uint8_t mvstep[3][2];
// Block {width, height} per block size: row [0] in 4-pixel units, row [1] in
// 8-pixel units (e.g. 64x64 -> {16,16} and {8,8}). NOTE(review): the braces
// delimiting the two sub-tables are elided in this extract.
257 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
259 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
260 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
262 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
263 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
267 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
269 VP9Context *s = ctx->priv_data;
272 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
274 sz = 64 * s->sb_cols * s->sb_rows;
275 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
276 ff_thread_release_buffer(ctx, &f->tf);
277 return AVERROR(ENOMEM);
280 f->segmentation_map = f->extradata->data;
281 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
286 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
288 ff_thread_release_buffer(ctx, &f->tf);
289 av_buffer_unref(&f->extradata);
292 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
296 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
298 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
299 vp9_unref_frame(ctx, dst);
300 return AVERROR(ENOMEM);
303 dst->segmentation_map = src->segmentation_map;
305 dst->uses_2pass = src->uses_2pass;
// (Re)initialize all per-frame-size state: superblock/block counts and the
// single slab that backs the above-row context arrays, intra-pred rows and
// loop-filter levels. No-op if size and pixel format are unchanged.
// NOTE(review): interior lines (the early return, error check, epilogue) are
// elided in this extract.
310 static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
312 VP9Context *s = ctx->priv_data;
315 av_assert0(w > 0 && h > 0);
// fast path: geometry and format unchanged, keep existing allocations
317 if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
// frame size in 64x64 superblocks and in 8x8 blocks
323 s->sb_cols = (w + 63) >> 6;
324 s->sb_rows = (h + 63) >> 6;
325 s->cols = (w + 7) >> 3;
326 s->rows = (h + 7) >> 3;
// carve successive arrays out of the single allocation p
328 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
329 av_freep(&s->intra_pred_data[0]);
330 // FIXME we slightly over-allocate here for subsampled chroma, but a little
331 // bit of padding shouldn't affect performance...
332 p = av_malloc(s->sb_cols * (320 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
334 return AVERROR(ENOMEM);
335 assign(s->intra_pred_data[0], uint8_t *, 64);
336 assign(s->intra_pred_data[1], uint8_t *, 64);
337 assign(s->intra_pred_data[2], uint8_t *, 64);
338 assign(s->above_y_nnz_ctx, uint8_t *, 16);
339 assign(s->above_mode_ctx, uint8_t *, 16);
340 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
341 assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
342 assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
343 assign(s->above_partition_ctx, uint8_t *, 8);
344 assign(s->above_skip_ctx, uint8_t *, 8);
345 assign(s->above_txfm_ctx, uint8_t *, 8);
346 assign(s->above_segpred_ctx, uint8_t *, 8);
347 assign(s->above_intra_ctx, uint8_t *, 8);
348 assign(s->above_comp_ctx, uint8_t *, 8);
349 assign(s->above_ref_ctx, uint8_t *, 8);
350 assign(s->above_filter_ctx, uint8_t *, 8);
351 assign(s->lflvl, struct VP9Filter *, 1);
354 // these will be re-allocated a little later
355 av_freep(&s->b_base);
356 av_freep(&s->block_base);
// (Re)allocate the per-block scratch buffers (coefficient blocks and EOB
// arrays). In 2-pass (frame-threaded) mode every superblock needs its own
// slot; otherwise one slot is reused for the whole frame. No-op if buffers
// exist and the 2-pass mode is unchanged. NOTE(review): some interior lines
// (the early return, closing braces, return 0) are elided in this extract.
361 static int update_block_buffers(AVCodecContext *ctx)
363 VP9Context *s = ctx->priv_data;
364 int chroma_blocks, chroma_eobs;
// fast path: allocation already matches the current 2-pass mode
366 if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
370 av_free(s->block_base);
// chroma sizes shrink with horizontal/vertical subsampling
371 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
372 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
373 if (s->frames[CUR_FRAME].uses_2pass) {
374 int sbs = s->sb_cols * s->sb_rows;
// 2-pass: one block record per 8x8 block, one coef/eob slot per superblock
376 s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
377 s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * sizeof(int16_t) +
378 16 * 16 + 2 * chroma_eobs) * sbs);
379 if (!s->b_base || !s->block_base)
380 return AVERROR(ENOMEM);
// sub-buffers are carved sequentially out of block_base: Y coefs, U, V,
// then the Y/U/V EOB byte arrays
381 s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
382 s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks;
383 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks);
384 s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
385 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
// single-pass: one reusable slot for everything
387 s->b_base = av_malloc(sizeof(VP9Block));
388 s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * sizeof(int16_t) +
389 16 * 16 + 2 * chroma_eobs);
390 if (!s->b_base || !s->block_base)
391 return AVERROR(ENOMEM);
392 s->uvblock_base[0] = s->block_base + 64 * 64;
393 s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks;
394 s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks);
395 s->uveob_base[0] = s->eob_base + 16 * 16;
396 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
// remember which layout we allocated, for the fast path above
398 s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
403 // for some reason the sign bit is at the end, not the start, of a bit sequence
404 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
406 int v = get_bits(gb, n);
407 return get_bits1(gb) ? -v : v;
410 static av_always_inline int inv_recenter_nonneg(int v, int m)
412 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
415 // differential forward probability updates
// Decode a differentially-coded probability update: read a VLC-coded delta
// index, map it through inv_map_table[] and recenter it around the current
// probability p (in [1,255]). Returns the new probability.
// NOTE(review): several interior lines (braces, the local delta variable,
// the final table rows/terminator) are elided in this extract.
416 static int update_prob(VP56RangeCoder *c, int p)
// maps the decoded VLC index back to an absolute delta; the first 20
// entries are the 'cheap, rough' updates described below
418 static const int inv_map_table[254] = {
419 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
420 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
421 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
422 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
423 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
424 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
425 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
426 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
427 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
428 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
429 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
430 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
431 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
432 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
433 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
434 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
435 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
436 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
441 /* This code is trying to do a differential probability update. For a
442 * current probability A in the range [1, 255], the difference to a new
443 * probability of any value can be expressed differentially as 1-A,255-A
444 * where some part of this (absolute range) exists both in positive as
445 * well as the negative part, whereas another part only exists in one
446 * half. We're trying to code this shared part differentially, i.e.
447 * times two where the value of the lowest bit specifies the sign, and
448 * the single part is then coded on top of this. This absolute difference
449 * then again has a value of [0,254], but a bigger value in this range
450 * indicates that we're further away from the original value A, so we
451 * can code this as a VLC code, since higher values are increasingly
452 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
453 * updates vs. the 'fine, exact' updates further down the range, which
454 * adds one extra dimension to this differential update model. */
// VLC: successively larger delta ranges, each guarded by one rac bit
456 if (!vp8_rac_get(c)) {
457 d = vp8_rac_get_uint(c, 4) + 0;
458 } else if (!vp8_rac_get(c)) {
459 d = vp8_rac_get_uint(c, 4) + 16;
460 } else if (!vp8_rac_get(c)) {
461 d = vp8_rac_get_uint(c, 5) + 32;
463 d = vp8_rac_get_uint(c, 7);
465 d = (d << 1) - 65 + vp8_rac_get(c);
// recenter around p, mirroring for the upper half so the result stays
// within [1,255]
469 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
470 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
// Parse the colorspace/subsampling bits of the frame header and derive the
// pixel format; also sets ctx->colorspace, ctx->color_range and s->ss_h/ss_v.
// Returns the pixel format, or a negative error code on invalid combinations.
// NOTE(review): some interior lines (else branches, closing braces, the final
// return) are elided in this extract.
473 static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
475 static const enum AVColorSpace colorspaces[8] = {
476 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
477 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
479 VP9Context *s = ctx->priv_data;
480 enum AVPixelFormat res;
482 ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
483 if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
484 if (s->profile == 1) {
// RGB implies 4:4:4 layout and full range
485 s->ss_h = s->ss_v = 1;
486 res = AV_PIX_FMT_GBRP;
487 ctx->color_range = AVCOL_RANGE_JPEG;
489 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
490 return AVERROR_INVALIDDATA;
// YUV: pixel format chosen by the two subsampling bits
493 static const enum AVPixelFormat pix_fmt_for_ss[2 /* v */][2 /* h */] = {
494 { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
495 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P },
497 ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
498 if (s->profile == 1) {
499 s->ss_h = get_bits1(&s->gb);
500 s->ss_v = get_bits1(&s->gb);
501 if ((res = pix_fmt_for_ss[s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
502 av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile 1\n");
503 return AVERROR_INVALIDDATA;
504 } else if (get_bits1(&s->gb)) {
505 av_log(ctx, AV_LOG_ERROR, "Profile 1 color details reserved bit set\n");
506 return AVERROR_INVALIDDATA;
// profile 0: always 4:2:0
509 s->ss_h = s->ss_v = 1;
510 res = AV_PIX_FMT_YUV420P;
// Parse a complete VP9 frame header: the uncompressed part (frame type,
// size, references, loop filter, quantizers, segmentation, tiling) followed
// by the arith-coded compressed part (probability updates). On success
// returns the total header size in bytes; negative AVERROR on error.
// NOTE(review): many interior lines (braces, else branches, some statements)
// are elided in this extract.
517 static int decode_frame_header(AVCodecContext *ctx,
518 const uint8_t *data, int size, int *ref)
520 VP9Context *s = ctx->priv_data;
521 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
522 enum AVPixelFormat fmt = ctx->pix_fmt;
524 const uint8_t *data2;
// ---- uncompressed (bit-exact) header ----
527 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
528 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
531 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
532 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
533 return AVERROR_INVALIDDATA;
535 s->profile = get_bits1(&s->gb);
536 s->profile |= get_bits1(&s->gb) << 1;
537 if (s->profile > 1) {
538 av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", s->profile);
539 return AVERROR_INVALIDDATA;
// show_existing_frame: directly display a stored reference, no decode
541 if (get_bits1(&s->gb)) {
542 *ref = get_bits(&s->gb, 3);
545 s->last_keyframe = s->keyframe;
546 s->keyframe = !get_bits1(&s->gb);
547 last_invisible = s->invisible;
548 s->invisible = !get_bits1(&s->gb);
549 s->errorres = get_bits1(&s->gb);
550 s->use_last_frame_mvs = !s->errorres && !last_invisible;
// keyframe path: sync code, colorspace, frame size
552 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
553 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
554 return AVERROR_INVALIDDATA;
556 if ((fmt = read_colorspace_details(ctx)) < 0)
558 // for profile 1, here follows the subsampling bits
559 s->refreshrefmask = 0xff;
560 w = get_bits(&s->gb, 16) + 1;
561 h = get_bits(&s->gb, 16) + 1;
562 if (get_bits1(&s->gb)) // display size
563 skip_bits(&s->gb, 32);
// inter-frame path
565 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
566 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
// intra-only frames carry their own sync code and size
568 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
569 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
570 return AVERROR_INVALIDDATA;
572 if (s->profile == 1) {
573 if ((fmt = read_colorspace_details(ctx)) < 0)
// profile 0 intra-only frames are implicitly 4:2:0 BT.470BG/JPEG
576 s->ss_h = s->ss_v = 1;
577 fmt = AV_PIX_FMT_YUV420P;
578 ctx->colorspace = AVCOL_SPC_BT470BG;
579 ctx->color_range = AVCOL_RANGE_JPEG;
581 s->refreshrefmask = get_bits(&s->gb, 8);
582 w = get_bits(&s->gb, 16) + 1;
583 h = get_bits(&s->gb, 16) + 1;
584 if (get_bits1(&s->gb)) // display size
585 skip_bits(&s->gb, 32);
// regular inter frame: three references, each with a sign bias
587 s->refreshrefmask = get_bits(&s->gb, 8);
588 s->refidx[0] = get_bits(&s->gb, 3);
589 s->signbias[0] = get_bits1(&s->gb);
590 s->refidx[1] = get_bits(&s->gb, 3);
591 s->signbias[1] = get_bits1(&s->gb);
592 s->refidx[2] = get_bits(&s->gb, 3);
593 s->signbias[2] = get_bits1(&s->gb);
594 if (!s->refs[s->refidx[0]].f->data[0] ||
595 !s->refs[s->refidx[1]].f->data[0] ||
596 !s->refs[s->refidx[2]].f->data[0]) {
597 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
598 return AVERROR_INVALIDDATA;
// frame size: either inherited from one of the refs or coded explicitly
600 if (get_bits1(&s->gb)) {
601 w = s->refs[s->refidx[0]].f->width;
602 h = s->refs[s->refidx[0]].f->height;
603 } else if (get_bits1(&s->gb)) {
604 w = s->refs[s->refidx[1]].f->width;
605 h = s->refs[s->refidx[1]].f->height;
606 } else if (get_bits1(&s->gb)) {
607 w = s->refs[s->refidx[2]].f->width;
608 h = s->refs[s->refidx[2]].f->height;
610 w = get_bits(&s->gb, 16) + 1;
611 h = get_bits(&s->gb, 16) + 1;
613 // Note that in this code, "CUR_FRAME" is actually before we
614 // have formally allocated a frame, and thus actually represents
// temporal MVs are only usable if the size did not change
616 s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
617 s->frames[CUR_FRAME].tf.f->height == h;
618 if (get_bits1(&s->gb)) // display size
619 skip_bits(&s->gb, 32);
620 s->highprecisionmvs = get_bits1(&s->gb);
621 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
// compound prediction is allowed only if the refs disagree in sign bias
623 s->allowcompinter = s->signbias[0] != s->signbias[1] ||
624 s->signbias[0] != s->signbias[2];
625 if (s->allowcompinter) {
626 if (s->signbias[0] == s->signbias[1]) {
628 s->varcompref[0] = 0;
629 s->varcompref[1] = 1;
630 } else if (s->signbias[0] == s->signbias[2]) {
632 s->varcompref[0] = 0;
633 s->varcompref[1] = 2;
636 s->varcompref[0] = 1;
637 s->varcompref[1] = 2;
// per-reference MV scaling when ref and frame dimensions differ
641 for (i = 0; i < 3; i++) {
642 AVFrame *ref = s->refs[s->refidx[i]].f;
643 int refw = ref->width, refh = ref->height;
645 if (refw == w && refh == h) {
646 s->mvscale[i][0] = s->mvscale[i][1] = 0;
648 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
649 av_log(ctx, AV_LOG_ERROR,
650 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
652 return AVERROR_INVALIDDATA;
// 14-bit fixed-point scale factors
654 s->mvscale[i][0] = (refw << 14) / w;
655 s->mvscale[i][1] = (refh << 14) / h;
656 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
657 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
662 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
663 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
664 s->framectxid = c = get_bits(&s->gb, 2);
666 /* loopfilter header data */
667 s->filter.level = get_bits(&s->gb, 6);
668 sharp = get_bits(&s->gb, 3);
669 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
670 // the old cache values since they are still valid
671 if (s->filter.sharpness != sharp)
672 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
673 s->filter.sharpness = sharp;
674 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
675 if (get_bits1(&s->gb)) {
676 for (i = 0; i < 4; i++)
677 if (get_bits1(&s->gb))
678 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
679 for (i = 0; i < 2; i++)
680 if (get_bits1(&s->gb))
681 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
685 /* quantization header data */
686 s->yac_qi = get_bits(&s->gb, 8);
687 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
688 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
689 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
// all-zero quantizers means lossless coding
690 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
691 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
693 /* segmentation header info */
694 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
695 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
696 for (i = 0; i < 7; i++)
697 s->prob.seg[i] = get_bits1(&s->gb) ?
698 get_bits(&s->gb, 8) : 255;
699 if ((s->segmentation.temporal = get_bits1(&s->gb))) {
700 for (i = 0; i < 3; i++)
701 s->prob.segpred[i] = get_bits1(&s->gb) ?
702 get_bits(&s->gb, 8) : 255;
// reusing the previous segmap is invalid across a size change
705 if ((!s->segmentation.update_map || s->segmentation.temporal) &&
706 (w != s->frames[CUR_FRAME].tf.f->width ||
707 h != s->frames[CUR_FRAME].tf.f->height)) {
708 av_log(ctx, AV_LOG_ERROR,
709 "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
710 s->segmentation.temporal, s->segmentation.update_map);
711 return AVERROR_INVALIDDATA;
// per-segment feature data (quantizer, loop filter, ref, skip)
714 if (get_bits1(&s->gb)) {
715 s->segmentation.absolute_vals = get_bits1(&s->gb);
716 for (i = 0; i < 8; i++) {
717 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
718 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
719 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
720 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
721 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
722 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
723 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
// segmentation disabled: neutral settings for the single segment 0
727 s->segmentation.feat[0].q_enabled = 0;
728 s->segmentation.feat[0].lf_enabled = 0;
729 s->segmentation.feat[0].skip_enabled = 0;
730 s->segmentation.feat[0].ref_enabled = 0;
733 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
734 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
735 int qyac, qydc, quvac, quvdc, lflvl, sh;
737 if (s->segmentation.feat[i].q_enabled) {
738 if (s->segmentation.absolute_vals)
739 qyac = s->segmentation.feat[i].q_val;
741 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
745 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
746 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
747 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
748 qyac = av_clip_uintp2(qyac, 8);
750 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
751 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
752 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
753 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];
// per-segment loop-filter level, optionally shifted by the deltas
755 sh = s->filter.level >= 32;
756 if (s->segmentation.feat[i].lf_enabled) {
757 if (s->segmentation.absolute_vals)
758 lflvl = s->segmentation.feat[i].lf_val;
760 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
762 lflvl = s->filter.level;
764 if (s->lf_delta.enabled) {
765 s->segmentation.feat[i].lflvl[0][0] =
766 s->segmentation.feat[i].lflvl[0][1] =
767 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
768 for (j = 1; j < 4; j++) {
769 s->segmentation.feat[i].lflvl[j][0] =
770 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
771 s->lf_delta.mode[0]) * (1 << sh)), 6);
772 s->segmentation.feat[i].lflvl[j][1] =
773 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
774 s->lf_delta.mode[1]) * (1 << sh)), 6);
777 memset(s->segmentation.feat[i].lflvl, lflvl,
778 sizeof(s->segmentation.feat[i].lflvl));
// (re)allocate per-size buffers now that w/h/fmt are known
783 if ((res = update_size(ctx, w, h, fmt)) < 0) {
784 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
/* tiling info */
787 for (s->tiling.log2_tile_cols = 0;
788 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
789 s->tiling.log2_tile_cols++) ;
790 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
791 max = FFMAX(0, max - 1);
792 while (max > s->tiling.log2_tile_cols) {
793 if (get_bits1(&s->gb))
794 s->tiling.log2_tile_cols++;
798 s->tiling.log2_tile_rows = decode012(&s->gb);
799 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
// one range coder per tile column
800 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
801 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
802 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
803 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
805 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
806 return AVERROR(ENOMEM);
// keyframes/error-resilient/intra-only frames reset all prob contexts
810 if (s->keyframe || s->errorres || s->intraonly) {
811 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
812 s->prob_ctx[3].p = vp9_default_probs;
813 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
814 sizeof(vp9_default_coef_probs));
815 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
816 sizeof(vp9_default_coef_probs));
817 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
818 sizeof(vp9_default_coef_probs));
819 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
820 sizeof(vp9_default_coef_probs));
823 // next 16 bits is size of the rest of the header (arith-coded)
824 size2 = get_bits(&s->gb, 16);
825 data2 = align_get_bits(&s->gb);
826 if (size2 > size - (data2 - data)) {
827 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
828 return AVERROR_INVALIDDATA;
// ---- compressed (arith-coded) header ----
830 ff_vp56_init_range_decoder(&s->c, data2, size2);
831 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
832 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
833 return AVERROR_INVALIDDATA;
// reset adaptation counters for this frame
836 if (s->keyframe || s->intraonly) {
837 memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
839 memset(&s->counts, 0, sizeof(s->counts));
841 // FIXME is it faster to not copy here, but do it down in the fw updates
842 // as explicit copies if the fw update is missing (and skip the copy upon
844 s->prob.p = s->prob_ctx[c].p;
/* txfm updates */
848 s->txfmmode = TX_4X4;
850 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
851 if (s->txfmmode == 3)
852 s->txfmmode += vp8_rac_get(&s->c);
854 if (s->txfmmode == TX_SWITCHABLE) {
855 for (i = 0; i < 2; i++)
856 if (vp56_rac_get_prob_branchy(&s->c, 252))
857 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
858 for (i = 0; i < 2; i++)
859 for (j = 0; j < 2; j++)
860 if (vp56_rac_get_prob_branchy(&s->c, 252))
861 s->prob.p.tx16p[i][j] =
862 update_prob(&s->c, s->prob.p.tx16p[i][j]);
863 for (i = 0; i < 2; i++)
864 for (j = 0; j < 3; j++)
865 if (vp56_rac_get_prob_branchy(&s->c, 252))
866 s->prob.p.tx32p[i][j] =
867 update_prob(&s->c, s->prob.p.tx32p[i][j]);
/* coef probability updates, per tx size */
872 for (i = 0; i < 4; i++) {
873 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
874 if (vp8_rac_get(&s->c)) {
875 for (j = 0; j < 2; j++)
876 for (k = 0; k < 2; k++)
877 for (l = 0; l < 6; l++)
878 for (m = 0; m < 6; m++) {
879 uint8_t *p = s->prob.coef[i][j][k][l][m];
880 uint8_t *r = ref[j][k][l][m];
881 if (m >= 3 && l == 0) // dc only has 3 pt
883 for (n = 0; n < 3; n++) {
884 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
885 p[n] = update_prob(&s->c, r[n]);
// no update for this tx size: copy the reference probabilities
893 for (j = 0; j < 2; j++)
894 for (k = 0; k < 2; k++)
895 for (l = 0; l < 6; l++)
896 for (m = 0; m < 6; m++) {
897 uint8_t *p = s->prob.coef[i][j][k][l][m];
898 uint8_t *r = ref[j][k][l][m];
899 if (m > 3 && l == 0) // dc only has 3 pt
905 if (s->txfmmode == i)
/* mode/ref/filter probability updates */
910 for (i = 0; i < 3; i++)
911 if (vp56_rac_get_prob_branchy(&s->c, 252))
912 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
913 if (!s->keyframe && !s->intraonly) {
914 for (i = 0; i < 7; i++)
915 for (j = 0; j < 3; j++)
916 if (vp56_rac_get_prob_branchy(&s->c, 252))
917 s->prob.p.mv_mode[i][j] =
918 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
920 if (s->filtermode == FILTER_SWITCHABLE)
921 for (i = 0; i < 4; i++)
922 for (j = 0; j < 2; j++)
923 if (vp56_rac_get_prob_branchy(&s->c, 252))
924 s->prob.p.filter[i][j] =
925 update_prob(&s->c, s->prob.p.filter[i][j]);
927 for (i = 0; i < 4; i++)
928 if (vp56_rac_get_prob_branchy(&s->c, 252))
929 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
931 if (s->allowcompinter) {
932 s->comppredmode = vp8_rac_get(&s->c);
934 s->comppredmode += vp8_rac_get(&s->c);
935 if (s->comppredmode == PRED_SWITCHABLE)
936 for (i = 0; i < 5; i++)
937 if (vp56_rac_get_prob_branchy(&s->c, 252))
939 update_prob(&s->c, s->prob.p.comp[i]);
941 s->comppredmode = PRED_SINGLEREF;
944 if (s->comppredmode != PRED_COMPREF) {
945 for (i = 0; i < 5; i++) {
946 if (vp56_rac_get_prob_branchy(&s->c, 252))
947 s->prob.p.single_ref[i][0] =
948 update_prob(&s->c, s->prob.p.single_ref[i][0]);
949 if (vp56_rac_get_prob_branchy(&s->c, 252))
950 s->prob.p.single_ref[i][1] =
951 update_prob(&s->c, s->prob.p.single_ref[i][1]);
955 if (s->comppredmode != PRED_SINGLEREF) {
956 for (i = 0; i < 5; i++)
957 if (vp56_rac_get_prob_branchy(&s->c, 252))
958 s->prob.p.comp_ref[i] =
959 update_prob(&s->c, s->prob.p.comp_ref[i]);
962 for (i = 0; i < 4; i++)
963 for (j = 0; j < 9; j++)
964 if (vp56_rac_get_prob_branchy(&s->c, 252))
965 s->prob.p.y_mode[i][j] =
966 update_prob(&s->c, s->prob.p.y_mode[i][j]);
968 for (i = 0; i < 4; i++)
969 for (j = 0; j < 4; j++)
970 for (k = 0; k < 3; k++)
971 if (vp56_rac_get_prob_branchy(&s->c, 252))
972 s->prob.p.partition[3 - i][j][k] =
973 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
975 // mv fields don't use the update_prob subexp model for some reason
976 for (i = 0; i < 3; i++)
977 if (vp56_rac_get_prob_branchy(&s->c, 252))
978 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
980 for (i = 0; i < 2; i++) {
981 if (vp56_rac_get_prob_branchy(&s->c, 252))
982 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
984 for (j = 0; j < 10; j++)
985 if (vp56_rac_get_prob_branchy(&s->c, 252))
986 s->prob.p.mv_comp[i].classes[j] =
987 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
989 if (vp56_rac_get_prob_branchy(&s->c, 252))
990 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
992 for (j = 0; j < 10; j++)
993 if (vp56_rac_get_prob_branchy(&s->c, 252))
994 s->prob.p.mv_comp[i].bits[j] =
995 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
998 for (i = 0; i < 2; i++) {
999 for (j = 0; j < 2; j++)
1000 for (k = 0; k < 3; k++)
1001 if (vp56_rac_get_prob_branchy(&s->c, 252))
1002 s->prob.p.mv_comp[i].class0_fp[j][k] =
1003 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1005 for (j = 0; j < 3; j++)
1006 if (vp56_rac_get_prob_branchy(&s->c, 252))
1007 s->prob.p.mv_comp[i].fp[j] =
1008 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// high-precision MV probabilities are only coded when enabled
1011 if (s->highprecisionmvs) {
1012 for (i = 0; i < 2; i++) {
1013 if (vp56_rac_get_prob_branchy(&s->c, 252))
1014 s->prob.p.mv_comp[i].class0_hp =
1015 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1017 if (vp56_rac_get_prob_branchy(&s->c, 252))
1018 s->prob.p.mv_comp[i].hp =
1019 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// total bytes consumed: uncompressed part + compressed part
1024 return (data2 - data) + size2;
1027 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
1030 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
1031 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
1034 static void find_ref_mvs(VP9Context *s,
1035 VP56mv *pmv, int ref, int z, int idx, int sb)
1037 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
1038 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
1039 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
1040 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
1041 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
1042 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
1043 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
1044 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
1045 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1046 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
1047 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1048 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
1049 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
1050 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
1051 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1052 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
1053 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
1054 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1055 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1056 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1057 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1058 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1059 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1060 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1061 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1062 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1063 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1066 int row = s->row, col = s->col, row7 = s->row7;
1067 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
1068 #define INVALID_MV 0x80008000U
1069 uint32_t mem = INVALID_MV;
1072 #define RETURN_DIRECT_MV(mv) \
1074 uint32_t m = AV_RN32A(&mv); \
1078 } else if (mem == INVALID_MV) { \
1080 } else if (m != mem) { \
1087 if (sb == 2 || sb == 1) {
1088 RETURN_DIRECT_MV(b->mv[0][z]);
1089 } else if (sb == 3) {
1090 RETURN_DIRECT_MV(b->mv[2][z]);
1091 RETURN_DIRECT_MV(b->mv[1][z]);
1092 RETURN_DIRECT_MV(b->mv[0][z]);
1095 #define RETURN_MV(mv) \
1100 clamp_mv(&tmp, &mv, s); \
1101 m = AV_RN32A(&tmp); \
1105 } else if (mem == INVALID_MV) { \
1107 } else if (m != mem) { \
1112 uint32_t m = AV_RN32A(&mv); \
1114 clamp_mv(pmv, &mv, s); \
1116 } else if (mem == INVALID_MV) { \
1118 } else if (m != mem) { \
1119 clamp_mv(pmv, &mv, s); \
1126 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1127 if (mv->ref[0] == ref) {
1128 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1129 } else if (mv->ref[1] == ref) {
1130 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
1133 if (col > s->tiling.tile_col_start) {
1134 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1135 if (mv->ref[0] == ref) {
1136 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1137 } else if (mv->ref[1] == ref) {
1138 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1146 // previously coded MVs in this neighbourhood, using same reference frame
1147 for (; i < 8; i++) {
1148 int c = p[i][0] + col, r = p[i][1] + row;
1150 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1151 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1153 if (mv->ref[0] == ref) {
1154 RETURN_MV(mv->mv[0]);
1155 } else if (mv->ref[1] == ref) {
1156 RETURN_MV(mv->mv[1]);
1161 // MV at this position in previous frame, using same reference frame
1162 if (s->use_last_frame_mvs) {
1163 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1165 if (!s->frames[REF_FRAME_MVPAIR].uses_2pass)
1166 ff_thread_await_progress(&s->frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
1167 if (mv->ref[0] == ref) {
1168 RETURN_MV(mv->mv[0]);
1169 } else if (mv->ref[1] == ref) {
1170 RETURN_MV(mv->mv[1]);
1174 #define RETURN_SCALE_MV(mv, scale) \
1177 VP56mv mv_temp = { -mv.x, -mv.y }; \
1178 RETURN_MV(mv_temp); \
1184 // previously coded MVs in this neighbourhood, using different reference frame
1185 for (i = 0; i < 8; i++) {
1186 int c = p[i][0] + col, r = p[i][1] + row;
1188 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1189 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1191 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1192 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1194 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1195 // BUG - libvpx has this condition regardless of whether
1196 // we used the first ref MV and pre-scaling
1197 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1198 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1203 // MV at this position in previous frame, using different reference frame
1204 if (s->use_last_frame_mvs) {
1205 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1207 // no need to await_progress, because we already did that above
1208 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1209 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1211 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1212 // BUG - libvpx has this condition regardless of whether
1213 // we used the first ref MV and pre-scaling
1214 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1215 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1222 #undef RETURN_SCALE_MV
// Decode one motion-vector component (row or column delta) from the range
// coder. idx selects which of the two per-component probability models to
// use; hp enables the extra high-precision (eighth-pel) bit. Returns the
// signed component delta. Adaptation counters in s->counts are bumped for
// every symbol read so probabilities can be updated at frame end.
// NOTE(review): several lines of this function (branch headers, closing
// braces) are elided in this listing; the split between the "class0" short
// path and the long-class path below is only partially visible.
1225 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1227 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1228 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1229 s->prob.p.mv_comp[idx].classes);
1231 s->counts.mv_comp[idx].sign[sign]++;
1232 s->counts.mv_comp[idx].classes[c]++;
// Long-class path: read c integer bits, then the fractional (fp) bits.
1236 for (n = 0, m = 0; m < c; m++) {
1237 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1239 s->counts.mv_comp[idx].bits[m][bit]++;
1242 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1244 s->counts.mv_comp[idx].fp[bit]++;
// High-precision bit is only read when hp is enabled; otherwise libvpx
// still counts it (see comment below), which this decoder mirrors.
1246 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1247 s->counts.mv_comp[idx].hp[bit]++;
1251 // bug in libvpx - we count for bw entropy purposes even if the
1253 s->counts.mv_comp[idx].hp[1]++;
// class0 path: small magnitudes, separate fp/hp probability models.
1257 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1258 s->counts.mv_comp[idx].class0[n]++;
1259 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1260 s->prob.p.mv_comp[idx].class0_fp[n]);
1261 s->counts.mv_comp[idx].class0_fp[n][bit]++;
1262 n = (n << 3) | (bit << 1);
1264 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1265 s->counts.mv_comp[idx].class0_hp[bit]++;
1269 // bug in libvpx - we count for bw entropy purposes even if the
1271 s->counts.mv_comp[idx].class0_hp[1]++;
// Magnitude n is zero-based; apply the sign and the +1 bias here.
1275 return sign ? -(n + 1) : (n + 1);
// Fill mv[0] (and mv[1] for compound prediction) for the current (sub)block.
// For ZEROMV the vectors are zeroed (in an elided branch); otherwise the
// predictor is found via find_ref_mvs() and, for NEWMV, a decoded delta from
// read_mv_component() is added on top. sb is the sub-block index, or -1 for
// a whole-block fill.
// NOTE(review): the declarations of `b` (presumably VP9Block *b = s->b;) and
// `hp` are on elided lines — confirm against the full source.
1278 static void fill_mv(VP9Context *s,
1279 VP56mv *mv, int mode, int sb)
1283 if (mode == ZEROMV) {
1288 // FIXME cache this value and reuse for other subblocks
1289 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1290 mode == NEWMV ? -1 : sb);
1291 // FIXME maybe move this code into find_ref_mvs()
// High precision is only usable when the predictor is small enough;
// otherwise hp is forced off (elided branch re-rounds the predictor).
1292 if ((mode == NEWMV || sb == -1) &&
1293 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1307 if (mode == NEWMV) {
// Joint symbol tells which of {x, y} carry a decoded delta.
1308 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1309 s->prob.p.mv_joint);
1311 s->counts.mv_joint[j]++;
1312 if (j >= MV_JOINT_V)
1313 mv[0].y += read_mv_component(s, 0, hp);
1315 mv[0].x += read_mv_component(s, 1, hp);
// Second reference (compound prediction): same procedure for mv[1].
1319 // FIXME cache this value and reuse for other subblocks
1320 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1321 mode == NEWMV ? -1 : sb);
1322 if ((mode == NEWMV || sb == -1) &&
1323 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1337 if (mode == NEWMV) {
1338 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1339 s->prob.p.mv_joint);
1341 s->counts.mv_joint[j]++;
1342 if (j >= MV_JOINT_V)
1343 mv[1].y += read_mv_component(s, 0, hp);
1345 mv[1].x += read_mv_component(s, 1, hp);
// Splat the byte value v over a w x h rectangle in a 2D context buffer with
// the given stride. Specialized per width (elided switch): 16-bit, 32-bit
// and 64-bit stores are used for the wider cases instead of per-byte writes.
// NOTE(review): the row loops and the width switch are on elided lines.
1351 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1352 ptrdiff_t stride, int v)
1362 int v16 = v * 0x0101;
1370 uint32_t v32 = v * 0x01010101;
1379 uint64_t v64 = v * 0x0101010101010101ULL;
1385 uint32_t v32 = v * 0x01010101;
1388 AV_WN32A(ptr + 4, v32);
// Decode all mode information for the current block: segment id, skip flag,
// intra/inter decision, transform size, prediction modes (intra) or
// reference frames + inter modes + motion vectors (inter), then propagate
// the decoded state into the above/left context arrays and the per-frame
// MV/reference buffers used by later blocks and the next frame.
// All symbols come from the frame's range coder (s->c) in strict bitstream
// order; adaptation counters in s->counts are updated alongside.
// NOTE(review): this listing elides many lines (else-branches, closing
// braces); comments below describe only what the visible lines establish.
1397 static void decode_mode(AVCodecContext *ctx)
// Per-blocksize partition context nibbles written into above/left ctx.
1399 static const uint8_t left_ctx[N_BS_SIZES] = {
1400 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1402 static const uint8_t above_ctx[N_BS_SIZES] = {
1403 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
// Largest transform size allowed for each block size.
1405 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1406 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1407 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1409 VP9Context *s = ctx->priv_data;
1411 int row = s->row, col = s->col, row7 = s->row7;
1412 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
// w4/h4 clip the block's 8x8-unit extent to the frame edge.
1413 int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1414 int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
1415 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1416 int vref, filter_id;
// --- segment id: explicit, temporally predicted, or defaulted ---
1418 if (!s->segmentation.enabled) {
1420 } else if (s->keyframe || s->intraonly) {
1421 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
1422 } else if (!s->segmentation.update_map ||
1423 (s->segmentation.temporal &&
1424 vp56_rac_get_prob_branchy(&s->c,
1425 s->prob.segpred[s->above_segpred_ctx[col] +
1426 s->left_segpred_ctx[row7]]))) {
// Temporal prediction: take the minimum seg id over the co-located
// area of the previous frame's segmentation map.
1429 uint8_t *refsegmap = s->frames[REF_FRAME_SEGMAP].segmentation_map;
1431 if (!s->frames[REF_FRAME_SEGMAP].uses_2pass)
1432 ff_thread_await_progress(&s->frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
1433 for (y = 0; y < h4; y++) {
1434 int idx_base = (y + row) * 8 * s->sb_cols + col;
1435 for (x = 0; x < w4; x++)
1436 pred = FFMIN(pred, refsegmap[idx_base + x]);
1438 av_assert1(pred < 8);
1444 memset(&s->above_segpred_ctx[col], 1, w4);
1445 memset(&s->left_segpred_ctx[row7], 1, h4);
1447 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1450 memset(&s->above_segpred_ctx[col], 0, w4);
1451 memset(&s->left_segpred_ctx[row7], 0, h4);
// Store the decoded seg id into the current frame's map.
1453 if (s->segmentation.enabled &&
1454 (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1455 setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1456 bw4, bh4, 8 * s->sb_cols, b->seg_id);
// --- skip flag: segment-forced or coded ---
1459 b->skip = s->segmentation.enabled &&
1460 s->segmentation.feat[b->seg_id].skip_enabled;
1462 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1463 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1464 s->counts.skip[c][b->skip]++;
// --- intra/inter decision ---
1467 if (s->keyframe || s->intraonly) {
1469 } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1470 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1474 if (have_a && have_l) {
1475 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1478 c = have_a ? 2 * s->above_intra_ctx[col] :
1479 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1481 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1482 s->counts.intra[c][bit]++;
// --- transform size: coded when switchable, else clamped to max ---
1486 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1490 c = (s->above_skip_ctx[col] ? max_tx :
1491 s->above_txfm_ctx[col]) +
1492 (s->left_skip_ctx[row7] ? max_tx :
1493 s->left_txfm_ctx[row7]) > max_tx;
1495 c = s->above_skip_ctx[col] ? 1 :
1496 (s->above_txfm_ctx[col] * 2 > max_tx);
1498 } else if (have_l) {
1499 c = s->left_skip_ctx[row7] ? 1 :
1500 (s->left_txfm_ctx[row7] * 2 > max_tx);
// Unary-coded tx size, number of bits depends on max_tx (elided switch).
1506 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1508 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1510 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1512 s->counts.tx32p[c][b->tx]++;
1515 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1517 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1518 s->counts.tx16p[c][b->tx]++;
1521 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1522 s->counts.tx8p[c][b->tx]++;
1529 b->tx = FFMIN(max_tx, s->txfmmode);
// --- keyframe/intra-only: context-predicted intra modes ---
1532 if (s->keyframe || s->intraonly) {
1533 uint8_t *a = &s->above_mode_ctx[col * 2];
1534 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
// Sub-8x8 blocks decode up to four y modes (one per 4x4 sub-block).
1537 if (b->bs > BS_8x8) {
1538 // FIXME the memory storage intermediates here aren't really
1539 // necessary, they're just there to make the code slightly
1541 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1542 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1543 if (b->bs != BS_8x4) {
1544 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1545 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1546 l[0] = a[1] = b->mode[1];
1548 l[0] = a[1] = b->mode[1] = b->mode[0];
1550 if (b->bs != BS_4x8) {
1551 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1552 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1553 if (b->bs != BS_8x4) {
1554 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1555 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1556 l[1] = a[1] = b->mode[3];
1558 l[1] = a[1] = b->mode[3] = b->mode[2];
1561 b->mode[2] = b->mode[0];
1562 l[1] = a[1] = b->mode[3] = b->mode[1];
1565 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1566 vp9_default_kf_ymode_probs[*a][*l]);
1567 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1568 // FIXME this can probably be optimized
1569 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1570 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1572 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1573 vp9_default_kf_uvmode_probs[b->mode[3]]);
// --- inter frame, intra block: modes from adapted probabilities ---
1574 } else if (b->intra) {
1576 if (b->bs > BS_8x8) {
1577 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1578 s->prob.p.y_mode[0]);
1579 s->counts.y_mode[0][b->mode[0]]++;
1580 if (b->bs != BS_8x4) {
1581 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1582 s->prob.p.y_mode[0]);
1583 s->counts.y_mode[0][b->mode[1]]++;
1585 b->mode[1] = b->mode[0];
1587 if (b->bs != BS_4x8) {
1588 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1589 s->prob.p.y_mode[0]);
1590 s->counts.y_mode[0][b->mode[2]]++;
1591 if (b->bs != BS_8x4) {
1592 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1593 s->prob.p.y_mode[0]);
1594 s->counts.y_mode[0][b->mode[3]]++;
1596 b->mode[3] = b->mode[2];
1599 b->mode[2] = b->mode[0];
1600 b->mode[3] = b->mode[1];
1603 static const uint8_t size_group[10] = {
1604 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1606 int sz = size_group[b->bs];
1608 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1609 s->prob.p.y_mode[sz]);
1610 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1611 s->counts.y_mode[sz][b->mode[3]]++;
1613 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1614 s->prob.p.uv_mode[b->mode[3]]);
1615 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
// --- inter block: references, inter mode, filter, motion vectors ---
// Context LUT indexed by above/left mode contexts for the mv_mode symbol.
1617 static const uint8_t inter_mode_ctx_lut[14][14] = {
1618 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1619 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1620 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1621 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1622 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1623 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1624 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1625 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1626 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1627 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1628 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1629 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1630 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1631 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
// Segment can pin the reference frame; otherwise decode it.
1634 if (s->segmentation.feat[b->seg_id].ref_enabled) {
1635 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1637 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1639 // read comp_pred flag
1640 if (s->comppredmode != PRED_SWITCHABLE) {
1641 b->comp = s->comppredmode == PRED_COMPREF;
1645 // FIXME add intra as ref=0xff (or -1) to make these easier?
1648 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1650 } else if (s->above_comp_ctx[col]) {
1651 c = 2 + (s->left_intra_ctx[row7] ||
1652 s->left_ref_ctx[row7] == s->fixcompref);
1653 } else if (s->left_comp_ctx[row7]) {
1654 c = 2 + (s->above_intra_ctx[col] ||
1655 s->above_ref_ctx[col] == s->fixcompref);
1657 c = (!s->above_intra_ctx[col] &&
1658 s->above_ref_ctx[col] == s->fixcompref) ^
1659 (!s->left_intra_ctx[row7] &&
// NOTE(review): `row & 7` here where sibling lines use `row7`
// (initialized from s->row7) — presumably equivalent; confirm.
1660 s->left_ref_ctx[row & 7] == s->fixcompref);
1663 c = s->above_comp_ctx[col] ? 3 :
1664 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1666 } else if (have_l) {
1667 c = s->left_comp_ctx[row7] ? 3 :
1668 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1672 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1673 s->counts.comp[c][b->comp]++;
1676 // read actual references
1677 // FIXME probably cache a few variables here to prevent repetitive
1678 // memory accesses below
1679 if (b->comp) /* two references */ {
1680 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1682 b->ref[fix_idx] = s->fixcompref;
1683 // FIXME can this codeblob be replaced by some sort of LUT?
// Context derivation for the variable compound reference.
1686 if (s->above_intra_ctx[col]) {
1687 if (s->left_intra_ctx[row7]) {
1690 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1692 } else if (s->left_intra_ctx[row7]) {
1693 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1695 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1697 if (refl == refa && refa == s->varcompref[1]) {
1699 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1700 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1701 (refl == s->fixcompref && refa == s->varcompref[0])) {
1704 c = (refa == refl) ? 3 : 1;
1706 } else if (!s->left_comp_ctx[row7]) {
1707 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1710 c = (refl == s->varcompref[1] &&
1711 refa != s->varcompref[1]) ? 2 : 4;
1713 } else if (!s->above_comp_ctx[col]) {
1714 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1717 c = (refa == s->varcompref[1] &&
1718 refl != s->varcompref[1]) ? 2 : 4;
1721 c = (refl == refa) ? 4 : 2;
1725 if (s->above_intra_ctx[col]) {
1727 } else if (s->above_comp_ctx[col]) {
1728 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1730 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1733 } else if (have_l) {
1734 if (s->left_intra_ctx[row7]) {
1736 } else if (s->left_comp_ctx[row7]) {
1737 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1739 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1744 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1745 b->ref[var_idx] = s->varcompref[bit];
1746 s->counts.comp_ref[c][bit]++;
1747 } else /* single reference */ {
// First single_ref bit: LAST vs. {GOLDEN, ALTREF}.
1750 if (have_a && !s->above_intra_ctx[col]) {
1751 if (have_l && !s->left_intra_ctx[row7]) {
1752 if (s->left_comp_ctx[row7]) {
1753 if (s->above_comp_ctx[col]) {
1754 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1755 !s->above_ref_ctx[col]);
1757 c = (3 * !s->above_ref_ctx[col]) +
1758 (!s->fixcompref || !s->left_ref_ctx[row7]);
1760 } else if (s->above_comp_ctx[col]) {
1761 c = (3 * !s->left_ref_ctx[row7]) +
1762 (!s->fixcompref || !s->above_ref_ctx[col]);
1764 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1766 } else if (s->above_intra_ctx[col]) {
1768 } else if (s->above_comp_ctx[col]) {
1769 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1771 c = 4 * (!s->above_ref_ctx[col]);
1773 } else if (have_l && !s->left_intra_ctx[row7]) {
1774 if (s->left_intra_ctx[row7]) {
1776 } else if (s->left_comp_ctx[row7]) {
1777 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1779 c = 4 * (!s->left_ref_ctx[row7]);
1784 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1785 s->counts.single_ref[c][0][bit]++;
// Second single_ref bit: GOLDEN vs. ALTREF (only when first bit set).
1789 // FIXME can this codeblob be replaced by some sort of LUT?
1792 if (s->left_intra_ctx[row7]) {
1793 if (s->above_intra_ctx[col]) {
1795 } else if (s->above_comp_ctx[col]) {
1796 c = 1 + 2 * (s->fixcompref == 1 ||
1797 s->above_ref_ctx[col] == 1);
1798 } else if (!s->above_ref_ctx[col]) {
1801 c = 4 * (s->above_ref_ctx[col] == 1);
1803 } else if (s->above_intra_ctx[col]) {
1804 if (s->left_intra_ctx[row7]) {
1806 } else if (s->left_comp_ctx[row7]) {
1807 c = 1 + 2 * (s->fixcompref == 1 ||
1808 s->left_ref_ctx[row7] == 1);
1809 } else if (!s->left_ref_ctx[row7]) {
1812 c = 4 * (s->left_ref_ctx[row7] == 1);
1814 } else if (s->above_comp_ctx[col]) {
1815 if (s->left_comp_ctx[row7]) {
1816 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1817 c = 3 * (s->fixcompref == 1 ||
1818 s->left_ref_ctx[row7] == 1);
1822 } else if (!s->left_ref_ctx[row7]) {
1823 c = 1 + 2 * (s->fixcompref == 1 ||
1824 s->above_ref_ctx[col] == 1);
1826 c = 3 * (s->left_ref_ctx[row7] == 1) +
1827 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1829 } else if (s->left_comp_ctx[row7]) {
1830 if (!s->above_ref_ctx[col]) {
1831 c = 1 + 2 * (s->fixcompref == 1 ||
1832 s->left_ref_ctx[row7] == 1);
1834 c = 3 * (s->above_ref_ctx[col] == 1) +
1835 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1837 } else if (!s->above_ref_ctx[col]) {
1838 if (!s->left_ref_ctx[row7]) {
1841 c = 4 * (s->left_ref_ctx[row7] == 1);
1843 } else if (!s->left_ref_ctx[row7]) {
1844 c = 4 * (s->above_ref_ctx[col] == 1);
1846 c = 2 * (s->left_ref_ctx[row7] == 1) +
1847 2 * (s->above_ref_ctx[col] == 1);
1850 if (s->above_intra_ctx[col] ||
1851 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1853 } else if (s->above_comp_ctx[col]) {
1854 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1856 c = 4 * (s->above_ref_ctx[col] == 1);
1859 } else if (have_l) {
1860 if (s->left_intra_ctx[row7] ||
1861 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1863 } else if (s->left_comp_ctx[row7]) {
1864 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1866 c = 4 * (s->left_ref_ctx[row7] == 1);
1871 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1872 s->counts.single_ref[c][1][bit]++;
1873 b->ref[0] = 1 + bit;
// --- inter mode (whole block when >= 8x8, per sub-block below) ---
1878 if (b->bs <= BS_8x8) {
1879 if (s->segmentation.feat[b->seg_id].skip_enabled) {
1880 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1882 static const uint8_t off[10] = {
1883 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1886 // FIXME this needs to use the LUT tables from find_ref_mvs
1887 // because not all are -1,0/0,-1
1888 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1889 [s->left_mode_ctx[row7 + off[b->bs]]];
1891 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1892 s->prob.p.mv_mode[c]);
1893 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
// Inter modes are numbered from 10; counts array is 0-based.
1894 s->counts.mv_mode[c][b->mode[0] - 10]++;
// --- interpolation filter ---
1898 if (s->filtermode == FILTER_SWITCHABLE) {
1901 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1902 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1903 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1904 s->left_filter_ctx[row7] : 3;
1906 c = s->above_filter_ctx[col];
1908 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1909 c = s->left_filter_ctx[row7];
1914 filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1915 s->prob.p.filter[c]);
1916 s->counts.filter[c][filter_id]++;
1917 b->filter = vp9_filter_lut[filter_id];
1919 b->filter = s->filtermode;
// --- motion vectors: per sub-block for sub-8x8, single fill otherwise ---
1922 if (b->bs > BS_8x8) {
1923 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1925 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1926 s->prob.p.mv_mode[c]);
1927 s->counts.mv_mode[c][b->mode[0] - 10]++;
1928 fill_mv(s, b->mv[0], b->mode[0], 0);
1930 if (b->bs != BS_8x4) {
1931 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1932 s->prob.p.mv_mode[c]);
1933 s->counts.mv_mode[c][b->mode[1] - 10]++;
1934 fill_mv(s, b->mv[1], b->mode[1], 1);
1936 b->mode[1] = b->mode[0];
1937 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1938 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1941 if (b->bs != BS_4x8) {
1942 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1943 s->prob.p.mv_mode[c]);
1944 s->counts.mv_mode[c][b->mode[2] - 10]++;
1945 fill_mv(s, b->mv[2], b->mode[2], 2);
1947 if (b->bs != BS_8x4) {
1948 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1949 s->prob.p.mv_mode[c]);
1950 s->counts.mv_mode[c][b->mode[3] - 10]++;
1951 fill_mv(s, b->mv[3], b->mode[3], 3);
1953 b->mode[3] = b->mode[2];
1954 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1955 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1958 b->mode[2] = b->mode[0];
1959 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1960 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1961 b->mode[3] = b->mode[1];
1962 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1963 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
1966 fill_mv(s, b->mv[0], b->mode[0], -1);
1967 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1968 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1969 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
1970 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1971 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1972 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
// Reference stored into context arrays (variable compound ref if comp).
1975 vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
// SPLAT_CTX: write `val` over n bytes of a ctx array with the widest
// aligned store available; 64-bit variant first, 32-bit fallback below.
1979 #define SPLAT_CTX(var, val, n) \
1981 case 1: var = val; break; \
1982 case 2: AV_WN16A(&var, val * 0x0101); break; \
1983 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1984 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
1986 uint64_t v64 = val * 0x0101010101010101ULL; \
1987 AV_WN64A( &var, v64); \
1988 AV_WN64A(&((uint8_t *) &var)[8], v64); \
1993 #define SPLAT_CTX(var, val, n) \
1995 case 1: var = val; break; \
1996 case 2: AV_WN16A(&var, val * 0x0101); break; \
1997 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1999 uint32_t v32 = val * 0x01010101; \
2000 AV_WN32A( &var, v32); \
2001 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2005 uint32_t v32 = val * 0x01010101; \
2006 AV_WN32A( &var, v32); \
2007 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2008 AV_WN32A(&((uint8_t *) &var)[8], v32); \
2009 AV_WN32A(&((uint8_t *) &var)[12], v32); \
// Propagate decoded block state into the above/left context arrays.
2015 switch (bwh_tab[1][b->bs][0]) {
2016 #define SET_CTXS(dir, off, n) \
2018 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
2019 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
2020 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2021 if (!s->keyframe && !s->intraonly) { \
2022 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
2023 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
2024 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
2026 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2027 if (s->filtermode == FILTER_SWITCHABLE) { \
2028 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2033 case 1: SET_CTXS(above, col, 1); break;
2034 case 2: SET_CTXS(above, col, 2); break;
2035 case 4: SET_CTXS(above, col, 4); break;
2036 case 8: SET_CTXS(above, col, 8); break;
2038 switch (bwh_tab[1][b->bs][1]) {
2039 case 1: SET_CTXS(left, row7, 1); break;
2040 case 2: SET_CTXS(left, row7, 2); break;
2041 case 4: SET_CTXS(left, row7, 4); break;
2042 case 8: SET_CTXS(left, row7, 8); break;
// Store the block's MVs into the above/left MV context for neighbours.
2047 if (!s->keyframe && !s->intraonly) {
2048 if (b->bs > BS_8x8) {
2049 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2051 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2052 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2053 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2054 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2055 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2056 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2057 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2058 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2060 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2062 for (n = 0; n < w4 * 2; n++) {
2063 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2064 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2066 for (n = 0; n < h4 * 2; n++) {
2067 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2068 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
// Fill the per-frame mv/ref buffer consumed by find_ref_mvs() and by the
// next frame's temporal MV prediction.
2074 for (y = 0; y < h4; y++) {
2075 int x, o = (row + y) * s->sb_cols * 8 + col;
2076 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
2079 for (x = 0; x < w4; x++) {
2083 } else if (b->comp) {
2084 for (x = 0; x < w4; x++) {
2085 mv[x].ref[0] = b->ref[0];
2086 mv[x].ref[1] = b->ref[1];
2087 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2088 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2091 for (x = 0; x < w4; x++) {
2092 mv[x].ref[0] = b->ref[0];
2094 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2100 // FIXME merge cnt/eob arguments?
// Decode the quantized coefficients of a single transform block from the
// range coder c into coef[]. nnz is the nonzero context from the left/above
// neighbours, scan/nb give the coefficient scan order and its neighbour
// pairs, band_counts the coefficients per probability band, and qmul the
// DC/AC dequant factors. is_tx32x32 selects the 32x32 variant where the
// dequantized value is halved. Returns the end-of-block position (return on
// an elided line). Token probabilities tp[3..10] are lazily filled from the
// Pareto model table the first time a band/ctx pair is visited.
// NOTE(review): loop header and several closing braces are elided here.
2101 static av_always_inline int
2102 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2103 int is_tx32x32, unsigned (*cnt)[6][3],
2104 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2105 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2106 const int16_t *band_counts, const int16_t *qmul)
2108 int i = 0, band = 0, band_left = band_counts[band];
2109 uint8_t *tp = p[0][nnz];
// cache[] keeps clamped token magnitudes for neighbour-context derivation.
2110 uint8_t cache[1024];
2115 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2116 eob[band][nnz][val]++;
2121 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2122 cnt[band][nnz][0]++;
2124 band_left = band_counts[++band];
// Next nonzero context = average of the two scan neighbours' tokens.
2126 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2128 if (++i == n_coeffs)
2129 break; //invalid input; blocks should end with EOB
2134 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2135 cnt[band][nnz][1]++;
2139 // fill in p[3-10] (model fill) - only once per frame for each pos
2141 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2143 cnt[band][nnz][2]++;
// Token magnitude categories: small values first, then the extrabit
// categories with fixed probabilities matching the VP9 spec tables.
2144 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2145 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2146 cache[rc] = val = 2;
2148 val = 3 + vp56_rac_get_prob(c, tp[5]);
2151 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2153 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2154 val = 5 + vp56_rac_get_prob(c, 159);
2156 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2157 val += vp56_rac_get_prob(c, 145);
2161 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2162 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2163 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2164 val += (vp56_rac_get_prob(c, 148) << 1);
2165 val += vp56_rac_get_prob(c, 140);
2167 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2168 val += (vp56_rac_get_prob(c, 155) << 2);
2169 val += (vp56_rac_get_prob(c, 140) << 1);
2170 val += vp56_rac_get_prob(c, 135);
2172 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2173 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2174 val += (vp56_rac_get_prob(c, 157) << 3);
2175 val += (vp56_rac_get_prob(c, 141) << 2);
2176 val += (vp56_rac_get_prob(c, 134) << 1);
2177 val += vp56_rac_get_prob(c, 130);
// Largest category: 14 extra bits.
2179 val = 67 + (vp56_rac_get_prob(c, 254) << 13);
2180 val += (vp56_rac_get_prob(c, 254) << 12);
2181 val += (vp56_rac_get_prob(c, 254) << 11);
2182 val += (vp56_rac_get_prob(c, 252) << 10);
2183 val += (vp56_rac_get_prob(c, 249) << 9);
2184 val += (vp56_rac_get_prob(c, 243) << 8);
2185 val += (vp56_rac_get_prob(c, 230) << 7);
2186 val += (vp56_rac_get_prob(c, 196) << 6);
2187 val += (vp56_rac_get_prob(c, 177) << 5);
2188 val += (vp56_rac_get_prob(c, 153) << 4);
2189 val += (vp56_rac_get_prob(c, 140) << 3);
2190 val += (vp56_rac_get_prob(c, 133) << 2);
2191 val += (vp56_rac_get_prob(c, 130) << 1);
2192 val += vp56_rac_get_prob(c, 129);
2197 band_left = band_counts[++band];
// Sign bit, then dequantize; the 32x32 variant halves the result.
// qmul[0] applies to the DC (i == 0), qmul[1] to AC coefficients.
2199 coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
2201 coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
2202 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2204 } while (++i < n_coeffs);
2209 static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2210 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2211 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2212 const int16_t (*nb)[2], const int16_t *band_counts,
2213 const int16_t *qmul)
2215 return decode_coeffs_b_generic(c, coef, n_coeffs, 0, cnt, eob, p,
2216 nnz, scan, nb, band_counts, qmul);
2219 static int decode_coeffs_b32(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2220 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2221 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2222 const int16_t (*nb)[2], const int16_t *band_counts,
2223 const int16_t *qmul)
2225 return decode_coeffs_b_generic(c, coef, n_coeffs, 1, cnt, eob, p,
2226 nnz, scan, nb, band_counts, qmul);
// Decode all residual coefficients for the current block: first the luma
// plane (scan order depends on the intra prediction mode's transform type),
// then both chroma planes (always DCT_DCT). Per-transform-size loops are
// generated by the DECODE_*_COEF_LOOP macros; the left/above nonzero
// context bytes are merged down (MERGE_CTX) before decoding at larger
// transform sizes and splatted back out (SPLAT) afterwards.
// NOTE(review): switch headers, macro do/while wrappers and closing braces
// are on elided lines in this listing.
2229 static void decode_coeffs(AVCodecContext *ctx)
2231 VP9Context *s = ctx->priv_data;
2233 int row = s->row, col = s->col;
// Probability/count tables selected by transform size, plane and
// intra/inter; re-pointed at the chroma tables further below.
2234 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2235 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2236 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2237 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
// Clip the coefficient area to the visible frame.
2238 int end_x = FFMIN(2 * (s->cols - col), w4);
2239 int end_y = FFMIN(2 * (s->rows - row), h4);
2240 int n, pl, x, y, res;
2241 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
// Lossless selects the WHT scan set (offset 4 into vp9_scans).
2242 int tx = 4 * s->lossless + b->tx;
2243 const int16_t * const *yscans = vp9_scans[tx];
2244 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2245 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2246 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2247 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2248 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
// Coefficients per probability band for each transform size.
2249 static const int16_t band_counts[4][8] = {
2250 { 1, 2, 3, 4, 3, 16 - 13 },
2251 { 1, 2, 3, 4, 11, 64 - 21 },
2252 { 1, 2, 3, 4, 11, 256 - 21 },
2253 { 1, 2, 3, 4, 11, 1024 - 21 },
2255 const int16_t *y_band_counts = band_counts[b->tx];
2256 const int16_t *uv_band_counts = band_counts[b->uvtx];
// MERGE/MERGE_CTX: collapse `step` nnz-context bytes into one 0/1 flag
// before decoding with a transform larger than 4x4.
2258 #define MERGE(la, end, step, rd) \
2259 for (n = 0; n < end; n += step) \
2260 la[n] = !!rd(&la[n])
2261 #define MERGE_CTX(step, rd) \
2263 MERGE(l, end_y, step, rd); \
2264 MERGE(a, end_x, step, rd); \
// Luma decode loop; mode_index picks the sub-block prediction mode that
// determines the transform type (and thereby the scan order).
2267 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2268 for (n = 0, y = 0; y < end_y; y += step) { \
2269 for (x = 0; x < end_x; x += step, n += step * step) { \
2270 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2271 res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
2272 c, e, p, a[x] + l[y], yscans[txtp], \
2273 ynbs[txtp], y_band_counts, qmul[0]); \
2274 a[x] = l[y] = !!res; \
2276 AV_WN16A(&s->eob[n], res); \
// SPLAT: expand one merged context flag back over `step` bytes; edge
// blocks (cond false) only partially fill to avoid writing past the clip.
2283 #define SPLAT(la, end, step, cond) \
2285 for (n = 1; n < end; n += step) \
2286 la[n] = la[n - 1]; \
2287 } else if (step == 4) { \
2289 for (n = 0; n < end; n += step) \
2290 AV_WN32A(&la[n], la[n] * 0x01010101); \
2292 for (n = 0; n < end; n += step) \
2293 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2295 } else /* step == 8 */ { \
2297 if (HAVE_FAST_64BIT) { \
2298 for (n = 0; n < end; n += step) \
2299 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2301 for (n = 0; n < end; n += step) { \
2302 uint32_t v32 = la[n] * 0x01010101; \
2303 AV_WN32A(&la[n], v32); \
2304 AV_WN32A(&la[n + 4], v32); \
2308 for (n = 0; n < end; n += step) \
2309 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2312 #define SPLAT_CTX(step) \
2314 SPLAT(a, end_x, step, end_x == w4); \
2315 SPLAT(l, end_y, step, end_y == h4); \
// Luma: dispatch per transform size (enclosing switch is elided).
2321 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2324 MERGE_CTX(2, AV_RN16A);
2325 DECODE_Y_COEF_LOOP(2, 0,);
2329 MERGE_CTX(4, AV_RN32A);
2330 DECODE_Y_COEF_LOOP(4, 0,);
2334 MERGE_CTX(8, AV_RN64A);
2335 DECODE_Y_COEF_LOOP(8, 0, 32);
// Chroma decode loop: fixed DCT_DCT scan, per-plane buffers.
2340 #define DECODE_UV_COEF_LOOP(step, decode_coeffs_fn) \
2341 for (n = 0, y = 0; y < end_y; y += step) { \
2342 for (x = 0; x < end_x; x += step, n += step * step) { \
2343 res = decode_coeffs_fn(&s->c, s->uvblock[pl] + 16 * n, \
2344 16 * step * step, c, e, p, a[x] + l[y], \
2345 uvscan, uvnb, uv_band_counts, qmul[1]); \
2346 a[x] = l[y] = !!res; \
2348 AV_WN16A(&s->uveob[pl][n], res); \
2350 s->uveob[pl][n] = res; \
// Re-point the probability/count tables at the chroma (uv) entries.
2355 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2356 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2357 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2362 for (pl = 0; pl < 2; pl++) {
2363 a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2364 l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2367 DECODE_UV_COEF_LOOP(1, decode_coeffs_b);
2370 MERGE_CTX(2, AV_RN16A);
2371 DECODE_UV_COEF_LOOP(2, decode_coeffs_b);
2375 MERGE_CTX(4, AV_RN32A);
2376 DECODE_UV_COEF_LOOP(4, decode_coeffs_b);
2380 MERGE_CTX(8, AV_RN64A);
2381 DECODE_UV_COEF_LOOP(8, decode_coeffs_b32);
// Prepare the top (*a) and left (l) edge pixel arrays needed for intra
// prediction of one transform block, substituting the fixed DC edge values
// (127/128/129) where real neighbour pixels are unavailable, and remap the
// prediction mode to the matching DC_* variant via mode_conv.  Returns the
// (possibly remapped) mode to actually predict with.
// NOTE(review): some original lines (braces/else arms) are missing from this
// extraction; comments describe the visible code only.
2388 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2389 uint8_t *dst_edge, ptrdiff_t stride_edge,
2390 uint8_t *dst_inner, ptrdiff_t stride_inner,
2391 uint8_t *l, int col, int x, int w,
2392 int row, int y, enum TxfmMode tx,
2395 int have_top = row > 0 || y > 0;
2396 int have_left = col > s->tiling.tile_col_start || x > 0;
2397 int have_right = x < w - 1;
// Mode remap table: for each nominal mode, the mode to use depending on
// which neighbours (left/top) actually exist.
2398 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2399 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2400 { DC_127_PRED, VERT_PRED } },
2401 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2402 { HOR_PRED, HOR_PRED } },
2403 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2404 { LEFT_DC_PRED, DC_PRED } },
2405 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2406 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2407 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2408 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2409 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2410 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2411 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2412 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2413 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2414 { DC_127_PRED, VERT_LEFT_PRED } },
2415 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2416 { HOR_UP_PRED, HOR_UP_PRED } },
2417 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2418 { HOR_PRED, TM_VP8_PRED } },
// Which edge pixels each (remapped) mode actually consumes.
2420 static const struct {
2421 uint8_t needs_left:1;
2422 uint8_t needs_top:1;
2423 uint8_t needs_topleft:1;
2424 uint8_t needs_topright:1;
2425 uint8_t invert_left:1;
2426 } edges[N_INTRA_PRED_MODES] = {
2427 [VERT_PRED] = { .needs_top = 1 },
2428 [HOR_PRED] = { .needs_left = 1 },
2429 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2430 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2431 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2432 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2433 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2434 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2435 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2436 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2437 [LEFT_DC_PRED] = { .needs_left = 1 },
2438 [TOP_DC_PRED] = { .needs_top = 1 },
2439 [DC_128_PRED] = { 0 },
2440 [DC_127_PRED] = { 0 },
2441 [DC_129_PRED] = { 0 }
2444 av_assert2(mode >= 0 && mode < 10);
2445 mode = mode_conv[mode][have_left][have_top];
// Fill the top edge array (*a), extending or padding with 127 as needed.
2446 if (edges[mode].needs_top) {
2447 uint8_t *top, *topleft;
2448 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
2449 int n_px_need_tr = 0;
2451 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2454 // if top of sb64-row, use s->intra_pred_data[] instead of
2455 // dst[-stride] for intra prediction (it contains pre- instead of
2456 // post-loopfilter data)
2458 top = !(row & 7) && !y ?
2459 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2460 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2462 topleft = !(row & 7) && !y ?
2463 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2464 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2465 &dst_inner[-stride_inner];
2469 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2470 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2471 n_px_need + n_px_need_tr <= n_px_have) {
// Copy what is available; replicate the last pixel to the right if the
// frame edge cuts the needed span short.
2475 if (n_px_need <= n_px_have) {
2476 memcpy(*a, top, n_px_need);
2478 memcpy(*a, top, n_px_have);
2479 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2480 n_px_need - n_px_have);
2483 memset(*a, 127, n_px_need);
2485 if (edges[mode].needs_topleft) {
2486 if (have_left && have_top) {
2487 (*a)[-1] = topleft[-1];
2489 (*a)[-1] = have_top ? 129 : 127;
2492 if (tx == TX_4X4 && edges[mode].needs_topright) {
2493 if (have_top && have_right &&
2494 n_px_need + n_px_need_tr <= n_px_have) {
2495 memcpy(&(*a)[4], &top[4], 4);
2497 memset(&(*a)[4], (*a)[3], 4);
// Fill the left edge array l[], reading one pixel per row from the column
// left of the destination (inverted for HOR_UP_PRED, see invert_left).
2502 if (edges[mode].needs_left) {
2504 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
2505 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2506 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2508 if (edges[mode].invert_left) {
2509 if (n_px_need <= n_px_have) {
2510 for (i = 0; i < n_px_need; i++)
2511 l[i] = dst[i * stride - 1];
2513 for (i = 0; i < n_px_have; i++)
2514 l[i] = dst[i * stride - 1];
2515 memset(&l[n_px_have], l[n_px_have - 1], n_px_need - n_px_have);
2518 if (n_px_need <= n_px_have) {
2519 for (i = 0; i < n_px_need; i++)
2520 l[n_px_need - 1 - i] = dst[i * stride - 1];
2522 for (i = 0; i < n_px_have; i++)
2523 l[n_px_need - 1 - i] = dst[i * stride - 1];
2524 memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
2528 memset(l, 129, 4 << tx);
// Intra reconstruction of the current block: for each transform unit, build
// the edge arrays via check_intra_mode(), run the dsp intra predictor, and
// add the inverse-transformed residual when the unit has coefficients
// (eob != 0).  Luma first, then both chroma planes.
// NOTE(review): some original lines are missing from this extraction;
// comments describe the visible code only.
2535 static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2537 VP9Context *s = ctx->priv_data;
2539 int row = s->row, col = s->col;
2540 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2541 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2542 int end_x = FFMIN(2 * (s->cols - col), w4);
2543 int end_y = FFMIN(2 * (s->rows - row), h4);
2544 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2545 int uvstep1d = 1 << b->uvtx, p;
// dst writes into the (possibly temporary) block buffer, dst_r into the
// actual reference frame plane; both are advanced in lockstep.
2546 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2547 LOCAL_ALIGNED_32(uint8_t, a_buf, [64]);
2548 LOCAL_ALIGNED_32(uint8_t, l, [32]);
// Luma: one intra prediction + optional itxfm_add per transform unit.
2550 for (n = 0, y = 0; y < end_y; y += step1d) {
2551 uint8_t *ptr = dst, *ptr_r = dst_r;
2552 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2553 ptr_r += 4 * step1d, n += step) {
2554 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2556 uint8_t *a = &a_buf[32];
2557 enum TxfmType txtp = vp9_intra_txfm_type[mode];
2558 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2560 mode = check_intra_mode(s, mode, &a, ptr_r,
2561 s->frames[CUR_FRAME].tf.f->linesize[0],
2562 ptr, s->y_stride, l,
2563 col, x, w4, row, y, b->tx, 0);
2564 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2566 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2567 s->block + 16 * n, eob);
2569 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2570 dst += 4 * step1d * s->y_stride;
// Chroma: same loop per plane, always with mode b->uvmode and DCT_DCT.
2577 step = 1 << (b->uvtx * 2);
2578 for (p = 0; p < 2; p++) {
2579 dst = s->dst[1 + p];
2580 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2581 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2582 uint8_t *ptr = dst, *ptr_r = dst_r;
2583 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2584 ptr_r += 4 * uvstep1d, n += step) {
2585 int mode = b->uvmode;
2586 uint8_t *a = &a_buf[32];
2587 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2589 mode = check_intra_mode(s, mode, &a, ptr_r,
2590 s->frames[CUR_FRAME].tf.f->linesize[1],
2591 ptr, s->uv_stride, l,
2592 col, x, w4, row, y, b->uvtx, p + 1);
2593 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2595 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2596 s->uvblock[p] + 16 * n, eob);
2598 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2599 dst += 4 * uvstep1d * s->uv_stride;
// Luma motion compensation with reference-frame scaling.  The motion vector
// and block position are scaled through the 14-bit fixed-point 'scale'
// factors; waits on the reference frame's decode progress (frame threading)
// before reading, and falls back to the edge-emulation buffer when the
// scaled read would cross the reference frame boundary.
2604 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2605 uint8_t *dst, ptrdiff_t dst_stride,
2606 const uint8_t *ref, ptrdiff_t ref_stride,
2607 ThreadFrame *ref_frame,
2608 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2609 int bw, int bh, int w, int h,
2610 const uint16_t *scale, const uint8_t *step)
2612 #define scale_mv(n, dim) (((int64_t)n * scale[dim]) >> 14)
2613 // BUG libvpx seems to scale the two components separately. This introduces
2614 // rounding errors but we have to reproduce them to be exactly compatible
2615 // with the output from libvpx...
2616 int mx = scale_mv(mv->x * 2, 0) + scale_mv(x * 16, 0);
2617 int my = scale_mv(mv->y * 2, 1) + scale_mv(y * 16, 1);
2618 int refbw_m1, refbh_m1;
2623 ref += y * ref_stride + x;
// refbw_m1/refbh_m1: last reference pixel offset touched by the scaled
// filter, in integer pixels (mx/my are in 1/16-pel here).
2626 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2627 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2628 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2629 // we use +7 because the last 7 pixels of each sbrow can be changed in
2630 // the longest loopfilter of the next sbrow
2631 th = (y + refbh_m1 + 4 + 7) >> 6;
2632 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2633 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
// Out-of-frame read: copy through the edge emulation buffer (stride 144)
// with a 3-pixel border for the subpel filter taps.
2634 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2635 ref - 3 * ref_stride - 3,
2637 refbw_m1 + 8, refbh_m1 + 8,
2638 x - 3, y - 3, w, h);
2639 ref = s->edge_emu_buffer + 3 * 144 + 3;
2642 smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
// Chroma counterpart of mc_luma_scaled(): same scaling/await/edge-emulation
// logic, applied to both the U and V planes (each with its own stride).
// The mx/my computation reproduces a known libvpx rounding quirk (see the
// linked webm issue) for bit-exact compatibility.
2645 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2646 uint8_t *dst_u, uint8_t *dst_v,
2647 ptrdiff_t dst_stride,
2648 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2649 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2650 ThreadFrame *ref_frame,
2651 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2652 int bw, int bh, int w, int h,
2653 const uint16_t *scale, const uint8_t *step)
2655 // BUG https://code.google.com/p/webm/issues/detail?id=820
2656 int mx = scale_mv(mv->x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2657 int my = scale_mv(mv->y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2659 int refbw_m1, refbh_m1;
2664 ref_u += y * src_stride_u + x;
2665 ref_v += y * src_stride_v + x;
2668 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2669 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2670 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2671 // we use +7 because the last 7 pixels of each sbrow can be changed in
2672 // the longest loopfilter of the next sbrow
2673 th = (y + refbh_m1 + 4 + 7) >> 5;
2674 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2675 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
// Edge case: route both planes through the shared emulation buffer
// (stride 144), one after the other.
2676 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2677 ref_u - 3 * src_stride_u - 3,
2679 refbw_m1 + 8, refbh_m1 + 8,
2680 x - 3, y - 3, w, h);
2681 ref_u = s->edge_emu_buffer + 3 * 144 + 3;
2682 smc(dst_u, dst_stride, ref_u, 144, bh, mx, my, step[0], step[1]);
2684 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2685 ref_v - 3 * src_stride_v - 3,
2687 refbw_m1 + 8, refbh_m1 + 8,
2688 x - 3, y - 3, w, h);
2689 ref_v = s->edge_emu_buffer + 3 * 144 + 3;
2690 smc(dst_v, dst_stride, ref_v, 144, bh, mx, my, step[0], step[1]);
2692 smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2693 smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
// Instantiate the inter-prediction template with the *scaled* MC helpers:
// FN(x) names the generated functions x##_scaled, and mc_{luma,chroma}_dir
// forward to mc_{luma,chroma}_scaled with the per-reference scale/step
// tables.  The macros are undefined again after the include.
2697 #define FN(x) x##_scaled
2698 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2699 mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
2700 mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2701 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2702 row, col, mv, bw, bh, w, h, i) \
2703 mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2704 row, col, mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2705 #include "vp9_mc_template.c"
2707 #undef mc_chroma_dir
// Luma motion compensation without reference scaling (same-size reference).
// Waits on the reference frame's decode progress before reading, and only
// uses the edge-emulation path when the subpel filter taps (3 left/top,
// 4 right/bottom, active only when mx/my != 0) would cross the frame edge.
2710 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2711 uint8_t *dst, ptrdiff_t dst_stride,
2712 const uint8_t *ref, ptrdiff_t ref_stride,
2713 ThreadFrame *ref_frame,
2714 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2715 int bw, int bh, int w, int h)
2717 int mx = mv->x, my = mv->y, th;
2721 ref += y * ref_stride + x;
2724 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2725 // we use +7 because the last 7 pixels of each sbrow can be changed in
2726 // the longest loopfilter of the next sbrow
2727 th = (y + bh + 4 * !!my + 7) >> 6;
2728 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2729 if (x < !!mx * 3 || y < !!my * 3 ||
2730 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
// Edge emulation buffer here uses a stride of 80 (unscaled path).
2731 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2732 ref - !!my * 3 * ref_stride - !!mx * 3,
2734 bw + !!mx * 7, bh + !!my * 7,
2735 x - !!mx * 3, y - !!my * 3, w, h);
2736 ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
// mx/my are doubled before the dsp call; chroma (below) passes them as-is —
// NOTE(review): presumably converting luma 1/8-pel mvs to the dsp's subpel
// scale; confirm against the mc function contract.
2739 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
// Chroma counterpart of mc_luma_unscaled(): identical progress-wait and
// edge-emulation logic applied to both the U and V planes, each with its
// own source stride.  The progress row is computed with >> 5 instead of
// >> 6 to account for chroma subsampling.
2742 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2743 uint8_t *dst_u, uint8_t *dst_v,
2744 ptrdiff_t dst_stride,
2745 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2746 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2747 ThreadFrame *ref_frame,
2748 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2749 int bw, int bh, int w, int h)
2751 int mx = mv->x, my = mv->y, th;
2755 ref_u += y * src_stride_u + x;
2756 ref_v += y * src_stride_v + x;
2759 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2760 // we use +7 because the last 7 pixels of each sbrow can be changed in
2761 // the longest loopfilter of the next sbrow
2762 th = (y + bh + 4 * !!my + 7) >> 5;
2763 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2764 if (x < !!mx * 3 || y < !!my * 3 ||
2765 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
// Both planes share the single emulation buffer (stride 80), so U is
// filtered before V's emulated copy overwrites it.
2766 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2767 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2769 bw + !!mx * 7, bh + !!my * 7,
2770 x - !!mx * 3, y - !!my * 3, w, h);
2771 ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2772 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2774 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2775 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2777 bw + !!mx * 7, bh + !!my * 7,
2778 x - !!mx * 3, y - !!my * 3, w, h);
2779 ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2780 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
2782 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2783 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
// Instantiate the inter-prediction template with the *unscaled* MC helpers
// (the scaled instantiation above uses FN(x) = x##_scaled).  The macros are
// undefined again after the include so the names cannot leak further.
2788 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2789 mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2791 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2792 row, col, mv, bw, bh, w, h, i) \
2793 mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2794 row, col, mv, bw, bh, w, h)
2795 #include "vp9_mc_template.c"
// Fixed: these previously read "#undef mc_luma_dir_dir" /
// "#undef mc_chroma_dir_dir" — names that were never defined, so the undefs
// were no-ops and the macros stayed defined past this point (compare the
// correct undefs after the scaled instantiation above).
2796 #undef mc_luma_dir
2797 #undef mc_chroma_dir
// Inter reconstruction of the current block: dispatch to the scaled
// prediction path when either reference needs scaling, otherwise (code not
// fully visible here) the unscaled path; then add the inverse-transformed
// residuals per transform unit for luma and both chroma planes.
// NOTE(review): some original lines are missing from this extraction;
// comments describe the visible code only.
2800 static void inter_recon(AVCodecContext *ctx)
2802 VP9Context *s = ctx->priv_data;
2804 int row = s->row, col = s->col;
// mvscale[ref][0] non-zero means that reference has a different resolution
// than the current frame, so the scaled MC variants must be used.
2806 if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
2807 inter_pred_scaled(ctx);
2812 /* mostly copied intra_recon() */
2814 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2815 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2816 int end_x = FFMIN(2 * (s->cols - col), w4);
2817 int end_y = FFMIN(2 * (s->rows - row), h4);
2818 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2819 int uvstep1d = 1 << b->uvtx, p;
2820 uint8_t *dst = s->dst[0];
// Luma residuals: inter blocks always use DCT_DCT.
2823 for (n = 0, y = 0; y < end_y; y += step1d) {
2825 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2826 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2829 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2830 s->block + 16 * n, eob);
2832 dst += 4 * s->y_stride * step1d;
// Chroma residuals, per plane.
2838 step = 1 << (b->uvtx * 2);
2839 for (p = 0; p < 2; p++) {
2840 dst = s->dst[p + 1];
2841 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2843 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2844 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2847 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2848 s->uvblock[p] + 16 * n, eob);
2850 dst += 4 * uvstep1d * s->uv_stride;
// Build the loopfilter edge bitmasks for one block: for each 8-pixel row of
// the superblock, set bits in lflvl->mask[is_uv][0=row-edges|1=col-edges]
// [y][strength] marking which column positions need filtering, chosen by
// transform size (16/8/4-wide filters, plus the "inner 4x4" class).
// NOTE(review): some original lines (branch heads/braces) are missing from
// this extraction; comments describe the visible code only.
2856 static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
2857 int row_and_7, int col_and_7,
2858 int w, int h, int col_end, int row_end,
2859 enum TxfmMode tx, int skip_inter)
2861 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2862 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2863 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2864 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2866 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2867 // edges. This means that for UV, we work on two subsampled blocks at
2868 // a time, and we only use the topleft block's mode information to set
2869 // things like block strength. Thus, for any block size smaller than
2870 // 16x16, ignore the odd portion of the block.
2871 if (tx == TX_4X4 && is_uv) {
// Non-skipped 4x4 inter blocks: every internal 4px edge is filtered.
2886 if (tx == TX_4X4 && !skip_inter) {
2887 int t = 1 << col_and_7, m_col = (t << w) - t, y;
2888 int m_col_odd = (t << (w - 1)) - t;
2890 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
2892 int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2894 for (y = row_and_7; y < h + row_and_7; y++) {
2895 int col_mask_id = 2 - !(y & 7);
2897 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2898 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2899 // for odd lines, if the odd col is not being filtered,
2900 // skip odd row also:
2907 // if a/c are even row/col and b/d are odd, and d is skipped,
2908 // e.g. right edge of size-66x66.webm, then skip b also (bug)
2909 if ((col_end & 1) && (y & 1)) {
2910 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
2912 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
2916 int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2918 for (y = row_and_7; y < h + row_and_7; y++) {
2919 int col_mask_id = 2 - !(y & 3);
2921 lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
2922 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2923 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
2924 lflvl->mask[is_uv][0][y][3] |= m_col;
2925 lflvl->mask[is_uv][1][y][3] |= m_col;
// Larger transforms / skipped blocks: only the block's outer edges are
// filtered, with the filter width picked from the transform size.
2929 int y, t = 1 << col_and_7, m_col = (t << w) - t;
2932 int mask_id = (tx == TX_8X8);
2933 int l2 = tx + is_uv - 1, step1d = 1 << l2;
2934 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2935 int m_row = m_col & masks[l2];
2937 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
2938 // 8wd loopfilter to prevent going off the visible edge.
2939 if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2940 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2941 int m_row_8 = m_row - m_row_16;
2943 for (y = row_and_7; y < h + row_and_7; y++) {
2944 lflvl->mask[is_uv][0][y][0] |= m_row_16;
2945 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2948 for (y = row_and_7; y < h + row_and_7; y++)
2949 lflvl->mask[is_uv][0][y][mask_id] |= m_row;
2952 if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2953 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2954 lflvl->mask[is_uv][1][y][0] |= m_col;
2955 if (y - row_and_7 == h - 1)
2956 lflvl->mask[is_uv][1][y][1] |= m_col;
2958 for (y = row_and_7; y < h + row_and_7; y += step1d)
2959 lflvl->mask[is_uv][1][y][mask_id] |= m_col;
2961 } else if (tx != TX_4X4) {
2964 mask_id = (tx == TX_8X8) || (is_uv && h == 1);
2965 lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
2966 mask_id = (tx == TX_8X8) || (is_uv && w == 1);
2967 for (y = row_and_7; y < h + row_and_7; y++)
2968 lflvl->mask[is_uv][0][y][mask_id] |= t;
2970 int t8 = t & 0x01, t4 = t - t8;
2972 for (y = row_and_7; y < h + row_and_7; y++) {
2973 lflvl->mask[is_uv][0][y][2] |= t4;
2974 lflvl->mask[is_uv][0][y][1] |= t8;
2976 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
2978 int t8 = t & 0x11, t4 = t - t8;
2980 for (y = row_and_7; y < h + row_and_7; y++) {
2981 lflvl->mask[is_uv][0][y][2] |= t4;
2982 lflvl->mask[is_uv][0][y][1] |= t8;
2984 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
// Decode and reconstruct one coding block at (row, col): set mv clamping
// bounds, derive the chroma transform size, clear nnz contexts for skipped
// blocks, run intra or inter reconstruction (with edge emulation for blocks
// whose stride exceeds the frame linesize), and record the loopfilter
// level/edge masks.  Finally advance the per-block coefficient pointers.
// NOTE(review): many original lines (mode decoding, branch heads) are
// missing from this extraction; comments describe visible code only.
2989 static void decode_b(AVCodecContext *ctx, int row, int col,
2990 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2991 enum BlockLevel bl, enum BlockPartition bp)
2993 VP9Context *s = ctx->priv_data;
2995 enum BlockSize bs = bl * 3 + bp;
2996 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
2998 AVFrame *f = s->frames[CUR_FRAME].tf.f;
// Motion vector search bounds in 1/8-pel, clamped to 128px outside the
// visible block area.
3004 s->min_mv.x = -(128 + col * 64);
3005 s->min_mv.y = -(128 + row * 64);
3006 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
3007 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
// Chroma tx is one size smaller when the luma tx would exceed the block.
3013 b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
// Zero the above/left nnz contexts for the whole block in one aligned
// store of the appropriate width (skipped blocks carry no coefficients).
3020 #define SPLAT_ZERO_CTX(v, n) \
3022 case 1: v = 0; break; \
3023 case 2: AV_ZERO16(&v); break; \
3024 case 4: AV_ZERO32(&v); break; \
3025 case 8: AV_ZERO64(&v); break; \
3026 case 16: AV_ZERO128(&v); break; \
3028 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3030 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3031 if (s->ss_##dir2) { \
3032 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3033 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3035 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3036 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3041 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
3042 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
3043 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
3044 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
3047 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
3048 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
3049 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
3050 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
// Advance coefficient/eob pointers past this (skipped) block.
3055 s->block += w4 * h4 * 64;
3056 s->uvblock[0] += w4 * h4 * 64 >> (s->ss_h + s->ss_v);
3057 s->uvblock[1] += w4 * h4 * 64 >> (s->ss_h + s->ss_v);
3058 s->eob += 4 * w4 * h4;
3059 s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3060 s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3066 // emulated overhangs if the stride of the target buffer can't hold. This
3067 // allows to support emu-edge and so on even if we have large block
3069 emu[0] = (col + w4) * 8 > f->linesize[0] ||
3070 (row + h4) > s->rows;
3071 emu[1] = (col + w4) * 4 > f->linesize[1] ||
3072 (row + h4) > s->rows;
3074 s->dst[0] = s->tmp_y;
3077 s->dst[0] = f->data[0] + yoff;
3078 s->y_stride = f->linesize[0];
3081 s->dst[1] = s->tmp_uv[0];
3082 s->dst[2] = s->tmp_uv[1];
3085 s->dst[1] = f->data[1] + uvoff;
3086 s->dst[2] = f->data[2] + uvoff;
3087 s->uv_stride = f->linesize[1];
3090 intra_recon(ctx, yoff, uvoff);
// Copy the temporary (emu) reconstruction back into the frame planes,
// in the widest mc chunks that fit the remaining width.
3095 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3097 for (n = 0; o < w; n++) {
3102 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3103 s->tmp_y + o, 64, h, 0, 0);
3109 int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
3111 for (n = 1; o < w; n++) {
3116 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3117 s->tmp_uv[0] + o, 32, h, 0, 0);
3118 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3119 s->tmp_uv[1] + o, 32, h, 0, 0);
3125 // pick filter level and find edges to apply filter to
3126 if (s->filter.level &&
3127 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3128 [b->mode[3] != ZEROMV]) > 0) {
3129 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3130 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3132 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3133 mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3134 mask_edges(lflvl, 1, row7, col7, x_end, y_end,
3135 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3136 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3137 b->uvtx, skip_inter);
// Lazily fill the limit/mblim lookup tables for this filter level.
3139 if (!s->filter.lim_lut[lvl]) {
3140 int sharp = s->filter.sharpness;
3144 limit >>= (sharp + 3) >> 2;
3145 limit = FFMIN(limit, 9 - sharp);
3147 limit = FFMAX(limit, 1);
3149 s->filter.lim_lut[lvl] = limit;
3150 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
// Advance coefficient/eob pointers past this block.
3156 s->block += w4 * h4 * 64;
3157 s->uvblock[0] += w4 * h4 * 64 >> (s->ss_v + s->ss_h);
3158 s->uvblock[1] += w4 * h4 * 64 >> (s->ss_v + s->ss_h);
3159 s->eob += 4 * w4 * h4;
3160 s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3161 s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
// Recursively decode one superblock subtree: read the partition symbol for
// this level from the arithmetic coder (context c built from above/left
// partition state), then either decode a single block or recurse into the
// four quadrants.  Right/bottom frame edges restrict which partitions are
// codable, hence the reduced branches there.
// NOTE(review): some original lines (case labels/braces) are missing from
// this extraction; comments describe the visible code only.
3165 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3166 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3168 VP9Context *s = ctx->priv_data;
3169 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3170 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
3171 const uint8_t *p = s->keyframe || s->intraonly ? vp9_default_kf_partition_probs[bl][c] :
3172 s->prob.p.partition[bl][c];
3173 enum BlockPartition bp;
// hbs: half block size at this level, in 8x8 units.
3174 ptrdiff_t hbs = 4 >> bl;
3175 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3176 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3179 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3180 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3181 } else if (col + hbs < s->cols) { // FIXME why not <=?
3182 if (row + hbs < s->rows) { // FIXME why not <=?
// Fully inside the frame: all four partition types possible.
3183 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3185 case PARTITION_NONE:
3186 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3189 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3190 yoff += hbs * 8 * y_stride;
3191 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3192 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3195 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3197 uvoff += hbs * 8 >> s->ss_h;
3198 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3200 case PARTITION_SPLIT:
3201 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3202 decode_sb(ctx, row, col + hbs, lflvl,
3203 yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
3204 yoff += hbs * 8 * y_stride;
3205 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3206 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3207 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3208 yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
// Bottom edge: only SPLIT or H are possible; one bit decides.
3213 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3214 bp = PARTITION_SPLIT;
3215 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3216 decode_sb(ctx, row, col + hbs, lflvl,
3217 yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
3220 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// Right edge: only SPLIT or V are possible.
3222 } else if (row + hbs < s->rows) { // FIXME why not <=?
3223 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3224 bp = PARTITION_SPLIT;
3225 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3226 yoff += hbs * 8 * y_stride;
3227 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3228 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3231 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// Bottom-right corner: SPLIT is the only possibility, no bit read.
3234 bp = PARTITION_SPLIT;
3235 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3237 s->counts.partition[bl][c][bp]++;
// Replay a superblock subtree from stored per-block structure (b->bl /
// b->bp) instead of reading partition symbols from the bitstream — the same
// traversal as decode_sb() but driven by memory.
// NOTE(review): presumably used when the block layout was already decoded
// in an earlier pass over this frame — confirm against the callers.
3240 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3241 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3243 VP9Context *s = ctx->priv_data;
3245 ptrdiff_t hbs = 4 >> bl;
3246 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3247 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3250 av_assert2(b->bl == BL_8X8);
3251 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3252 } else if (s->b->bl == bl) {
// Stored block sits exactly at this level: decode it, plus its H/V
// sibling when that half lies inside the frame.
3253 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3254 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3255 yoff += hbs * 8 * y_stride;
3256 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3257 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3258 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3260 uvoff += hbs * 8 >> s->ss_h;
3261 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// Otherwise recurse into the quadrants that lie inside the frame.
3264 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3265 if (col + hbs < s->cols) { // FIXME why not <=?
3266 if (row + hbs < s->rows) {
3267 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
3268 uvoff + (8 * hbs >> s->ss_h), bl + 1);
3269 yoff += hbs * 8 * y_stride;
3270 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3271 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3272 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3273 yoff + 8 * hbs, uvoff + (8 * hbs >> s->ss_h), bl + 1);
3276 uvoff += hbs * 8 >> s->ss_h;
3277 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3279 } else if (row + hbs < s->rows) {
3280 yoff += hbs * 8 * y_stride;
3281 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3282 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3287 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3288 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
// Apply the VP9 in-loop deblocking filter to one 64x64 superblock of the
// current frame. lflvl carries the per-edge filter levels and the edge
// masks filled in during block decoding; mask layout per plane is
// [0=col/1=row][8 rows][4: 0=16px, 1=8px, 2=4px, 3=inner-4px edges]
// (see the VP9Filter definition). yoff/uvoff locate the superblock.
3290 VP9Context *s = ctx->priv_data;
3291 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3292 uint8_t *dst = f->data[0] + yoff, *lvl = lflvl->level;
3293 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
3296 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3297 // if you think of them as acting on a 8x8 block max, we can interleave
3298 // each v/h within the single x loop, but that only works if we work on
3299 // 8 pixel blocks, and we won't always do that (we want at least 16px
3300 // to use SSE2 optimizations, perhaps 32 for AVX2)
3302 // filter edges between columns, Y plane (e.g. block1 | block2)
3303 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
3304 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
3305 uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
3306 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3307 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3308 unsigned hm = hm1 | hm2 | hm13 | hm23;
// Each set bit in the combined mask hm selects one 8px column edge.
// L is the filter level for the edge; E (edge limit) and I (interior
// limit) come from the precomputed LUTs, H (high-edge-variance
// threshold) is the top nibble of L.
3310 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
3312 int L = *l, H = L >> 4;
3313 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3316 if (hmask1[0] & x) {
3317 if (hmask2[0] & x) {
3318 av_assert2(l[8] == L);
// Two vertically adjacent 8px edges with equal level: one 16px call.
3319 s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
3321 s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
3323 } else if (hm2 & x) {
// Different levels in the two rows: pack both into E/I (low/high
// byte) and use the mix2 variant to filter both in one call.
3326 E |= s->filter.mblim_lut[L] << 8;
3327 I |= s->filter.lim_lut[L] << 8;
3328 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3330 [0](ptr, ls_y, E, I, H);
3332 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3333 [0](ptr, ls_y, E, I, H);
3336 } else if (hm2 & x) {
3337 int L = l[8], H = L >> 4;
3338 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3341 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3342 [0](ptr + 8 * ls_y, ls_y, E, I, H);
// Inner 4px sub-edges (mask index 3) at a +4 pixel offset.
3346 int L = *l, H = L >> 4;
3347 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3352 E |= s->filter.mblim_lut[L] << 8;
3353 I |= s->filter.lim_lut[L] << 8;
3354 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
3356 s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
3358 } else if (hm23 & x) {
3359 int L = l[8], H = L >> 4;
3360 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3362 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
3368 // filter edges between rows, Y plane (e.g. ------)
3370 dst = f->data[0] + yoff;
// Row edges: two mask bits per 8px step, hence x <<= 2 / l += 2.
3372 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
3373 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
3374 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
3376 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
3379 int L = *l, H = L >> 4;
3380 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3383 if (vmask[0] & (x << 1)) {
3384 av_assert2(l[1] == L);
3385 s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
3387 s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
3389 } else if (vm & (x << 1)) {
3392 E |= s->filter.mblim_lut[L] << 8;
3393 I |= s->filter.lim_lut[L] << 8;
3394 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3395 [!!(vmask[1] & (x << 1))]
3396 [1](ptr, ls_y, E, I, H);
3398 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3399 [1](ptr, ls_y, E, I, H);
3401 } else if (vm & (x << 1)) {
3402 int L = l[1], H = L >> 4;
3403 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3405 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
3406 [1](ptr + 8, ls_y, E, I, H);
3410 int L = *l, H = L >> 4;
3411 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3413 if (vm3 & (x << 1)) {
3416 E |= s->filter.mblim_lut[L] << 8;
3417 I |= s->filter.lim_lut[L] << 8;
3418 s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
3420 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
3422 } else if (vm3 & (x << 1)) {
3423 int L = l[1], H = L >> 4;
3424 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3426 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
3431 // same principle but for U/V planes
// Chroma: half resolution (for the subsampled case), so the y stride
// through the masks doubles and adjacent-level lookups are 16 apart.
3432 for (p = 0; p < 2; p++) {
3434 dst = f->data[1 + p] + uvoff;
3435 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
3436 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
3437 uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
3438 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
3439 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
3441 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
3444 int L = *l, H = L >> 4;
3445 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3447 if (hmask1[0] & x) {
3448 if (hmask2[0] & x) {
3449 av_assert2(l[16] == L);
3450 s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
3452 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
3454 } else if (hm2 & x) {
3457 E |= s->filter.mblim_lut[L] << 8;
3458 I |= s->filter.lim_lut[L] << 8;
3459 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3461 [0](ptr, ls_uv, E, I, H);
3463 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3464 [0](ptr, ls_uv, E, I, H);
3466 } else if (hm2 & x) {
3467 int L = l[16], H = L >> 4;
3468 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3470 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3471 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
// Chroma row edges: four mask bits per 16px step (x <<= 4, l += 4).
3479 dst = f->data[1 + p] + uvoff;
3480 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
3481 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
3482 unsigned vm = vmask[0] | vmask[1] | vmask[2];
3484 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
3487 int L = *l, H = L >> 4;
3488 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3491 if (vmask[0] & (x << 2)) {
3492 av_assert2(l[2] == L);
3493 s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
3495 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
3497 } else if (vm & (x << 2)) {
3500 E |= s->filter.mblim_lut[L] << 8;
3501 I |= s->filter.lim_lut[L] << 8;
3502 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3503 [!!(vmask[1] & (x << 2))]
3504 [1](ptr, ls_uv, E, I, H);
3506 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3507 [1](ptr, ls_uv, E, I, H);
3509 } else if (vm & (x << 2)) {
3510 int L = l[2], H = L >> 4;
3511 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3513 s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
3514 [1](ptr + 8, ls_uv, E, I, H);
/**
 * Compute the pixel start/end of one tile along one dimension.
 *
 * @param start  receives the first row/col of the tile, in pixels (8px units << 3)
 * @param end    receives one-past-the-last row/col of the tile, in pixels
 * @param idx    tile index along this dimension
 * @param log2_n log2 of the tile count along this dimension
 * @param n      total size of the frame in 8px (sb) units
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = (idx * n) >> log2_n;
    int sb_end = ((idx + 1) * n) >> log2_n;

    /* Clamp to the frame size, then convert sb units to pixels. */
    if (sb_start > n)
        sb_start = n;
    if (sb_end > n)
        sb_end = n;
    *start = sb_start << 3;
    *end = sb_end << 3;
}
/**
 * Blend a decoded-symbol count pair into a single probability (VP9
 * backward adaptation).
 *
 * @param p             probability byte to update in place
 * @param ct0 / ct1     how often the symbol decoded as 0 resp. 1
 * @param max_count     saturation point for the total count
 * @param update_factor maximum blend weight (out of 256) at saturation
 *
 * The new probability is a weighted average of the old value and the
 * empirical probability ct0/(ct0+ct1), with the weight scaled by how
 * many samples were observed. No samples -> no change.
 */
static inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
                              int max_count, int update_factor)
{
    unsigned ct = ct0 + ct1, p2, p1;

    if (!ct)
        return;

    p1 = *p;
    /* empirical probability in 1/256 units, rounded, clamped to [1,255] */
    p2 = ((ct0 << 8) + (ct >> 1)) / ct;
    if (p2 < 1)
        p2 = 1;
    else if (p2 > 255)
        p2 = 255;
    if (ct > (unsigned)max_count)
        ct = max_count;
    update_factor = update_factor * ct / max_count;

    /* (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8,
     * relying on the same unsigned wrap-around as the original when
     * p2 < p1. */
    *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
}
3550 static void adapt_probs(VP9Context *s)
// Backward probability adaptation: after a frame has been fully decoded,
// fold the symbol counts accumulated in s->counts into the probabilities
// of the active frame context (s->prob_ctx[s->framectxid]), one
// adapt_prob() call per binary decision in each syntax tree.
3553 prob_context *p = &s->prob_ctx[s->framectxid].p;
// Coefficients adapt with a smaller factor (112) right after a keyframe
// or intra-only frame; everything else uses 128.
3554 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
// coefficient (eob + token) probabilities, per tx size / plane /
// intra-inter / band / coef context
3557 for (i = 0; i < 4; i++)
3558 for (j = 0; j < 2; j++)
3559 for (k = 0; k < 2; k++)
3560 for (l = 0; l < 6; l++)
3561 for (m = 0; m < 6; m++) {
3562 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3563 unsigned *e = s->counts.eob[i][j][k][l][m];
3564 unsigned *c = s->counts.coef[i][j][k][l][m];
3566 if (l == 0 && m >= 3) // dc only has 3 pt
3569 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3570 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3571 adapt_prob(&pp[2], c[1], c[2], 24, uf);
// Intra frames carry no inter syntax: skip/tx probabilities are taken
// over verbatim and the inter-only adaptation below does not apply.
3574 if (s->keyframe || s->intraonly) {
3575 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3576 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3577 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3578 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
// skip flag
3583 for (i = 0; i < 3; i++)
3584 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
// intra/inter flag
3587 for (i = 0; i < 4; i++)
3588 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
// compound prediction flag (only adapted when signalled per block)
3591 if (s->comppredmode == PRED_SWITCHABLE) {
3592 for (i = 0; i < 5; i++)
3593 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
// reference frame selection, compound and single variants
3597 if (s->comppredmode != PRED_SINGLEREF) {
3598 for (i = 0; i < 5; i++)
3599 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3600 s->counts.comp_ref[i][1], 20, 128);
3603 if (s->comppredmode != PRED_COMPREF) {
3604 for (i = 0; i < 5; i++) {
3605 uint8_t *pp = p->single_ref[i];
3606 unsigned (*c)[2] = s->counts.single_ref[i];
3608 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3609 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3613 // block partitioning
3614 for (i = 0; i < 4; i++)
3615 for (j = 0; j < 4; j++) {
3616 uint8_t *pp = p->partition[i][j];
3617 unsigned *c = s->counts.partition[i][j];
3619 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3620 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3621 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// tx size selection (only when the frame signals per-block tx size)
3625 if (s->txfmmode == TX_SWITCHABLE) {
3626 for (i = 0; i < 2; i++) {
3627 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3629 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3630 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3631 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3632 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3633 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3634 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3638 // interpolation filter
3639 if (s->filtermode == FILTER_SWITCHABLE) {
3640 for (i = 0; i < 4; i++) {
3641 uint8_t *pp = p->filter[i];
3642 unsigned *c = s->counts.filter[i];
3644 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3645 adapt_prob(&pp[1], c[1], c[2], 20, 128);
// inter prediction modes (nearestmv/nearmv/zeromv/newmv tree)
3650 for (i = 0; i < 7; i++) {
3651 uint8_t *pp = p->mv_mode[i];
3652 unsigned *c = s->counts.mv_mode[i];
3654 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3655 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3656 adapt_prob(&pp[2], c[1], c[3], 20, 128);
// mv joint (which of the two mv components are non-zero)
3661 uint8_t *pp = p->mv_joint;
3662 unsigned *c = s->counts.mv_joint;
3664 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3665 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3666 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// mv components: sign, magnitude class tree, class0, bits, fractional
// and (when enabled) high-precision bits, for row (0) and column (1)
3670 for (i = 0; i < 2; i++) {
3672 unsigned *c, (*c2)[2], sum;
3674 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3675 s->counts.mv_comp[i].sign[1], 20, 128);
3677 pp = p->mv_comp[i].classes;
3678 c = s->counts.mv_comp[i].classes;
3679 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3680 adapt_prob(&pp[0], c[0], sum, 20, 128);
3682 adapt_prob(&pp[1], c[1], sum, 20, 128);
3684 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3685 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3687 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3688 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3690 adapt_prob(&pp[6], c[6], sum, 20, 128);
3691 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3692 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3693 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3695 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3696 s->counts.mv_comp[i].class0[1], 20, 128);
3697 pp = p->mv_comp[i].bits;
3698 c2 = s->counts.mv_comp[i].bits;
3699 for (j = 0; j < 10; j++)
3700 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3702 for (j = 0; j < 2; j++) {
3703 pp = p->mv_comp[i].class0_fp[j];
3704 c = s->counts.mv_comp[i].class0_fp[j];
3705 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3706 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3707 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3709 pp = p->mv_comp[i].fp;
3710 c = s->counts.mv_comp[i].fp;
3711 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3712 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3713 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3715 if (s->highprecisionmvs) {
3716 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3717 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3718 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3719 s->counts.mv_comp[i].hp[1], 20, 128);
// luma intra modes (tree over the 10 prediction modes)
3724 for (i = 0; i < 4; i++) {
3725 uint8_t *pp = p->y_mode[i];
3726 unsigned *c = s->counts.y_mode[i], sum, s2;
3728 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3729 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3730 sum -= c[TM_VP8_PRED];
3731 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3732 sum -= c[VERT_PRED];
3733 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3734 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3736 adapt_prob(&pp[3], s2, sum, 20, 128);
3738 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3739 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3740 sum -= c[DIAG_DOWN_LEFT_PRED];
3741 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3742 sum -= c[VERT_LEFT_PRED];
3743 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3744 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// chroma intra modes, same tree, conditioned on the luma mode
3748 for (i = 0; i < 10; i++) {
3749 uint8_t *pp = p->uv_mode[i];
3750 unsigned *c = s->counts.uv_mode[i], sum, s2;
3752 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3753 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3754 sum -= c[TM_VP8_PRED];
3755 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3756 sum -= c[VERT_PRED];
3757 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3758 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3760 adapt_prob(&pp[3], s2, sum, 20, 128);
3762 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3763 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3764 sum -= c[DIAG_DOWN_LEFT_PRED];
3765 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3766 sum -= c[VERT_LEFT_PRED];
3767 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3768 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3772 static void free_buffers(VP9Context *s)
3774 av_freep(&s->intra_pred_data[0]);
3775 av_freep(&s->b_base);
3776 av_freep(&s->block_base);
3779 static av_cold int vp9_decode_free(AVCodecContext *ctx)
// Decoder close: unreference every held frame, then free the AVFrame
// shells themselves.
3781 VP9Context *s = ctx->priv_data;
// The three internal frames: CUR_FRAME plus the two retained for mv /
// segmentation-map prediction (REF_FRAME_MVPAIR / REF_FRAME_SEGMAP).
3784 for (i = 0; i < 3; i++) {
3785 if (s->frames[i].tf.f->data[0])
3786 vp9_unref_frame(ctx, &s->frames[i]);
3787 av_frame_free(&s->frames[i].tf.f);
// Both generations of the 8 reference slots: release any attached
// buffer before freeing the AVFrame wrapper.
3789 for (i = 0; i < 8; i++) {
3790 if (s->refs[i].f->data[0])
3791 ff_thread_release_buffer(ctx, &s->refs[i]);
3792 av_frame_free(&s->refs[i].f);
3793 if (s->next_refs[i].f->data[0])
3794 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3795 av_frame_free(&s->next_refs[i].f);
3805 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3806 int *got_frame, AVPacket *pkt)
// Decode one VP9 frame packet: parse the header, (re)build per-frame
// buffers, run the tile/superblock decode loop with per-row loop
// filtering, then rotate the reference slots and optionally output the
// frame.
3808 const uint8_t *data = pkt->data;
3809 int size = pkt->size;
3810 VP9Context *s = ctx->priv_data;
3811 int res, tile_row, tile_col, i, ref, row, col;
// Keep the previous segmentation map when the frame reuses it.
3812 int retain_segmap_ref = s->segmentation.enabled && !s->segmentation.update_map;
3813 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
// res == 0 from the header parser means "show existing frame": output a
// reference directly, refresh next_refs, and decode nothing.
3816 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3818 } else if (res == 0) {
3819 if (!s->refs[ref].f->data[0]) {
3820 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3821 return AVERROR_INVALIDDATA;
3823 if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
3825 ((AVFrame *)frame)->pkt_pts = pkt->pts;
3826 ((AVFrame *)frame)->pkt_dts = pkt->dts;
3827 for (i = 0; i < 8; i++) {
3828 if (s->next_refs[i].f->data[0])
3829 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3830 if (s->refs[i].f->data[0] &&
3831 (res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i])) < 0)
// Rotate the internal frames: keep the previous frame's segmentation
// map and mv pairs (when usable) for prediction, then allocate the new
// current frame.
3840 if (!retain_segmap_ref) {
3841 if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
3842 vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
3843 if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
3844 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
3847 if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
3848 vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);
3849 if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
3850 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
3852 if (s->frames[CUR_FRAME].tf.f->data[0])
3853 vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
3854 if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
3856 f = s->frames[CUR_FRAME].tf.f;
3857 f->key_frame = s->keyframe;
3858 f->pict_type = (s->keyframe || s->intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3859 ls_y = f->linesize[0];
3860 ls_uv =f->linesize[1];
// Build next_refs now: slots flagged in refreshrefmask point at the new
// frame, the rest keep their current reference.
3863 for (i = 0; i < 8; i++) {
3864 if (s->next_refs[i].f->data[0])
3865 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3866 if (s->refreshrefmask & (1 << i)) {
3867 res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
3868 } else if (s->refs[i].f->data[0]) {
3869 res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
3875 // main tile decode loop
// Reset the "above" context lines spanning the whole frame width.
3876 memset(s->above_partition_ctx, 0, s->cols);
3877 memset(s->above_skip_ctx, 0, s->cols);
3878 if (s->keyframe || s->intraonly) {
3879 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
3881 memset(s->above_mode_ctx, NEARESTMV, s->cols);
3883 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
3884 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
3885 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
3886 memset(s->above_segpred_ctx, 0, s->cols);
// Two-pass decoding is used with frame threading when this frame
// updates the probability context non-parallel.
3887 s->pass = s->frames[CUR_FRAME].uses_2pass =
3888 ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
3889 if ((res = update_block_buffers(ctx)) < 0) {
3890 av_log(ctx, AV_LOG_ERROR,
3891 "Failed to allocate block buffers\n");
// In parallel mode the context refresh happens up front (no adaptation),
// so dependent frame threads can be released early.
3894 if (s->refreshctx && s->parallelmode) {
3897 for (i = 0; i < 4; i++) {
3898 for (j = 0; j < 2; j++)
3899 for (k = 0; k < 2; k++)
3900 for (l = 0; l < 6; l++)
3901 for (m = 0; m < 6; m++)
3902 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
3903 s->prob.coef[i][j][k][l][m], 3);
3904 if (s->txfmmode == i)
3907 s->prob_ctx[s->framectxid].p = s->prob.p;
3908 ff_thread_finish_setup(ctx);
3909 } else if (!s->refreshctx) {
3910 ff_thread_finish_setup(ctx);
3916 s->block = s->block_base;
3917 s->uvblock[0] = s->uvblock_base[0];
3918 s->uvblock[1] = s->uvblock_base[1];
3919 s->eob = s->eob_base;
3920 s->uveob[0] = s->uveob_base[0];
3921 s->uveob[1] = s->uveob_base[1];
// Set up one range decoder per tile column; the last tile of the frame
// takes the remaining data, all others are preceded by a 32-bit size.
3923 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
3924 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
3925 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
3927 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3930 if (tile_col == s->tiling.tile_cols - 1 &&
3931 tile_row == s->tiling.tile_rows - 1) {
3934 tile_size = AV_RB32(data);
3938 if (tile_size > size) {
3939 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3940 return AVERROR_INVALIDDATA;
3942 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
3943 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
3944 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3945 return AVERROR_INVALIDDATA;
// Decode one row of 64x64 superblocks at a time, stepping across all
// tile columns so each tile's range decoder state is swapped in/out.
3952 for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
3953 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
3954 struct VP9Filter *lflvl_ptr = s->lflvl;
3955 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
3957 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3958 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
3959 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
// Reset the "left" context (one sb64 column worth) at each tile edge.
3962 memset(s->left_partition_ctx, 0, 8);
3963 memset(s->left_skip_ctx, 0, 8);
3964 if (s->keyframe || s->intraonly) {
3965 memset(s->left_mode_ctx, DC_PRED, 16);
3967 memset(s->left_mode_ctx, NEARESTMV, 8);
3969 memset(s->left_y_nnz_ctx, 0, 16);
3970 memset(s->left_uv_nnz_ctx, 0, 32);
3971 memset(s->left_segpred_ctx, 0, 8);
3973 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
3976 for (col = s->tiling.tile_col_start;
3977 col < s->tiling.tile_col_end;
3978 col += 8, yoff2 += 64, uvoff2 += 64 >> s->ss_h, lflvl_ptr++) {
3979 // FIXME integrate with lf code (i.e. zero after each
3980 // use, similar to invtxfm coefficients, or similar)
3982 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
// Pass 2 replays the already-parsed blocks from memory; pass 1 (or
// single-pass) parses from the bitstream.
3986 decode_sb_mem(ctx, row, col, lflvl_ptr,
3987 yoff2, uvoff2, BL_64X64);
3989 decode_sb(ctx, row, col, lflvl_ptr,
3990 yoff2, uvoff2, BL_64X64);
3994 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
4002 // backup pre-loopfilter reconstruction data for intra
4003 // prediction of next row of sb64s
4004 if (row + 8 < s->rows) {
4005 memcpy(s->intra_pred_data[0],
4006 f->data[0] + yoff + 63 * ls_y,
4008 memcpy(s->intra_pred_data[1],
4009 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4010 8 * s->cols >> s->ss_h);
4011 memcpy(s->intra_pred_data[2],
4012 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4013 8 * s->cols >> s->ss_h);
4016 // loopfilter one row
4017 if (s->filter.level) {
4020 lflvl_ptr = s->lflvl;
4021 for (col = 0; col < s->cols;
4022 col += 8, yoff2 += 64, uvoff2 += 64 >> s->ss_h, lflvl_ptr++) {
4023 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
4027 // FIXME maybe we can make this more finegrained by running the
4028 // loopfilter per-block instead of after each sbrow
4029 // In fact that would also make intra pred left preparation easier?
4030 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
// After pass 1, adapt probabilities and release waiting frame threads.
4034 if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
4036 ff_thread_finish_setup(ctx);
4038 } while (s->pass++ == 1);
4039 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
// Promote next_refs to refs for the following frame.
4042 for (i = 0; i < 8; i++) {
4043 if (s->refs[i].f->data[0])
4044 ff_thread_release_buffer(ctx, &s->refs[i]);
4045 ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
// Only output the frame if it is flagged visible.
4048 if (!s->invisible) {
4049 if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
4057 static void vp9_decode_flush(AVCodecContext *ctx)
4059 VP9Context *s = ctx->priv_data;
4062 for (i = 0; i < 3; i++)
4063 vp9_unref_frame(ctx, &s->frames[i]);
4064 for (i = 0; i < 8; i++)
4065 ff_thread_release_buffer(ctx, &s->refs[i]);
4068 static int init_frames(AVCodecContext *ctx)
4070 VP9Context *s = ctx->priv_data;
4073 for (i = 0; i < 3; i++) {
4074 s->frames[i].tf.f = av_frame_alloc();
4075 if (!s->frames[i].tf.f) {
4076 vp9_decode_free(ctx);
4077 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4078 return AVERROR(ENOMEM);
4081 for (i = 0; i < 8; i++) {
4082 s->refs[i].f = av_frame_alloc();
4083 s->next_refs[i].f = av_frame_alloc();
4084 if (!s->refs[i].f || !s->next_refs[i].f) {
4085 vp9_decode_free(ctx);
4086 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4087 return AVERROR(ENOMEM);
4094 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4096 VP9Context *s = ctx->priv_data;
4098 ctx->internal->allocate_progress = 1;
4099 ff_vp9dsp_init(&s->dsp);
4100 ff_videodsp_init(&s->vdsp, 8);
4101 s->filter.sharpness = -1;
4103 return init_frames(ctx);
4106 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
// Frame-thread worker init: each thread copy only needs its own AVFrame
// shells; the remaining state is synced via update_thread_context().
4108 return init_frames(avctx);
4111 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
// Frame-threading state sync: copy everything the next frame's decode
// depends on from the source thread into this one.
4114 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4116 // detect size changes in other threads
4117 if (s->intra_pred_data[0] &&
4118 (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
// Re-reference the three internal frames from the source thread.
4122 for (i = 0; i < 3; i++) {
4123 if (s->frames[i].tf.f->data[0])
4124 vp9_unref_frame(dst, &s->frames[i]);
4125 if (ssrc->frames[i].tf.f->data[0]) {
4126 if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
// Take over the source thread's *next* reference generation as our
// current one.
4130 for (i = 0; i < 8; i++) {
4131 if (s->refs[i].f->data[0])
4132 ff_thread_release_buffer(dst, &s->refs[i]);
4133 if (ssrc->next_refs[i].f->data[0]) {
4134 if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
// Scalar header state and probability contexts needed for inter
// prediction and entropy decoding of the next frame.
4139 s->invisible = ssrc->invisible;
4140 s->keyframe = ssrc->keyframe;
4141 s->ss_v = ssrc->ss_v;
4142 s->ss_h = ssrc->ss_h;
4143 s->segmentation.enabled = ssrc->segmentation.enabled;
4144 s->segmentation.update_map = ssrc->segmentation.update_map;
4145 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4146 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
4147 if (ssrc->segmentation.enabled) {
4148 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4149 sizeof(s->segmentation.feat));
4155 AVCodec ff_vp9_decoder = {
4157 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4158 .type = AVMEDIA_TYPE_VIDEO,
4159 .id = AV_CODEC_ID_VP9,
4160 .priv_data_size = sizeof(VP9Context),
4161 .init = vp9_decode_init,
4162 .close = vp9_decode_free,
4163 .decode = vp9_decode_frame,
4164 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4165 .flush = vp9_decode_flush,
4166 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4167 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),