2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
34 #include "libavutil/pixdesc.h"
36 #define VP9_SYNCCODE 0x498342
typedef struct VP9Frame {
    AVBufferRef *extradata;        // single buffer holding segmentation_map + mv (see vp9_alloc_frame)
    uint8_t *segmentation_map;     // per-8x8-block segment ids; points into extradata->data
    struct VP9mvrefPair *mv;       // per-8x8-block mv/ref pairs; points into extradata->data + sz

// NOTE(review): the mask[] field below belongs to a different struct
// (loopfilter state, presumably VP9Filter) whose opening line is not
// visible in this chunk -- confirm against the full file.
    uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
                              [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
typedef struct VP9Block {
    // per-block decode state: segment id, intra/compound flags, up to two
    // reference indices, per-sub-block prediction modes, uv mode, skip flag
    uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
    enum FilterMode filter;
    VP56mv mv[4 /* b_idx */][2 /* ref */];   // motion vectors per sub-block and per reference
    enum TxfmMode tx, uvtx;                  // transform sizes for luma and chroma
    enum BlockPartition bp;
typedef struct VP9Context {
    VP9Block *b_base, *b;            // block bookkeeping (allocated in update_block_buffers)
    int row, row7, col, col7;        // current block position; *7 presumably pos & 7 -- TODO confirm
    ptrdiff_t y_stride, uv_stride;
    uint8_t keyframe, last_keyframe;
    uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
    uint8_t use_last_frame_mvs;      // ok to reuse previous frame's mvs (same size, no errorres)
    uint8_t refreshrefmask;          // 8-bit mask of reference slots to refresh (frame header)
    uint8_t highprecisionmvs;
    enum FilterMode filtermode;
    uint8_t allowcompinter;          // set when the three refs don't all share one sign bias
    uint8_t parallelmode;
    uint8_t varcompref[2];           // the two variable compound-prediction reference indices
    ThreadFrame refs[8], next_refs[8];
#define REF_FRAME_MVPAIR 1
#define REF_FRAME_SEGMAP 2
    uint8_t mblim_lut[64];
    int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
#define MAX_SEGMENT 8
    uint8_t absolute_vals;
    uint8_t ignore_refmap;
    uint8_t skip_enabled;
    unsigned log2_tile_cols, log2_tile_rows;
    unsigned tile_cols, tile_rows;
    unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    unsigned sb_cols, sb_rows, rows, cols;   // frame size in 64x64 superblocks / 8x8 blocks

    // NOTE(review): the two coef[] members below have different inner
    // dimensions ([3] vs [11]) and belong to two different anonymous
    // sub-structs (probability contexts vs. current probabilities) whose
    // opening lines are not visible in this chunk.
    uint8_t coef[4][2][2][6][6][3];
    uint8_t coef[4][2][2][6][6][11];

    // symbol occurrence counters for backward context adaptation
    unsigned y_mode[4][10];
    unsigned uv_mode[10][10];
    unsigned filter[4][3];
    unsigned mv_mode[7][4];
    unsigned intra[4][2];
    unsigned single_ref[5][2][2];
    unsigned comp_ref[5][2];
    unsigned tx32p[2][4];
    unsigned tx16p[2][3];
    unsigned mv_joint[4];
    unsigned classes[11];
    unsigned bits[10][2];
    unsigned class0_fp[2][4];
    unsigned class0_hp[2];
    unsigned partition[4][4][4];
    unsigned coef[4][2][2][6][6][3];
    unsigned eob[4][2][2][6][6][2];

    enum TxfmMode txfmmode;
    enum CompPredMode comppredmode;

    // contextual (left/above) cache
    DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
    DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
    DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
    DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
    DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
    // "above" contexts live in one shared allocation made in update_size()
    uint8_t *above_partition_ctx;
    uint8_t *above_mode_ctx;
    // FIXME maybe merge some of the below in a flags field?
    uint8_t *above_y_nnz_ctx;
    uint8_t *above_uv_nnz_ctx[2];
    uint8_t *above_skip_ctx; // 1bit
    uint8_t *above_txfm_ctx; // 2bit
    uint8_t *above_segpred_ctx; // 1bit
    uint8_t *above_intra_ctx; // 1bit
    uint8_t *above_comp_ctx; // 1bit
    uint8_t *above_ref_ctx; // 2bit
    uint8_t *above_filter_ctx;
    VP56mv (*above_mv_ctx)[2];

    uint8_t *intra_pred_data[3];
    struct VP9Filter *lflvl;
    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2];

    // block reconstruction intermediates
    int block_alloc_using_2pass;
    int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
    uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
    struct { int x, y; } min_mv, max_mv;       // mv clamping range, see clamp_mv()
    DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
    DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
    uint16_t mvscale[3][2];   // 14-bit per-ref scaling factors for scaled prediction
    uint8_t mvstep[3][2];
// Per-block-size {width, height} pairs, one row per N_BS_SIZES entry
// (largest first).  Index [0] appears to be in units of 4 samples
// ({16,16} -> 64x64) and [1] in units of 8 samples -- TODO confirm
// against the BS_* enum order, which is not visible in this chunk.
static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
    { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
    { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
    { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
    { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
// Allocate one VP9 frame: the AVFrame itself via the frame-threading
// buffer API plus one companion buffer that holds both the segmentation
// map and the per-block mv/ref pairs.  Returns a negative AVERROR on
// failure (success tail is not visible in this chunk).
static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
    VP9Context *s = ctx->priv_data;

    if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
    // one byte of segmentation map per 8x8 block = 64 per 64x64 superblock
    sz = 64 * s->sb_cols * s->sb_rows;
    // single allocation: sz map bytes followed by sz VP9mvrefPair entries
    if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
        ff_thread_release_buffer(ctx, &f->tf);   // roll back get_buffer on OOM
        return AVERROR(ENOMEM);

    f->segmentation_map = f->extradata->data;
    f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
// Release a frame: drop both the frame buffer and the companion
// extradata buffer (segmentation map + mvs) acquired in vp9_alloc_frame.
static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
    ff_thread_release_buffer(ctx, &f->tf);
    av_buffer_unref(&f->extradata);
// Make dst an additional reference to src: the AVFrame is ref'd through
// the thread-frame API and extradata gets its own AVBufferRef, while the
// raw map/mv pointers are shared (they point into the shared extradata).
static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
    if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
    } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
        vp9_unref_frame(ctx, dst);   // roll back the tf ref on OOM
        return AVERROR(ENOMEM);

    dst->segmentation_map = src->segmentation_map;
    dst->uses_2pass = src->uses_2pass;
// (Re)initialize all per-frame-size state: superblock/block counts, the
// shared "above"-context allocation, and the DSP tables when bpp changed.
// No-op when size, pixel format and intra_pred_data are already set up.
static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
    VP9Context *s = ctx->priv_data;
    int bytesperpixel = s->bytesperpixel;

    av_assert0(w > 0 && h > 0);

    // fast path: nothing changed since the last call
    if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)

    s->sb_cols = (w + 63) >> 6;   // 64x64 superblocks
    s->sb_rows = (h + 63) >> 6;
    s->cols = (w + 7) >> 3;       // 8x8 blocks
    s->rows = (h + 7) >> 3;

// carve consecutive sub-arrays out of the single allocation below
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    // FIXME we slightly over-allocate here for subsampled chroma, but a little
    // bit of padding shouldn't affect performance...
    p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
                                sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
    assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
    assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
    assign(s->above_y_nnz_ctx, uint8_t *, 16);
    assign(s->above_mode_ctx, uint8_t *, 16);
    assign(s->above_mv_ctx, VP56mv(*)[2], 16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
    assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
    assign(s->above_partition_ctx, uint8_t *, 8);
    assign(s->above_skip_ctx, uint8_t *, 8);
    assign(s->above_txfm_ctx, uint8_t *, 8);
    assign(s->above_segpred_ctx, uint8_t *, 8);
    assign(s->above_intra_ctx, uint8_t *, 8);
    assign(s->above_comp_ctx, uint8_t *, 8);
    assign(s->above_ref_ctx, uint8_t *, 8);
    assign(s->above_filter_ctx, uint8_t *, 8);
    assign(s->lflvl, struct VP9Filter *, 1);

    // these will be re-allocated a little later
    av_freep(&s->b_base);
    av_freep(&s->block_base);

    // bit depth changed: reinit the bpp-specific DSP function tables
    if (s->bpp != s->last_bpp) {
        ff_vp9dsp_init(&s->dsp, s->bpp);
        ff_videodsp_init(&s->vdsp, s->bpp);
        s->last_bpp = s->bpp;
// Allocate the block/coefficient scratch buffers.  In 2-pass (frame
// threading) mode every superblock needs its own slice of scratch space;
// otherwise one superblock's worth is reused for the whole frame.
static int update_block_buffers(AVCodecContext *ctx)
    VP9Context *s = ctx->priv_data;
    int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;

    // already allocated for the right mode (2-pass vs. single pass)
    if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)

    av_free(s->block_base);
    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);   // chroma samples per sb
    chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);     // chroma 4x4 blocks per sb
    if (s->frames[CUR_FRAME].uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

        s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
        // one allocation: luma coeffs, 2x chroma coeffs, luma eobs, 2x chroma eobs
        s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                    16 * 16 + 2 * chroma_eobs) * sbs);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
        s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
        s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
        s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
        // single-pass: same layout, sized for a single superblock
        s->b_base = av_malloc(sizeof(VP9Block));
        s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                   16 * 16 + 2 * chroma_eobs);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
        s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
        s->uveob_base[0] = s->eob_base + 16 * 16;
        s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
    s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
// Read an n-bit magnitude followed by a trailing sign bit and return the
// signed value (sign-magnitude, sign last).
// for some reason the sign bit is at the end, not the start, of a bit sequence
static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
    int v = get_bits(gb, n);
    return get_bits1(gb) ? -v : v;
420 static av_always_inline int inv_recenter_nonneg(int v, int m)
422 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
// differential forward probability updates
// Decode a differentially-coded update of probability p (range [1,255]):
// a short VLC selects an absolute difference class, inv_map_table[]
// de-permutes it (first 20 entries are the cheap/rough updates), and
// inv_recenter_nonneg() folds it back around the current value.
static int update_prob(VP56RangeCoder *c, int p)
    // permutation of the 254 possible absolute differences; indices 0..19
    // are the coarse, cheaply-codable update steps
    // NOTE(review): the tail of this table (values 252, 253) is not
    // visible in this chunk.
    static const int inv_map_table[254] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A,255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0,254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    // VLC: three escape bits select the class, then 4/4/5/7 payload bits
    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;    // d in [0, 15]
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;   // d in [16, 31]
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;   // d in [32, 63]
        d = vp8_rac_get_uint(c, 7);
        // NOTE(review): the guard line preceding this doubling step is not
        // visible in this chunk; confirm against the full file.
        d = (d << 1) - 65 + vp8_rac_get(c);

    // fold the absolute difference back around p, mirroring above 128 so
    // the result stays in [1, 255]
    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
// Parse the colorspace/bit-depth/subsampling bits of the frame header and
// return the matching AVPixelFormat (negative AVERROR on invalid combos).
// Also fills in s->bpp / s->bytesperpixel / s->ss_h / s->ss_v.
static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
    static const enum AVColorSpace colorspaces[8] = {
        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
    VP9Context *s = ctx->priv_data;
    enum AVPixelFormat res;
    // profiles 0/1 are 8-bit only; profiles 2/3 signal 10 vs 12 bit
    int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12

    s->bpp = 8 + bits * 2;
    s->bytesperpixel = (7 + s->bpp) >> 3;
    ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
    if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
        static const enum AVPixelFormat pix_fmt_rgb[3] = {
            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
        // RGB is only valid in the odd (4:4:4-capable) profiles
        if (ctx->profile & 1) {
            s->ss_h = s->ss_v = 1;
            res = pix_fmt_rgb[bits];
            ctx->color_range = AVCOL_RANGE_JPEG;
            av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
            return AVERROR_INVALIDDATA;
        // YUV: pick the format from bit depth and explicit/implied subsampling
        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
            { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
              { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
        ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
        if (ctx->profile & 1) {
            // odd profiles carry explicit subsampling bits; 4:2:0 is reserved here
            s->ss_h = get_bits1(&s->gb);
            s->ss_v = get_bits1(&s->gb);
            if ((res = pix_fmt_for_ss[bits][s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
                av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
                return AVERROR_INVALIDDATA;
            } else if (get_bits1(&s->gb)) {
                av_log(ctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
                return AVERROR_INVALIDDATA;
            // even profiles are always 4:2:0
            s->ss_h = s->ss_v = 1;
            res = pix_fmt_for_ss[bits][1][1];
// Parse the full VP9 frame header: the uncompressed part (frame type,
// size, references, loopfilter, quantizer, segmentation, tiles) from the
// plain bitstream, then the arithmetic-coded ("compressed") part holding
// the forward probability updates.  Returns the total number of header
// bytes consumed, or a negative AVERROR.
static int decode_frame_header(AVCodecContext *ctx,
                               const uint8_t *data, int size, int *ref)
    VP9Context *s = ctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
    enum AVPixelFormat fmt = ctx->pix_fmt;
    const uint8_t *data2;

    /* general header */
    if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    // 2-bit profile, plus one reserved/extension bit when both bits set
    ctx->profile = get_bits1(&s->gb);
    ctx->profile |= get_bits1(&s->gb) << 1;
    if (ctx->profile == 3) ctx->profile += get_bits1(&s->gb);
    if (ctx->profile > 3) {
        av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
        return AVERROR_INVALIDDATA;
    // show_existing_frame: directly display a stored reference, no decode
    if (get_bits1(&s->gb)) {
        *ref = get_bits(&s->gb, 3);
    s->last_keyframe = s->keyframe;
    s->keyframe = !get_bits1(&s->gb);
    last_invisible = s->invisible;
    s->invisible = !get_bits1(&s->gb);
    s->errorres = get_bits1(&s->gb);
    s->use_last_frame_mvs = !s->errorres && !last_invisible;
        // keyframe path: sync code, colorspace, explicit frame size
        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        if ((fmt = read_colorspace_details(ctx)) < 0)
        // for profile 1, here follows the subsampling bits
        s->refreshrefmask = 0xff;
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
        // inter path: intra-only flag, context reset, then either
        // intra-only setup or reference-based sizing
        s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
        s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            if (ctx->profile == 1) {
                if ((fmt = read_colorspace_details(ctx)) < 0)
                // profile 0 intra-only frames are implicitly 8-bit 4:2:0
                s->ss_h = s->ss_v = 1;
                s->bytesperpixel = 1;
                fmt = AV_PIX_FMT_YUV420P;
                ctx->colorspace = AVCOL_SPC_BT470BG;
                ctx->color_range = AVCOL_RANGE_JPEG;
            s->refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            // regular inter frame: three references with sign biases
            s->refreshrefmask = get_bits(&s->gb, 8);
            s->refidx[0] = get_bits(&s->gb, 3);
            s->signbias[0] = get_bits1(&s->gb) && !s->errorres;
            s->refidx[1] = get_bits(&s->gb, 3);
            s->signbias[1] = get_bits1(&s->gb) && !s->errorres;
            s->refidx[2] = get_bits(&s->gb, 3);
            s->signbias[2] = get_bits1(&s->gb) && !s->errorres;
            if (!s->refs[s->refidx[0]].f->data[0] ||
                !s->refs[s->refidx[1]].f->data[0] ||
                !s->refs[s->refidx[2]].f->data[0]) {
                av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            // frame size: either inherited from one of the refs or explicit
            if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[0]].f->width;
                h = s->refs[s->refidx[0]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[1]].f->width;
                h = s->refs[s->refidx[1]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[2]].f->width;
                h = s->refs[s->refidx[2]].f->height;
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            // Note that in this code, "CUR_FRAME" is actually before we
            // have formally allocated a frame, and thus actually represents
            s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
                                     s->frames[CUR_FRAME].tf.f->height == h;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->highprecisionmvs = get_bits1(&s->gb);
            s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
            // compound prediction is only allowed when sign biases differ
            s->allowcompinter = (s->signbias[0] != s->signbias[1] ||
                                 s->signbias[0] != s->signbias[2]);
            if (s->allowcompinter) {
                // fixed ref = the odd one out; the other two are variable
                if (s->signbias[0] == s->signbias[1]) {
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 1;
                } else if (s->signbias[0] == s->signbias[2]) {
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 2;
                    s->varcompref[0] = 1;
                    s->varcompref[1] = 2;

            // per-reference scaling factors (14-bit fixed point)
            for (i = 0; i < 3; i++) {
                AVFrame *ref = s->refs[s->refidx[i]].f;
                int refw = ref->width, refh = ref->height;

                if (ref->format != fmt) {
                    av_log(ctx, AV_LOG_ERROR,
                           "Ref pixfmt (%s) did not match current frame (%s)",
                           av_get_pix_fmt_name(ref->format),
                           av_get_pix_fmt_name(fmt));
                    return AVERROR_INVALIDDATA;
                } else if (refw == w && refh == h) {
                    s->mvscale[i][0] = s->mvscale[i][1] = 0;
                    // spec limits: at most 2x downscale, 16x upscale
                    if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
                        av_log(ctx, AV_LOG_ERROR,
                               "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
                        return AVERROR_INVALIDDATA;
                    s->mvscale[i][0] = (refw << 14) / w;
                    s->mvscale[i][1] = (refh << 14) / h;
                    s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
                    s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;

    s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
    s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
    s->framectxid = c = get_bits(&s->gb, 2);

    /* loopfilter header data */
    if (s->keyframe || s->errorres || s->intraonly) {
        // reset loopfilter defaults
        s->lf_delta.ref[0] = 1;
        s->lf_delta.ref[1] = 0;
        s->lf_delta.ref[2] = -1;
        s->lf_delta.ref[3] = -1;
        s->lf_delta.mode[0] = 0;
        s->lf_delta.mode[1] = 0;
    s->filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
    // the old cache values since they are still valid
    if (s->filter.sharpness != sharp)
        memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
    s->filter.sharpness = sharp;
    if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
        if (get_bits1(&s->gb)) {
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);

    /* quantization header data */
    s->yac_qi = get_bits(&s->gb, 8);
    s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
                  s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;

    /* segmentation header info */
    s->segmentation.ignore_refmap = 0;
    if ((s->segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->prob.seg[i] = get_bits1(&s->gb) ?
                                 get_bits(&s->gb, 8) : 255;
            if ((s->segmentation.temporal = get_bits1(&s->gb))) {
                for (i = 0; i < 3; i++)
                    s->prob.segpred[i] = get_bits1(&s->gb) ?
                                         get_bits(&s->gb, 8) : 255;
        // a reference segmap can't be reused across a size change; decode
        // on without it rather than erroring out
        if ((!s->segmentation.update_map || s->segmentation.temporal) &&
            (w != s->frames[CUR_FRAME].tf.f->width ||
             h != s->frames[CUR_FRAME].tf.f->height)) {
            av_log(ctx, AV_LOG_WARNING,
                   "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
                   s->segmentation.temporal, s->segmentation.update_map);
            s->segmentation.ignore_refmap = 1;
            //return AVERROR_INVALIDDATA;

        if (get_bits1(&s->gb)) {
            s->segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
                if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
                if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
        // segmentation disabled: neutralize feature set 0
        s->segmentation.feat[0].q_enabled = 0;
        s->segmentation.feat[0].lf_enabled = 0;
        s->segmentation.feat[0].skip_enabled = 0;
        s->segmentation.feat[0].ref_enabled = 0;

    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->segmentation.feat[i].q_enabled) {
            if (s->segmentation.absolute_vals)
                qyac = s->segmentation.feat[i].q_val;
                qyac = s->yac_qi + s->segmentation.feat[i].q_val;
        qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
        qyac = av_clip_uintp2(qyac, 8);

        s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
        s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
        s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
        s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];

        // precompute per-segment/per-ref/per-mode loopfilter levels
        sh = s->filter.level >= 32;
        if (s->segmentation.feat[i].lf_enabled) {
            if (s->segmentation.absolute_vals)
                lflvl = av_clip_uintp2(s->segmentation.feat[i].lf_val, 6);
                lflvl = av_clip_uintp2(s->filter.level + s->segmentation.feat[i].lf_val, 6);
            lflvl = s->filter.level;
        if (s->lf_delta.enabled) {
            s->segmentation.feat[i].lflvl[0][0] =
            s->segmentation.feat[i].lflvl[0][1] =
                av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
            for (j = 1; j < 4; j++) {
                s->segmentation.feat[i].lflvl[j][0] =
                    av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                             s->lf_delta.mode[0]) * (1 << sh)), 6);
                s->segmentation.feat[i].lflvl[j][1] =
                    av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                             s->lf_delta.mode[1]) * (1 << sh)), 6);
            memset(s->segmentation.feat[i].lflvl, lflvl,
                   sizeof(s->segmentation.feat[i].lflvl));

    /* tiling info */
    if ((res = update_size(ctx, w, h, fmt)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
    // minimum log2_tile_cols so that no tile exceeds 64 superblocks wide;
    // max is capped so each tile is at least 4 superblocks wide
    for (s->tiling.log2_tile_cols = 0;
         (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
         s->tiling.log2_tile_cols++) ;
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->tiling.log2_tile_cols++;
    s->tiling.log2_tile_rows = decode012(&s->gb);
    s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
    if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
        s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
        // one range coder per tile column
        s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
                                 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
            av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
            return AVERROR(ENOMEM);

    // keyframes/error-resilient/intra-only frames reset all 4 probability contexts
    if (s->keyframe || s->errorres || s->intraonly) {
        s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
                           s->prob_ctx[3].p = vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));

    // next 16 bits is size of the rest of the header (arith-coded)
    size2 = get_bits(&s->gb, 16);
    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;

    // reset the adaptation counters (coef/eob only for intra frames)
    if (s->keyframe || s->intraonly) {
        memset(s->counts.coef, 0, sizeof(s->counts.coef));
        memset(s->counts.eob, 0, sizeof(s->counts.eob));
        memset(&s->counts, 0, sizeof(s->counts));
    // FIXME is it faster to not copy here, but do it down in the fw updates
    // as explicit copies if the fw update is missing (and skip the copy upon
    s->prob.p = s->prob_ctx[c].p;

    /* txfm updates (compressed header) */
        s->txfmmode = TX_4X4;
        s->txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->txfmmode == 3)
            s->txfmmode += vp8_rac_get(&s->c);

        if (s->txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);

    /* coefficient probability updates, per transform size */
    for (i = 0; i < 4; i++) {
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            // explicit forward updates against the stored context
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252)) {
                                    p[n] = update_prob(&s->c, r[n]);
            // no update: copy the stored context verbatim
            // NOTE(review): uses `m > 3` where the update path above uses
            // `m >= 3` -- the extra m==3 copy looks harmless but confirm
            // against the full file / upstream history
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m > 3 && l == 0) // dc only has 3 pt
        // larger transform sizes are only coded up to the active mode
        if (s->txfmmode == i)

    /* mode/partition/mv probability updates */
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->keyframe && !s->intraonly) {
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);

        if (s->allowcompinter) {
            s->comppredmode = vp8_rac_get(&s->c);
                s->comppredmode += vp8_rac_get(&s->c);
            if (s->comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                            update_prob(&s->c, s->prob.p.comp[i]);
            s->comppredmode = PRED_SINGLEREF;

        if (s->comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);

        if (s->comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);

        // mv fields don't use the update_prob subexp model for some reason
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        // hp bits only coded when high-precision mvs are enabled
        if (s->highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

    // total header size: uncompressed part + compressed part
    return (data2 - data) + size2;
// Clamp a motion vector into the precomputed s->min_mv..s->max_mv range.
// (The trailing parameter of the signature is not visible in this chunk.)
static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
    dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
    dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
// Build a motion-vector prediction (*pmv) for reference frame 'ref'.
// Candidates are scanned in priority order:
//   1. sub-8x8 sibling MVs already decoded in this block (sb >= 0),
//   2. the directly-above / directly-left neighbour blocks (via the
//      above_mv_ctx / left_mv_ctx caches),
//   3. a per-blocksize neighbourhood LUT (mv_ref_blk_off),
//   4. the co-located MV in the previous frame (use_last_frame_mvs),
//   5. the same candidates again, this time accepting MVs coded against a
//      *different* reference frame, sign-inverted when the two references'
//      sign biases differ (RETURN_SCALE_MV).
// z selects which of the two per-block MVs is predicted, idx is the candidate
// count requested (0 = nearest, 1 = near), sb is the sub-block index or < 0.
// NOTE(review): several macro-body and brace lines are elided in this excerpt.
1080 static void find_ref_mvs(VP9Context *s,
1081 VP56mv *pmv, int ref, int z, int idx, int sb)
// Per-blocksize list of (col,row) offsets of the 8 candidate neighbour blocks,
// in 8x8-unit coordinates relative to the current block.
1083 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
1084 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
1085 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
1086 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
1087 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
1088 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
1089 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
1090 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
1091 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1092 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
1093 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1094 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
1095 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
1096 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
1097 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1098 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
1099 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
1100 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1101 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1102 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1103 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1104 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1105 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1106 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1107 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1108 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1109 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1112 int row = s->row, col = s->col, row7 = s->row7;
1113 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
// Sentinel: 0x8000,0x8000 packed as a 32-bit word can never be a valid MV pair.
1114 #define INVALID_MV 0x80008000U
// mem remembers the first accepted candidate so that the second (idx == 1)
// candidate is only accepted if it differs from the first.
1115 uint32_t mem = INVALID_MV, mem_sub8x8 = INVALID_MV;
// RETURN_DIRECT_MV: accept an already-decoded sub-block MV of this same block
// without clamping (macro body partially elided in this excerpt).
1118 #define RETURN_DIRECT_MV(mv) \
1120 uint32_t m = AV_RN32A(&mv); \
1124 } else if (mem == INVALID_MV) { \
1126 } else if (m != mem) { \
// Sub-8x8: for later sub-blocks, earlier sub-block MVs of this very block are
// the highest-priority candidates.
1133 if (sb == 2 || sb == 1) {
1134 RETURN_DIRECT_MV(b->mv[0][z]);
1135 } else if (sb == 3) {
1136 RETURN_DIRECT_MV(b->mv[2][z]);
1137 RETURN_DIRECT_MV(b->mv[1][z]);
1138 RETURN_DIRECT_MV(b->mv[0][z]);
// RETURN_MV: accept a neighbour MV after clamping it to the legal range; in
// sub-8x8 mode (sb >= 0) the unclamped value is also de-duplicated through
// mem_sub8x8 (macro body partially elided in this excerpt).
1141 #define RETURN_MV(mv) \
1146 av_assert2(idx == 1); \
1147 av_assert2(mem != INVALID_MV); \
1148 if (mem_sub8x8 == INVALID_MV) { \
1149 clamp_mv(&tmp, &mv, s); \
1150 m = AV_RN32A(&tmp); \
1155 mem_sub8x8 = AV_RN32A(&mv); \
1156 } else if (mem_sub8x8 != AV_RN32A(&mv)) { \
1157 clamp_mv(&tmp, &mv, s); \
1158 m = AV_RN32A(&tmp); \
1162 /* BUG I'm pretty sure this isn't the intention */ \
1168 uint32_t m = AV_RN32A(&mv); \
1170 clamp_mv(pmv, &mv, s); \
1172 } else if (mem == INVALID_MV) { \
1174 } else if (m != mem) { \
1175 clamp_mv(pmv, &mv, s); \
// Directly-above block (row - 1), read from the above-context MV cache.
1182 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1183 if (mv->ref[0] == ref) {
1184 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1185 } else if (mv->ref[1] == ref) {
1186 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
// Directly-left block, only if it is inside the current tile.
1189 if (col > s->tiling.tile_col_start) {
1190 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1191 if (mv->ref[0] == ref) {
1192 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1193 } else if (mv->ref[1] == ref) {
1194 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1202 // previously coded MVs in this neighbourhood, using same reference frame
1203 for (; i < 8; i++) {
1204 int c = p[i][0] + col, r = p[i][1] + row;
1206 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1207 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1209 if (mv->ref[0] == ref) {
1210 RETURN_MV(mv->mv[0]);
1211 } else if (mv->ref[1] == ref) {
1212 RETURN_MV(mv->mv[1]);
1217 // MV at this position in previous frame, using same reference frame
1218 if (s->use_last_frame_mvs) {
1219 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
// Frame-threading: wait until the reference thread has decoded this sb row.
1221 if (!s->frames[REF_FRAME_MVPAIR].uses_2pass)
1222 ff_thread_await_progress(&s->frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
1223 if (mv->ref[0] == ref) {
1224 RETURN_MV(mv->mv[0]);
1225 } else if (mv->ref[1] == ref) {
1226 RETURN_MV(mv->mv[1]);
// RETURN_SCALE_MV: accept an MV of a *different* reference frame, negating it
// when the two references have opposite temporal sign bias.
1230 #define RETURN_SCALE_MV(mv, scale) \
1233 VP56mv mv_temp = { -mv.x, -mv.y }; \
1234 RETURN_MV(mv_temp); \
1240 // previously coded MVs in this neighbourhood, using different reference frame
1241 for (i = 0; i < 8; i++) {
1242 int c = p[i][0] + col, r = p[i][1] + row;
1244 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1245 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1247 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1248 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1250 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1251 // BUG - libvpx has this condition regardless of whether
1252 // we used the first ref MV and pre-scaling
1253 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1254 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1259 // MV at this position in previous frame, using different reference frame
1260 if (s->use_last_frame_mvs) {
1261 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1263 // no need to await_progress, because we already did that above
1264 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1265 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1267 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1268 // BUG - libvpx has this condition regardless of whether
1269 // we used the first ref MV and pre-scaling
1270 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1271 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1278 #undef RETURN_SCALE_MV
// Decode one motion-vector component (idx 0 = row/y, 1 = col/x, matching the
// callers in fill_mv) from the range coder: sign, magnitude class, then either
// the class-0 short path or the per-bit long path, followed by the fractional
// ("fp") and, when hp is set, the high-precision bit. All decoded symbols are
// tallied into s->counts.mv_comp for backward probability adaptation.
// Returns the signed component value. NOTE(review): several branch/brace
// lines are elided in this excerpt.
1281 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1283 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1284 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1285 s->prob.p.mv_comp[idx].classes);
1287 s->counts.mv_comp[idx].sign[sign]++;
1288 s->counts.mv_comp[idx].classes[c]++;
// Long path (class > 0): read c magnitude bits individually.
1292 for (n = 0, m = 0; m < c; m++) {
1293 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1295 s->counts.mv_comp[idx].bits[m][bit]++;
1298 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1300 s->counts.mv_comp[idx].fp[bit]++;
1302 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1303 s->counts.mv_comp[idx].hp[bit]++;
1307 // bug in libvpx - we count for bw entropy purposes even if the
1309 s->counts.mv_comp[idx].hp[1]++;
// Short path (class 0): one bit plus fractional tree.
1313 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1314 s->counts.mv_comp[idx].class0[n]++;
1315 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1316 s->prob.p.mv_comp[idx].class0_fp[n]);
1317 s->counts.mv_comp[idx].class0_fp[n][bit]++;
1318 n = (n << 3) | (bit << 1);
1320 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1321 s->counts.mv_comp[idx].class0_hp[bit]++;
1325 // bug in libvpx - we count for bw entropy purposes even if the
1327 s->counts.mv_comp[idx].class0_hp[1]++;
// Magnitude is stored biased by 1; apply the sign last.
1331 return sign ? -(n + 1) : (n + 1);
// Fill mv[0] (and mv[1] for compound prediction) for the current (sub-)block:
// predict each MV with find_ref_mvs(), then, for NEWMV mode, decode the joint
// residual (vp9_mv_joint_tree selects which components are present) and add
// the per-component deltas. hp (high-precision MVs) is disabled when the
// predicted MV is large (|x| or |y| >= 64). sb is the sub-block index, or -1
// for whole-block mode. NOTE(review): ZEROMV body and several branch/brace
// lines are elided in this excerpt.
1334 static void fill_mv(VP9Context *s,
1335 VP56mv *mv, int mode, int sb)
1339 if (mode == ZEROMV) {
// ---- first reference MV ----
1344 // FIXME cache this value and reuse for other subblocks
1345 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1346 mode == NEWMV ? -1 : sb);
1347 // FIXME maybe move this code into find_ref_mvs()
1348 if ((mode == NEWMV || sb == -1) &&
1349 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1363 if (mode == NEWMV) {
1364 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1365 s->prob.p.mv_joint);
1367 s->counts.mv_joint[j]++;
1368 if (j >= MV_JOINT_V)
1369 mv[0].y += read_mv_component(s, 0, hp);
1371 mv[0].x += read_mv_component(s, 1, hp);
// ---- second reference MV (compound prediction only) ----
1375 // FIXME cache this value and reuse for other subblocks
1376 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1377 mode == NEWMV ? -1 : sb);
1378 if ((mode == NEWMV || sb == -1) &&
1379 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1393 if (mode == NEWMV) {
1394 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1395 s->prob.p.mv_joint);
1397 s->counts.mv_joint[j]++;
1398 if (j >= MV_JOINT_V)
1399 mv[1].y += read_mv_component(s, 0, hp);
1401 mv[1].x += read_mv_component(s, 1, hp);
// Fill a w x h byte region at 'ptr' (row pitch 'stride') with the value v,
// using byte-replicated 16/32/64-bit aligned stores selected by the width.
// NOTE(review): the switch/loop control flow is elided in this excerpt; only
// the replicated-constant setup and one store are visible.
1407 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1408 ptrdiff_t stride, int v)
1418 int v16 = v * 0x0101;
1426 uint32_t v32 = v * 0x01010101;
1435 uint64_t v64 = v * 0x0101010101010101ULL;
1441 uint32_t v32 = v * 0x01010101;
1444 AV_WN32A(ptr + 4, v32);
// Parse all per-block mode information for the current block (s->row/s->col):
// segment id, skip flag, intra/inter flag, transform size, intra prediction
// modes (keyframe and non-keyframe paths), compound flag, reference frame(s),
// interpolation filter, inter modes and motion vectors — then splat the
// decoded values into the above/left context arrays and the per-4x4 MV/ref
// store of the current frame. NOTE(review): many brace/else/blank lines are
// elided throughout this excerpt.
1453 static void decode_mode(AVCodecContext *ctx)
// Partition context patterns per block size, splatted into the above/left
// partition context arrays at the end of this function.
1455 static const uint8_t left_ctx[N_BS_SIZES] = {
1456 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1458 static const uint8_t above_ctx[N_BS_SIZES] = {
1459 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
// Largest transform size permitted for each block size.
1461 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1462 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1463 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1465 VP9Context *s = ctx->priv_data;
1467 int row = s->row, col = s->col, row7 = s->row7;
1468 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
// w4/h4: block size in 8x8 units, clipped at the frame edge.
1469 int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1470 int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
// Above neighbour exists unless on the first row; left neighbour must be
// inside the current tile.
1471 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1472 int vref, filter_id;
// ---- segment id: explicit, temporally predicted, or re-read ----
1474 if (!s->segmentation.enabled) {
1476 } else if (s->keyframe || s->intraonly) {
1477 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
1478 } else if (!s->segmentation.update_map ||
1479 (s->segmentation.temporal &&
1480 vp56_rac_get_prob_branchy(&s->c,
1481 s->prob.segpred[s->above_segpred_ctx[col] +
1482 s->left_segpred_ctx[row7]]))) {
// Predict seg_id as the minimum over the co-located area of the reference
// frame's segmentation map (unless error-resilient / refmap ignored).
1483 if (!s->errorres && !s->segmentation.ignore_refmap) {
1485 uint8_t *refsegmap = s->frames[REF_FRAME_SEGMAP].segmentation_map;
1487 if (!s->frames[REF_FRAME_SEGMAP].uses_2pass)
1488 ff_thread_await_progress(&s->frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
1489 for (y = 0; y < h4; y++) {
1490 int idx_base = (y + row) * 8 * s->sb_cols + col;
1491 for (x = 0; x < w4; x++)
1492 pred = FFMIN(pred, refsegmap[idx_base + x]);
1494 av_assert1(pred < 8);
1500 memset(&s->above_segpred_ctx[col], 1, w4);
1501 memset(&s->left_segpred_ctx[row7], 1, h4);
1503 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1506 memset(&s->above_segpred_ctx[col], 0, w4);
1507 memset(&s->left_segpred_ctx[row7], 0, h4);
// Record the decoded seg_id into the current frame's segmentation map.
1509 if (s->segmentation.enabled &&
1510 (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1511 setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1512 bw4, bh4, 8 * s->sb_cols, b->seg_id);
// ---- skip flag: forced by segment feature, otherwise coded ----
1515 b->skip = s->segmentation.enabled &&
1516 s->segmentation.feat[b->seg_id].skip_enabled;
1518 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1519 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1520 s->counts.skip[c][b->skip]++;
// ---- intra/inter flag ----
1523 if (s->keyframe || s->intraonly) {
1525 } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1526 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1530 if (have_a && have_l) {
1531 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1534 c = have_a ? 2 * s->above_intra_ctx[col] :
1535 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1537 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1538 s->counts.intra[c][bit]++;
// ---- transform size: coded when switchable, else capped by txfmmode ----
1542 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1546 c = (s->above_skip_ctx[col] ? max_tx :
1547 s->above_txfm_ctx[col]) +
1548 (s->left_skip_ctx[row7] ? max_tx :
1549 s->left_txfm_ctx[row7]) > max_tx;
1551 c = s->above_skip_ctx[col] ? 1 :
1552 (s->above_txfm_ctx[col] * 2 > max_tx);
1554 } else if (have_l) {
1555 c = s->left_skip_ctx[row7] ? 1 :
1556 (s->left_txfm_ctx[row7] * 2 > max_tx);
// Unary-coded tx size, with per-max_tx probability tables.
1562 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1564 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1566 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1568 s->counts.tx32p[c][b->tx]++;
1571 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1573 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1574 s->counts.tx16p[c][b->tx]++;
1577 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1578 s->counts.tx8p[c][b->tx]++;
1585 b->tx = FFMIN(max_tx, s->txfmmode);
// ---- keyframe/intraonly intra modes: fixed KF probability tables keyed on
// the above (a) and left (l) neighbour modes ----
1588 if (s->keyframe || s->intraonly) {
1589 uint8_t *a = &s->above_mode_ctx[col * 2];
1590 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1593 if (b->bs > BS_8x8) {
1594 // FIXME the memory storage intermediates here aren't really
1595 // necessary, they're just there to make the code slightly
1597 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1598 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1599 if (b->bs != BS_8x4) {
1600 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1601 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1602 l[0] = a[1] = b->mode[1];
1604 l[0] = a[1] = b->mode[1] = b->mode[0];
1606 if (b->bs != BS_4x8) {
1607 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1608 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1609 if (b->bs != BS_8x4) {
1610 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1611 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1612 l[1] = a[1] = b->mode[3];
1614 l[1] = a[1] = b->mode[3] = b->mode[2];
1617 b->mode[2] = b->mode[0];
1618 l[1] = a[1] = b->mode[3] = b->mode[1];
1621 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1622 vp9_default_kf_ymode_probs[*a][*l]);
1623 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1624 // FIXME this can probably be optimized
1625 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1626 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1628 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1629 vp9_default_kf_uvmode_probs[b->mode[3]]);
// ---- inter-frame intra modes: adaptive y_mode/uv_mode probabilities ----
1630 } else if (b->intra) {
1632 if (b->bs > BS_8x8) {
1633 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1634 s->prob.p.y_mode[0]);
1635 s->counts.y_mode[0][b->mode[0]]++;
1636 if (b->bs != BS_8x4) {
1637 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1638 s->prob.p.y_mode[0]);
1639 s->counts.y_mode[0][b->mode[1]]++;
1641 b->mode[1] = b->mode[0];
1643 if (b->bs != BS_4x8) {
1644 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1645 s->prob.p.y_mode[0]);
1646 s->counts.y_mode[0][b->mode[2]]++;
1647 if (b->bs != BS_8x4) {
1648 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1649 s->prob.p.y_mode[0]);
1650 s->counts.y_mode[0][b->mode[3]]++;
1652 b->mode[3] = b->mode[2];
1655 b->mode[2] = b->mode[0];
1656 b->mode[3] = b->mode[1];
// Probability-set index per block size for whole-block intra mode.
1659 static const uint8_t size_group[10] = {
1660 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1662 int sz = size_group[b->bs];
1664 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1665 s->prob.p.y_mode[sz]);
1666 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1667 s->counts.y_mode[sz][b->mode[3]]++;
1669 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1670 s->prob.p.uv_mode[b->mode[3]]);
1671 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
// ---- inter path: context LUT mapping (above-mode, left-mode) to the
// inter-mode probability context ----
1673 static const uint8_t inter_mode_ctx_lut[14][14] = {
1674 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1675 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1676 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1677 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1678 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1679 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1680 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1681 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1682 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1683 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1684 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1685 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1686 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1687 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
// Segment feature may pin the reference frame; otherwise decode it.
1690 if (s->segmentation.feat[b->seg_id].ref_enabled) {
1691 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1693 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1695 // read comp_pred flag
1696 if (s->comppredmode != PRED_SWITCHABLE) {
1697 b->comp = s->comppredmode == PRED_COMPREF;
1701 // FIXME add intra as ref=0xff (or -1) to make these easier?
1704 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1706 } else if (s->above_comp_ctx[col]) {
1707 c = 2 + (s->left_intra_ctx[row7] ||
1708 s->left_ref_ctx[row7] == s->fixcompref);
1709 } else if (s->left_comp_ctx[row7]) {
1710 c = 2 + (s->above_intra_ctx[col] ||
1711 s->above_ref_ctx[col] == s->fixcompref);
1713 c = (!s->above_intra_ctx[col] &&
1714 s->above_ref_ctx[col] == s->fixcompref) ^
1715 (!s->left_intra_ctx[row7] &&
1716 s->left_ref_ctx[row & 7] == s->fixcompref);
1719 c = s->above_comp_ctx[col] ? 3 :
1720 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1722 } else if (have_l) {
1723 c = s->left_comp_ctx[row7] ? 3 :
1724 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1728 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1729 s->counts.comp[c][b->comp]++;
1732 // read actual references
1733 // FIXME probably cache a few variables here to prevent repetitive
1734 // memory accesses below
1735 if (b->comp) /* two references */ {
// Compound: one reference is fixed (fixcompref); only the variable one
// (varcompref[bit]) is coded. Context c derives from the neighbours.
1736 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1738 b->ref[fix_idx] = s->fixcompref;
1739 // FIXME can this codeblob be replaced by some sort of LUT?
1742 if (s->above_intra_ctx[col]) {
1743 if (s->left_intra_ctx[row7]) {
1746 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1748 } else if (s->left_intra_ctx[row7]) {
1749 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1751 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1753 if (refl == refa && refa == s->varcompref[1]) {
1755 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1756 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1757 (refl == s->fixcompref && refa == s->varcompref[0])) {
1760 c = (refa == refl) ? 3 : 1;
1762 } else if (!s->left_comp_ctx[row7]) {
1763 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1766 c = (refl == s->varcompref[1] &&
1767 refa != s->varcompref[1]) ? 2 : 4;
1769 } else if (!s->above_comp_ctx[col]) {
1770 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1773 c = (refa == s->varcompref[1] &&
1774 refl != s->varcompref[1]) ? 2 : 4;
1777 c = (refl == refa) ? 4 : 2;
1781 if (s->above_intra_ctx[col]) {
1783 } else if (s->above_comp_ctx[col]) {
1784 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1786 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1789 } else if (have_l) {
1790 if (s->left_intra_ctx[row7]) {
1792 } else if (s->left_comp_ctx[row7]) {
1793 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1795 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1800 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1801 b->ref[var_idx] = s->varcompref[bit];
1802 s->counts.comp_ref[c][bit]++;
1803 } else /* single reference */ {
// Single reference: first bit picks LAST vs. GOLDEN/ALTREF, second bit
// (when needed) picks between GOLDEN and ALTREF. Contexts from neighbours.
1806 if (have_a && !s->above_intra_ctx[col]) {
1807 if (have_l && !s->left_intra_ctx[row7]) {
1808 if (s->left_comp_ctx[row7]) {
1809 if (s->above_comp_ctx[col]) {
1810 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1811 !s->above_ref_ctx[col]);
1813 c = (3 * !s->above_ref_ctx[col]) +
1814 (!s->fixcompref || !s->left_ref_ctx[row7]);
1816 } else if (s->above_comp_ctx[col]) {
1817 c = (3 * !s->left_ref_ctx[row7]) +
1818 (!s->fixcompref || !s->above_ref_ctx[col]);
1820 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1822 } else if (s->above_intra_ctx[col]) {
1824 } else if (s->above_comp_ctx[col]) {
1825 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1827 c = 4 * (!s->above_ref_ctx[col]);
1829 } else if (have_l && !s->left_intra_ctx[row7]) {
1830 if (s->left_intra_ctx[row7]) {
1832 } else if (s->left_comp_ctx[row7]) {
1833 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1835 c = 4 * (!s->left_ref_ctx[row7]);
1840 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1841 s->counts.single_ref[c][0][bit]++;
1845 // FIXME can this codeblob be replaced by some sort of LUT?
1848 if (s->left_intra_ctx[row7]) {
1849 if (s->above_intra_ctx[col]) {
1851 } else if (s->above_comp_ctx[col]) {
1852 c = 1 + 2 * (s->fixcompref == 1 ||
1853 s->above_ref_ctx[col] == 1);
1854 } else if (!s->above_ref_ctx[col]) {
1857 c = 4 * (s->above_ref_ctx[col] == 1);
1859 } else if (s->above_intra_ctx[col]) {
1860 if (s->left_intra_ctx[row7]) {
1862 } else if (s->left_comp_ctx[row7]) {
1863 c = 1 + 2 * (s->fixcompref == 1 ||
1864 s->left_ref_ctx[row7] == 1);
1865 } else if (!s->left_ref_ctx[row7]) {
1868 c = 4 * (s->left_ref_ctx[row7] == 1);
1870 } else if (s->above_comp_ctx[col]) {
1871 if (s->left_comp_ctx[row7]) {
1872 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1873 c = 3 * (s->fixcompref == 1 ||
1874 s->left_ref_ctx[row7] == 1);
1878 } else if (!s->left_ref_ctx[row7]) {
1879 c = 1 + 2 * (s->fixcompref == 1 ||
1880 s->above_ref_ctx[col] == 1);
1882 c = 3 * (s->left_ref_ctx[row7] == 1) +
1883 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1885 } else if (s->left_comp_ctx[row7]) {
1886 if (!s->above_ref_ctx[col]) {
1887 c = 1 + 2 * (s->fixcompref == 1 ||
1888 s->left_ref_ctx[row7] == 1);
1890 c = 3 * (s->above_ref_ctx[col] == 1) +
1891 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1893 } else if (!s->above_ref_ctx[col]) {
1894 if (!s->left_ref_ctx[row7]) {
1897 c = 4 * (s->left_ref_ctx[row7] == 1);
1899 } else if (!s->left_ref_ctx[row7]) {
1900 c = 4 * (s->above_ref_ctx[col] == 1);
1902 c = 2 * (s->left_ref_ctx[row7] == 1) +
1903 2 * (s->above_ref_ctx[col] == 1);
1906 if (s->above_intra_ctx[col] ||
1907 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1909 } else if (s->above_comp_ctx[col]) {
1910 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1912 c = 4 * (s->above_ref_ctx[col] == 1);
1915 } else if (have_l) {
1916 if (s->left_intra_ctx[row7] ||
1917 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1919 } else if (s->left_comp_ctx[row7]) {
1920 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1922 c = 4 * (s->left_ref_ctx[row7] == 1);
1927 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1928 s->counts.single_ref[c][1][bit]++;
1929 b->ref[0] = 1 + bit;
// ---- inter mode + interpolation filter ----
1934 if (b->bs <= BS_8x8) {
1935 if (s->segmentation.feat[b->seg_id].skip_enabled) {
1936 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
// Offset into the above/left mode context for non-square small blocks.
1938 static const uint8_t off[10] = {
1939 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1942 // FIXME this needs to use the LUT tables from find_ref_mvs
1943 // because not all are -1,0/0,-1
1944 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1945 [s->left_mode_ctx[row7 + off[b->bs]]];
1947 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1948 s->prob.p.mv_mode[c]);
1949 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1950 s->counts.mv_mode[c][b->mode[0] - 10]++;
// Interpolation filter: coded only when the frame-level filter is switchable.
1954 if (s->filtermode == FILTER_SWITCHABLE) {
1957 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1958 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1959 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1960 s->left_filter_ctx[row7] : 3;
1962 c = s->above_filter_ctx[col];
1964 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1965 c = s->left_filter_ctx[row7];
1970 filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1971 s->prob.p.filter[c]);
1972 s->counts.filter[c][filter_id]++;
1973 b->filter = vp9_filter_lut[filter_id];
1975 b->filter = s->filtermode;
// Sub-8x8 blocks decode one mode + MV per sub-block; missing sub-blocks are
// copied from their decoded sibling.
1978 if (b->bs > BS_8x8) {
1979 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1981 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1982 s->prob.p.mv_mode[c]);
1983 s->counts.mv_mode[c][b->mode[0] - 10]++;
1984 fill_mv(s, b->mv[0], b->mode[0], 0);
1986 if (b->bs != BS_8x4) {
1987 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1988 s->prob.p.mv_mode[c]);
1989 s->counts.mv_mode[c][b->mode[1] - 10]++;
1990 fill_mv(s, b->mv[1], b->mode[1], 1);
1992 b->mode[1] = b->mode[0];
1993 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1994 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1997 if (b->bs != BS_4x8) {
1998 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1999 s->prob.p.mv_mode[c]);
2000 s->counts.mv_mode[c][b->mode[2] - 10]++;
2001 fill_mv(s, b->mv[2], b->mode[2], 2);
2003 if (b->bs != BS_8x4) {
2004 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
2005 s->prob.p.mv_mode[c]);
2006 s->counts.mv_mode[c][b->mode[3] - 10]++;
2007 fill_mv(s, b->mv[3], b->mode[3], 3);
2009 b->mode[3] = b->mode[2];
2010 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
2011 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
2014 b->mode[2] = b->mode[0];
2015 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2016 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2017 b->mode[3] = b->mode[1];
2018 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
2019 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
// >= 8x8: one MV pair for the whole block, replicated to all four slots.
2022 fill_mv(s, b->mv[0], b->mode[0], -1);
2023 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
2024 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2025 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
2026 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2027 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2028 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
// Reference recorded in the ref-context arrays (variable ref for compound).
2031 vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
// SPLAT_CTX: replicate a byte value across an n-byte context span using the
// widest aligned store available (64-bit variant first, 32-bit fallback; the
// #if/#else preprocessor guards are elided in this excerpt).
2035 #define SPLAT_CTX(var, val, n) \
2037 case 1: var = val; break; \
2038 case 2: AV_WN16A(&var, val * 0x0101); break; \
2039 case 4: AV_WN32A(&var, val * 0x01010101); break; \
2040 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
2042 uint64_t v64 = val * 0x0101010101010101ULL; \
2043 AV_WN64A( &var, v64); \
2044 AV_WN64A(&((uint8_t *) &var)[8], v64); \
2049 #define SPLAT_CTX(var, val, n) \
2051 case 1: var = val; break; \
2052 case 2: AV_WN16A(&var, val * 0x0101); break; \
2053 case 4: AV_WN32A(&var, val * 0x01010101); break; \
2055 uint32_t v32 = val * 0x01010101; \
2056 AV_WN32A( &var, v32); \
2057 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2061 uint32_t v32 = val * 0x01010101; \
2062 AV_WN32A( &var, v32); \
2063 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2064 AV_WN32A(&((uint8_t *) &var)[8], v32); \
2065 AV_WN32A(&((uint8_t *) &var)[12], v32); \
// Splat all decoded per-block values into the above (width) and left (height)
// context arrays used by subsequent blocks.
2071 switch (bwh_tab[1][b->bs][0]) {
2072 #define SET_CTXS(dir, off, n) \
2074 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
2075 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
2076 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2077 if (!s->keyframe && !s->intraonly) { \
2078 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
2079 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
2080 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
2082 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2083 if (s->filtermode == FILTER_SWITCHABLE) { \
2084 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2089 case 1: SET_CTXS(above, col, 1); break;
2090 case 2: SET_CTXS(above, col, 2); break;
2091 case 4: SET_CTXS(above, col, 4); break;
2092 case 8: SET_CTXS(above, col, 8); break;
2094 switch (bwh_tab[1][b->bs][1]) {
2095 case 1: SET_CTXS(left, row7, 1); break;
2096 case 2: SET_CTXS(left, row7, 2); break;
2097 case 4: SET_CTXS(left, row7, 4); break;
2098 case 8: SET_CTXS(left, row7, 8); break;
// Update the above/left MV caches consumed by find_ref_mvs() for later blocks.
2103 if (!s->keyframe && !s->intraonly) {
2104 if (b->bs > BS_8x8) {
2105 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2107 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2108 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2109 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2110 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2111 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2112 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2113 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2114 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2116 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2118 for (n = 0; n < w4 * 2; n++) {
2119 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2120 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2122 for (n = 0; n < h4 * 2; n++) {
2123 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2124 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
// Store refs + MVs per 8x8 unit into the current frame's mv array (also used
// as the temporal MV predictor for the next frame).
2130 for (y = 0; y < h4; y++) {
2131 int x, o = (row + y) * s->sb_cols * 8 + col;
2132 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
2135 for (x = 0; x < w4; x++) {
2139 } else if (b->comp) {
2140 for (x = 0; x < w4; x++) {
2141 mv[x].ref[0] = b->ref[0];
2142 mv[x].ref[1] = b->ref[1];
2143 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2144 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2147 for (x = 0; x < w4; x++) {
2148 mv[x].ref[0] = b->ref[0];
2150 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2156 // FIXME merge cnt/eob arguments?
// Decode one transform block's coefficient tokens from the range coder.
// Per coefficient position i (in 'scan' order): an EOB decision, a zero/one
// decision, then (via the Pareto-model probabilities in tp[3..10]) the
// magnitude category, with literal extra bits for the large categories.
// 'cache' holds per-position token magnitudes so nnz (the probability context
// for the next position) can be derived from the two neighbours in 'nb'.
// Decoded levels are dequantized with qmul (DC uses qmul[0], AC qmul[1]; the
// tx32x32 variant halves the result) and stored via STORE_COEF as int16
// (8 bpp) or int32 (high bit depth). cnt/eob tally symbols for backward
// adaptation. NOTE(review): loop/brace lines and the return are elided in
// this excerpt.
2157 static av_always_inline int
2158 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2159 int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
2160 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2161 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2162 const int16_t *band_counts, const int16_t *qmul)
2164 int i = 0, band = 0, band_left = band_counts[band];
2165 uint8_t *tp = p[0][nnz];
2166 uint8_t cache[1024];
2171 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2172 eob[band][nnz][val]++;
2177 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2178 cnt[band][nnz][0]++;
2180 band_left = band_counts[++band];
// Context for the next position: average of the two scan neighbours.
2182 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2184 if (++i == n_coeffs)
2185 break; //invalid input; blocks should end with EOB
2190 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2191 cnt[band][nnz][1]++;
2195 // fill in p[3-10] (model fill) - only once per frame for each pos
2197 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2199 cnt[band][nnz][2]++;
2200 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2201 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2202 cache[rc] = val = 2;
2204 val = 3 + vp56_rac_get_prob(c, tp[5]);
2207 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2209 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2210 val = 5 + vp56_rac_get_prob(c, 159);
2212 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2213 val += vp56_rac_get_prob(c, 145);
// cat 3-6: progressively longer literal suffixes with fixed probabilities.
2217 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2218 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2219 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2220 val += (vp56_rac_get_prob(c, 148) << 1);
2221 val += vp56_rac_get_prob(c, 140);
2223 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2224 val += (vp56_rac_get_prob(c, 155) << 2);
2225 val += (vp56_rac_get_prob(c, 140) << 1);
2226 val += vp56_rac_get_prob(c, 135);
2228 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2229 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2230 val += (vp56_rac_get_prob(c, 157) << 3);
2231 val += (vp56_rac_get_prob(c, 141) << 2);
2232 val += (vp56_rac_get_prob(c, 134) << 1);
2233 val += vp56_rac_get_prob(c, 130);
// cat6: high-bit-depth streams carry extra top bits (up to bit 17).
2236 if (!is8bitsperpixel) {
2238 val += vp56_rac_get_prob(c, 255) << 17;
2239 val += vp56_rac_get_prob(c, 255) << 16;
2241 val += (vp56_rac_get_prob(c, 255) << 15);
2242 val += (vp56_rac_get_prob(c, 255) << 14);
2244 val += (vp56_rac_get_prob(c, 254) << 13);
2245 val += (vp56_rac_get_prob(c, 254) << 12);
2246 val += (vp56_rac_get_prob(c, 254) << 11);
2247 val += (vp56_rac_get_prob(c, 252) << 10);
2248 val += (vp56_rac_get_prob(c, 249) << 9);
2249 val += (vp56_rac_get_prob(c, 243) << 8);
2250 val += (vp56_rac_get_prob(c, 230) << 7);
2251 val += (vp56_rac_get_prob(c, 196) << 6);
2252 val += (vp56_rac_get_prob(c, 177) << 5);
2253 val += (vp56_rac_get_prob(c, 153) << 4);
2254 val += (vp56_rac_get_prob(c, 140) << 3);
2255 val += (vp56_rac_get_prob(c, 133) << 2);
2256 val += (vp56_rac_get_prob(c, 130) << 1);
2257 val += vp56_rac_get_prob(c, 129);
// 8 bpp stores int16 directly; high bit depth stores a 32-bit value.
2261 #define STORE_COEF(c, i, v) do { \
2262 if (is8bitsperpixel) { \
2265 AV_WN32A(&c[i * 2], v); \
2269 band_left = band_counts[++band];
// Sign bit last; tx32x32 halves the dequantized level.
2271 STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
2273 STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
2274 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2276 } while (++i < n_coeffs);
// Specialization of decode_coeffs_b_generic: 8 bpp, non-32x32 transform
// (is_tx32x32 = 0, is8bitsperpixel = 1, bpp = 8).
2281 static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2282 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2283 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2284 const int16_t (*nb)[2], const int16_t *band_counts,
2285 const int16_t *qmul)
2287 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
2288 nnz, scan, nb, band_counts, qmul);
2291 static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2292 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2293 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2294 const int16_t (*nb)[2], const int16_t *band_counts,
2295 const int16_t *qmul)
2297 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
2298 nnz, scan, nb, band_counts, qmul);
2301 static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2302 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2303 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2304 const int16_t (*nb)[2], const int16_t *band_counts,
2305 const int16_t *qmul)
2307 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
2308 nnz, scan, nb, band_counts, qmul);
2311 static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2312 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2313 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2314 const int16_t (*nb)[2], const int16_t *band_counts,
2315 const int16_t *qmul)
2317 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
2318 nnz, scan, nb, band_counts, qmul);
2321 static av_always_inline int decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
2323 VP9Context *s = ctx->priv_data;
2325 int row = s->row, col = s->col;
2326 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2327 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2328 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2329 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2330 int end_x = FFMIN(2 * (s->cols - col), w4);
2331 int end_y = FFMIN(2 * (s->rows - row), h4);
2332 int n, pl, x, y, res;
2333 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
2334 int tx = 4 * s->lossless + b->tx;
2335 const int16_t * const *yscans = vp9_scans[tx];
2336 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2337 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2338 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2339 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2340 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
2341 static const int16_t band_counts[4][8] = {
2342 { 1, 2, 3, 4, 3, 16 - 13 },
2343 { 1, 2, 3, 4, 11, 64 - 21 },
2344 { 1, 2, 3, 4, 11, 256 - 21 },
2345 { 1, 2, 3, 4, 11, 1024 - 21 },
2347 const int16_t *y_band_counts = band_counts[b->tx];
2348 const int16_t *uv_band_counts = band_counts[b->uvtx];
2349 int bytesperpixel = is8bitsperpixel ? 1 : 2;
2350 int total_coeff = 0;
2352 #define MERGE(la, end, step, rd) \
2353 for (n = 0; n < end; n += step) \
2354 la[n] = !!rd(&la[n])
2355 #define MERGE_CTX(step, rd) \
2357 MERGE(l, end_y, step, rd); \
2358 MERGE(a, end_x, step, rd); \
2361 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2362 for (n = 0, y = 0; y < end_y; y += step) { \
2363 for (x = 0; x < end_x; x += step, n += step * step) { \
2364 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2365 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2366 (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
2367 c, e, p, a[x] + l[y], yscans[txtp], \
2368 ynbs[txtp], y_band_counts, qmul[0]); \
2369 a[x] = l[y] = !!res; \
2370 total_coeff |= !!res; \
2372 AV_WN16A(&s->eob[n], res); \
2379 #define SPLAT(la, end, step, cond) \
2381 for (n = 1; n < end; n += step) \
2382 la[n] = la[n - 1]; \
2383 } else if (step == 4) { \
2385 for (n = 0; n < end; n += step) \
2386 AV_WN32A(&la[n], la[n] * 0x01010101); \
2388 for (n = 0; n < end; n += step) \
2389 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2391 } else /* step == 8 */ { \
2393 if (HAVE_FAST_64BIT) { \
2394 for (n = 0; n < end; n += step) \
2395 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2397 for (n = 0; n < end; n += step) { \
2398 uint32_t v32 = la[n] * 0x01010101; \
2399 AV_WN32A(&la[n], v32); \
2400 AV_WN32A(&la[n + 4], v32); \
2404 for (n = 0; n < end; n += step) \
2405 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2408 #define SPLAT_CTX(step) \
2410 SPLAT(a, end_x, step, end_x == w4); \
2411 SPLAT(l, end_y, step, end_y == h4); \
2417 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2420 MERGE_CTX(2, AV_RN16A);
2421 DECODE_Y_COEF_LOOP(2, 0,);
2425 MERGE_CTX(4, AV_RN32A);
2426 DECODE_Y_COEF_LOOP(4, 0,);
2430 MERGE_CTX(8, AV_RN64A);
2431 DECODE_Y_COEF_LOOP(8, 0, 32);
2436 #define DECODE_UV_COEF_LOOP(step, v) \
2437 for (n = 0, y = 0; y < end_y; y += step) { \
2438 for (x = 0; x < end_x; x += step, n += step * step) { \
2439 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2440 (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
2441 16 * step * step, c, e, p, a[x] + l[y], \
2442 uvscan, uvnb, uv_band_counts, qmul[1]); \
2443 a[x] = l[y] = !!res; \
2444 total_coeff |= !!res; \
2446 AV_WN16A(&s->uveob[pl][n], res); \
2448 s->uveob[pl][n] = res; \
2453 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2454 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2455 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2460 for (pl = 0; pl < 2; pl++) {
2461 a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2462 l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2465 DECODE_UV_COEF_LOOP(1,);
2468 MERGE_CTX(2, AV_RN16A);
2469 DECODE_UV_COEF_LOOP(2,);
2473 MERGE_CTX(4, AV_RN32A);
2474 DECODE_UV_COEF_LOOP(4,);
2478 MERGE_CTX(8, AV_RN64A);
2479 DECODE_UV_COEF_LOOP(8, 32);
2488 static int decode_coeffs_8bpp(AVCodecContext *ctx)
2490 return decode_coeffs(ctx, 1);
2493 static int decode_coeffs_16bpp(AVCodecContext *ctx)
2495 return decode_coeffs(ctx, 0);
2498 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2499 uint8_t *dst_edge, ptrdiff_t stride_edge,
2500 uint8_t *dst_inner, ptrdiff_t stride_inner,
2501 uint8_t *l, int col, int x, int w,
2502 int row, int y, enum TxfmMode tx,
2503 int p, int ss_h, int ss_v, int bytesperpixel)
2505 int have_top = row > 0 || y > 0;
2506 int have_left = col > s->tiling.tile_col_start || x > 0;
2507 int have_right = x < w - 1;
2509 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2510 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2511 { DC_127_PRED, VERT_PRED } },
2512 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2513 { HOR_PRED, HOR_PRED } },
2514 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2515 { LEFT_DC_PRED, DC_PRED } },
2516 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2517 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2518 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2519 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2520 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2521 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2522 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2523 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2524 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2525 { DC_127_PRED, VERT_LEFT_PRED } },
2526 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2527 { HOR_UP_PRED, HOR_UP_PRED } },
2528 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2529 { HOR_PRED, TM_VP8_PRED } },
2531 static const struct {
2532 uint8_t needs_left:1;
2533 uint8_t needs_top:1;
2534 uint8_t needs_topleft:1;
2535 uint8_t needs_topright:1;
2536 uint8_t invert_left:1;
2537 } edges[N_INTRA_PRED_MODES] = {
2538 [VERT_PRED] = { .needs_top = 1 },
2539 [HOR_PRED] = { .needs_left = 1 },
2540 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2541 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2542 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2543 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2544 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2545 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2546 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2547 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2548 [LEFT_DC_PRED] = { .needs_left = 1 },
2549 [TOP_DC_PRED] = { .needs_top = 1 },
2550 [DC_128_PRED] = { 0 },
2551 [DC_127_PRED] = { 0 },
2552 [DC_129_PRED] = { 0 }
2555 av_assert2(mode >= 0 && mode < 10);
2556 mode = mode_conv[mode][have_left][have_top];
2557 if (edges[mode].needs_top) {
2558 uint8_t *top, *topleft;
2559 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2560 int n_px_need_tr = 0;
2562 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2565 // if top of sb64-row, use s->intra_pred_data[] instead of
2566 // dst[-stride] for intra prediction (it contains pre- instead of
2567 // post-loopfilter data)
2569 top = !(row & 7) && !y ?
2570 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2571 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2573 topleft = !(row & 7) && !y ?
2574 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2575 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2576 &dst_inner[-stride_inner];
2580 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2581 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2582 n_px_need + n_px_need_tr <= n_px_have) {
2586 if (n_px_need <= n_px_have) {
2587 memcpy(*a, top, n_px_need * bytesperpixel);
2589 #define memset_bpp(c, i1, v, i2, num) do { \
2590 if (bytesperpixel == 1) { \
2591 memset(&(c)[(i1)], (v)[(i2)], (num)); \
2593 int n, val = AV_RN16A(&(v)[(i2) * 2]); \
2594 for (n = 0; n < (num); n++) { \
2595 AV_WN16A(&(c)[((i1) + n) * 2], val); \
2599 memcpy(*a, top, n_px_have * bytesperpixel);
2600 memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
2603 #define memset_val(c, val, num) do { \
2604 if (bytesperpixel == 1) { \
2605 memset((c), (val), (num)); \
2608 for (n = 0; n < (num); n++) { \
2609 AV_WN16A(&(c)[n * 2], (val)); \
2613 memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
2615 if (edges[mode].needs_topleft) {
2616 if (have_left && have_top) {
2617 #define assign_bpp(c, i1, v, i2) do { \
2618 if (bytesperpixel == 1) { \
2619 (c)[(i1)] = (v)[(i2)]; \
2621 AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
2624 assign_bpp(*a, -1, topleft, -1);
2626 #define assign_val(c, i, v) do { \
2627 if (bytesperpixel == 1) { \
2630 AV_WN16A(&(c)[(i) * 2], (v)); \
2633 assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
2636 if (tx == TX_4X4 && edges[mode].needs_topright) {
2637 if (have_top && have_right &&
2638 n_px_need + n_px_need_tr <= n_px_have) {
2639 memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
2641 memset_bpp(*a, 4, *a, 3, 4);
2646 if (edges[mode].needs_left) {
2648 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2649 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2650 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2652 if (edges[mode].invert_left) {
2653 if (n_px_need <= n_px_have) {
2654 for (i = 0; i < n_px_need; i++)
2655 assign_bpp(l, i, &dst[i * stride], -1);
2657 for (i = 0; i < n_px_have; i++)
2658 assign_bpp(l, i, &dst[i * stride], -1);
2659 memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
2662 if (n_px_need <= n_px_have) {
2663 for (i = 0; i < n_px_need; i++)
2664 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2666 for (i = 0; i < n_px_have; i++)
2667 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2668 memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
2672 memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
2679 static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
2680 ptrdiff_t uv_off, int bytesperpixel)
2682 VP9Context *s = ctx->priv_data;
2684 int row = s->row, col = s->col;
2685 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2686 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2687 int end_x = FFMIN(2 * (s->cols - col), w4);
2688 int end_y = FFMIN(2 * (s->rows - row), h4);
2689 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2690 int uvstep1d = 1 << b->uvtx, p;
2691 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2692 LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
2693 LOCAL_ALIGNED_32(uint8_t, l, [64]);
2695 for (n = 0, y = 0; y < end_y; y += step1d) {
2696 uint8_t *ptr = dst, *ptr_r = dst_r;
2697 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
2698 ptr_r += 4 * step1d * bytesperpixel, n += step) {
2699 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2701 uint8_t *a = &a_buf[32];
2702 enum TxfmType txtp = vp9_intra_txfm_type[mode];
2703 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2705 mode = check_intra_mode(s, mode, &a, ptr_r,
2706 s->frames[CUR_FRAME].tf.f->linesize[0],
2707 ptr, s->y_stride, l,
2708 col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
2709 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2711 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2712 s->block + 16 * n * bytesperpixel, eob);
2714 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2715 dst += 4 * step1d * s->y_stride;
2722 step = 1 << (b->uvtx * 2);
2723 for (p = 0; p < 2; p++) {
2724 dst = s->dst[1 + p];
2725 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2726 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2727 uint8_t *ptr = dst, *ptr_r = dst_r;
2728 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
2729 ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
2730 int mode = b->uvmode;
2731 uint8_t *a = &a_buf[32];
2732 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2734 mode = check_intra_mode(s, mode, &a, ptr_r,
2735 s->frames[CUR_FRAME].tf.f->linesize[1],
2736 ptr, s->uv_stride, l, col, x, w4, row, y,
2737 b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
2738 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2740 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2741 s->uvblock[p] + 16 * n * bytesperpixel, eob);
2743 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2744 dst += 4 * uvstep1d * s->uv_stride;
2749 static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2751 intra_recon(ctx, y_off, uv_off, 1);
2754 static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2756 intra_recon(ctx, y_off, uv_off, 2);
2759 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2760 uint8_t *dst, ptrdiff_t dst_stride,
2761 const uint8_t *ref, ptrdiff_t ref_stride,
2762 ThreadFrame *ref_frame,
2763 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2764 int px, int py, int pw, int ph,
2765 int bw, int bh, int w, int h, int bytesperpixel,
2766 const uint16_t *scale, const uint8_t *step)
2768 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2770 int refbw_m1, refbh_m1;
2774 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2775 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2776 // BUG libvpx seems to scale the two components separately. This introduces
2777 // rounding errors but we have to reproduce them to be exactly compatible
2778 // with the output from libvpx...
2779 mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
2780 my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
2784 ref += y * ref_stride + x * bytesperpixel;
2787 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2788 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2789 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2790 // we use +7 because the last 7 pixels of each sbrow can be changed in
2791 // the longest loopfilter of the next sbrow
2792 th = (y + refbh_m1 + 4 + 7) >> 6;
2793 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2794 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2795 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2796 ref - 3 * ref_stride - 3 * bytesperpixel,
2798 refbw_m1 + 8, refbh_m1 + 8,
2799 x - 3, y - 3, w, h);
2800 ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2803 smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
2806 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2807 uint8_t *dst_u, uint8_t *dst_v,
2808 ptrdiff_t dst_stride,
2809 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2810 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2811 ThreadFrame *ref_frame,
2812 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2813 int px, int py, int pw, int ph,
2814 int bw, int bh, int w, int h, int bytesperpixel,
2815 const uint16_t *scale, const uint8_t *step)
2818 int refbw_m1, refbh_m1;
2823 // BUG https://code.google.com/p/webm/issues/detail?id=820
2824 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 4, (s->cols * 4 - x + px + 3) << 4);
2825 mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2827 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2828 mx = scale_mv(mv.x << 1, 0) + scale_mv(x * 16, 0);
2831 // BUG https://code.google.com/p/webm/issues/detail?id=820
2832 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 4, (s->rows * 4 - y + py + 3) << 4);
2833 my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2835 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2836 my = scale_mv(mv.y << 1, 1) + scale_mv(y * 16, 1);
2841 ref_u += y * src_stride_u + x * bytesperpixel;
2842 ref_v += y * src_stride_v + x * bytesperpixel;
2845 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2846 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2847 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2848 // we use +7 because the last 7 pixels of each sbrow can be changed in
2849 // the longest loopfilter of the next sbrow
2850 th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2851 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2852 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2853 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2854 ref_u - 3 * src_stride_u - 3 * bytesperpixel,
2856 refbw_m1 + 8, refbh_m1 + 8,
2857 x - 3, y - 3, w, h);
2858 ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2859 smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
2861 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2862 ref_v - 3 * src_stride_v - 3 * bytesperpixel,
2864 refbw_m1 + 8, refbh_m1 + 8,
2865 x - 3, y - 3, w, h);
2866 ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2867 smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
2869 smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2870 smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
2874 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2875 px, py, pw, ph, bw, bh, w, h, i) \
2876 mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
2877 mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2878 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2879 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2880 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2881 mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2882 row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2883 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2885 #define FN(x) x##_scaled_8bpp
2886 #define BYTES_PER_PIXEL 1
2887 #include "vp9_mc_template.c"
2889 #undef BYTES_PER_PIXEL
2890 #define FN(x) x##_scaled_16bpp
2891 #define BYTES_PER_PIXEL 2
2892 #include "vp9_mc_template.c"
2894 #undef mc_chroma_dir
2896 #undef BYTES_PER_PIXEL
2899 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2900 uint8_t *dst, ptrdiff_t dst_stride,
2901 const uint8_t *ref, ptrdiff_t ref_stride,
2902 ThreadFrame *ref_frame,
2903 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2904 int bw, int bh, int w, int h, int bytesperpixel)
2906 int mx = mv->x, my = mv->y, th;
2910 ref += y * ref_stride + x * bytesperpixel;
2913 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2914 // we use +7 because the last 7 pixels of each sbrow can be changed in
2915 // the longest loopfilter of the next sbrow
2916 th = (y + bh + 4 * !!my + 7) >> 6;
2917 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2918 if (x < !!mx * 3 || y < !!my * 3 ||
2919 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2920 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2921 ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
2923 bw + !!mx * 7, bh + !!my * 7,
2924 x - !!mx * 3, y - !!my * 3, w, h);
2925 ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2928 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
2931 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2932 uint8_t *dst_u, uint8_t *dst_v,
2933 ptrdiff_t dst_stride,
2934 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2935 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2936 ThreadFrame *ref_frame,
2937 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2938 int bw, int bh, int w, int h, int bytesperpixel)
2940 int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
2944 ref_u += y * src_stride_u + x * bytesperpixel;
2945 ref_v += y * src_stride_v + x * bytesperpixel;
2948 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2949 // we use +7 because the last 7 pixels of each sbrow can be changed in
2950 // the longest loopfilter of the next sbrow
2951 th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2952 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2953 if (x < !!mx * 3 || y < !!my * 3 ||
2954 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2955 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2956 ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
2958 bw + !!mx * 7, bh + !!my * 7,
2959 x - !!mx * 3, y - !!my * 3, w, h);
2960 ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2961 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
2963 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2964 ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
2966 bw + !!mx * 7, bh + !!my * 7,
2967 x - !!mx * 3, y - !!my * 3, w, h);
2968 ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2969 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
2971 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2972 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
/* Bind the template hooks mc_luma_dir()/mc_chroma_dir() to the *unscaled*
 * MC helpers, then instantiate the shared inter-prediction template twice
 * (8bpp and 16bpp) via the FN()/BYTES_PER_PIXEL parameters. */
2976 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2977 px, py, pw, ph, bw, bh, w, h, i) \
2978 mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2979 mv, bw, bh, w, h, bytesperpixel)
2980 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2981 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2982 mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2983 row, col, mv, bw, bh, w, h, bytesperpixel)
2985 #define FN(x) x##_8bpp
2986 #define BYTES_PER_PIXEL 1
2987 #include "vp9_mc_template.c"
2989 #undef BYTES_PER_PIXEL
2990 #define FN(x) x##_16bpp
2991 #define BYTES_PER_PIXEL 2
2992 #include "vp9_mc_template.c"
/* Fix: these two #undefs previously named "mc_luma_dir_dir" and
 * "mc_chroma_dir_dir", which are never defined anywhere. #undef of an
 * unknown identifier is a silent no-op (C11 6.10.3.5), so the real
 * mc_luma_dir/mc_chroma_dir macros leaked past this section instead of
 * being cleaned up like the scaled template section does. */
2993 #undef mc_luma_dir
2994 #undef mc_chroma_dir
2996 #undef BYTES_PER_PIXEL
2999 static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
3001 VP9Context *s = ctx->priv_data;
3003 int row = s->row, col = s->col;
3005 if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
3006 if (bytesperpixel == 1) {
3007 inter_pred_scaled_8bpp(ctx);
3009 inter_pred_scaled_16bpp(ctx);
3012 if (bytesperpixel == 1) {
3013 inter_pred_8bpp(ctx);
3015 inter_pred_16bpp(ctx);
3019 /* mostly copied intra_recon() */
3021 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
3022 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
3023 int end_x = FFMIN(2 * (s->cols - col), w4);
3024 int end_y = FFMIN(2 * (s->rows - row), h4);
3025 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
3026 int uvstep1d = 1 << b->uvtx, p;
3027 uint8_t *dst = s->dst[0];
3030 for (n = 0, y = 0; y < end_y; y += step1d) {
3032 for (x = 0; x < end_x; x += step1d,
3033 ptr += 4 * step1d * bytesperpixel, n += step) {
3034 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
3037 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
3038 s->block + 16 * n * bytesperpixel, eob);
3040 dst += 4 * s->y_stride * step1d;
3046 step = 1 << (b->uvtx * 2);
3047 for (p = 0; p < 2; p++) {
3048 dst = s->dst[p + 1];
3049 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
3051 for (x = 0; x < end_x; x += uvstep1d,
3052 ptr += 4 * uvstep1d * bytesperpixel, n += step) {
3053 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
3056 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
3057 s->uvblock[p] + 16 * n * bytesperpixel, eob);
3059 dst += 4 * uvstep1d * s->uv_stride;
3065 static void inter_recon_8bpp(AVCodecContext *ctx)
3067 inter_recon(ctx, 1);
3070 static void inter_recon_16bpp(AVCodecContext *ctx)
3072 inter_recon(ctx, 2);
3075 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
3076 int row_and_7, int col_and_7,
3077 int w, int h, int col_end, int row_end,
3078 enum TxfmMode tx, int skip_inter)
3080 static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
3081 static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
3083 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
3084 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
3085 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
3086 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
3088 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
3089 // edges. This means that for UV, we work on two subsampled blocks at
3090 // a time, and we only use the topleft block's mode information to set
3091 // things like block strength. Thus, for any block size smaller than
3092 // 16x16, ignore the odd portion of the block.
3093 if (tx == TX_4X4 && (ss_v | ss_h)) {
3108 if (tx == TX_4X4 && !skip_inter) {
3109 int t = 1 << col_and_7, m_col = (t << w) - t, y;
3110 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
3111 int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
3113 for (y = row_and_7; y < h + row_and_7; y++) {
3114 int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
3116 mask[0][y][1] |= m_row_8;
3117 mask[0][y][2] |= m_row_4;
3118 // for odd lines, if the odd col is not being filtered,
3119 // skip odd row also:
3126 // if a/c are even row/col and b/d are odd, and d is skipped,
3127 // e.g. right edge of size-66x66.webm, then skip b also (bug)
3128 if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
3129 mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
3131 mask[1][y][col_mask_id] |= m_col;
3134 mask[0][y][3] |= m_col;
3136 if (ss_h && (col_end & 1))
3137 mask[1][y][3] |= (t << (w - 1)) - t;
3139 mask[1][y][3] |= m_col;
3143 int y, t = 1 << col_and_7, m_col = (t << w) - t;
3146 int mask_id = (tx == TX_8X8);
3147 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
3148 int l2 = tx + ss_h - 1, step1d;
3149 int m_row = m_col & masks[l2];
3151 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
3152 // 8wd loopfilter to prevent going off the visible edge.
3153 if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
3154 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
3155 int m_row_8 = m_row - m_row_16;
3157 for (y = row_and_7; y < h + row_and_7; y++) {
3158 mask[0][y][0] |= m_row_16;
3159 mask[0][y][1] |= m_row_8;
3162 for (y = row_and_7; y < h + row_and_7; y++)
3163 mask[0][y][mask_id] |= m_row;
3168 if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
3169 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
3170 mask[1][y][0] |= m_col;
3171 if (y - row_and_7 == h - 1)
3172 mask[1][y][1] |= m_col;
3174 for (y = row_and_7; y < h + row_and_7; y += step1d)
3175 mask[1][y][mask_id] |= m_col;
3177 } else if (tx != TX_4X4) {
3180 mask_id = (tx == TX_8X8) || (h == ss_v);
3181 mask[1][row_and_7][mask_id] |= m_col;
3182 mask_id = (tx == TX_8X8) || (w == ss_h);
3183 for (y = row_and_7; y < h + row_and_7; y++)
3184 mask[0][y][mask_id] |= t;
3186 int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
3188 for (y = row_and_7; y < h + row_and_7; y++) {
3189 mask[0][y][2] |= t4;
3190 mask[0][y][1] |= t8;
3192 mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
3197 static void decode_b(AVCodecContext *ctx, int row, int col,
3198 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
3199 enum BlockLevel bl, enum BlockPartition bp)
3201 VP9Context *s = ctx->priv_data;
3203 enum BlockSize bs = bl * 3 + bp;
3204 int bytesperpixel = s->bytesperpixel;
3205 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
3207 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3213 s->min_mv.x = -(128 + col * 64);
3214 s->min_mv.y = -(128 + row * 64);
3215 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
3216 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
3222 b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
3223 (s->ss_v && h4 * 2 == (1 << b->tx)));
3228 if (bytesperpixel == 1) {
3229 has_coeffs = decode_coeffs_8bpp(ctx);
3231 has_coeffs = decode_coeffs_16bpp(ctx);
3233 if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
3235 memset(&s->above_skip_ctx[col], 1, w4);
3236 memset(&s->left_skip_ctx[s->row7], 1, h4);
3241 #define SPLAT_ZERO_CTX(v, n) \
3243 case 1: v = 0; break; \
3244 case 2: AV_ZERO16(&v); break; \
3245 case 4: AV_ZERO32(&v); break; \
3246 case 8: AV_ZERO64(&v); break; \
3247 case 16: AV_ZERO128(&v); break; \
3249 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3251 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3252 if (s->ss_##dir2) { \
3253 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3254 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3256 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3257 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3262 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
3263 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
3264 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
3265 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
3268 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
3269 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
3270 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
3271 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
3276 s->block += w4 * h4 * 64 * bytesperpixel;
3277 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3278 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3279 s->eob += 4 * w4 * h4;
3280 s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3281 s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3287 // emulated overhangs if the stride of the target buffer can't hold. This
3288 // allows to support emu-edge and so on even if we have large block
3290 emu[0] = (col + w4) * 8 > f->linesize[0] ||
3291 (row + h4) > s->rows;
3292 emu[1] = (col + w4) * 4 > f->linesize[1] ||
3293 (row + h4) > s->rows;
3295 s->dst[0] = s->tmp_y;
3298 s->dst[0] = f->data[0] + yoff;
3299 s->y_stride = f->linesize[0];
3302 s->dst[1] = s->tmp_uv[0];
3303 s->dst[2] = s->tmp_uv[1];
3306 s->dst[1] = f->data[1] + uvoff;
3307 s->dst[2] = f->data[2] + uvoff;
3308 s->uv_stride = f->linesize[1];
3312 intra_recon_16bpp(ctx, yoff, uvoff);
3314 intra_recon_8bpp(ctx, yoff, uvoff);
3318 inter_recon_16bpp(ctx);
3320 inter_recon_8bpp(ctx);
3324 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3326 for (n = 0; o < w; n++) {
3331 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3332 s->tmp_y + o, 128, h, 0, 0);
3333 o += bw * bytesperpixel;
3338 int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
3339 int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
3341 for (n = s->ss_h; o < w; n++) {
3346 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3347 s->tmp_uv[0] + o, 128, h, 0, 0);
3348 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3349 s->tmp_uv[1] + o, 128, h, 0, 0);
3350 o += bw * bytesperpixel;
3355 // pick filter level and find edges to apply filter to
3356 if (s->filter.level &&
3357 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3358 [b->mode[3] != ZEROMV]) > 0) {
3359 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3360 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3362 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3363 mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3364 if (s->ss_h || s->ss_v)
3365 mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
3366 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3367 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3368 b->uvtx, skip_inter);
3370 if (!s->filter.lim_lut[lvl]) {
3371 int sharp = s->filter.sharpness;
3375 limit >>= (sharp + 3) >> 2;
3376 limit = FFMIN(limit, 9 - sharp);
3378 limit = FFMAX(limit, 1);
3380 s->filter.lim_lut[lvl] = limit;
3381 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
3387 s->block += w4 * h4 * 64 * bytesperpixel;
3388 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3389 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3390 s->eob += 4 * w4 * h4;
3391 s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3392 s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
// Bitstream-driven recursive superblock decode: read one partition symbol
// (context = one bit from the above and one from the left partition context
// at this level), decode the resulting sub-block(s), and record the symbol
// for backward probability adaptation.  hbs is the half-block size at this
// level in 8x8-block units (4, 2, 1).
3396 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3397                       ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3399     VP9Context *s = ctx->priv_data;
    // 2-bit partition context: bit0 from the above context, bit1 from the
    // left context, each sampled at the current block level
3400     int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3401             (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    // keyframes/intra-only frames use the fixed default partition
    // probabilities, inter frames the per-frame coded/adapted set
3402     const uint8_t *p = s->keyframe || s->intraonly ? vp9_default_kf_partition_probs[bl][c] :
3403                                                      s->prob.p.partition[bl][c];
3404     enum BlockPartition bp;
3405     ptrdiff_t hbs = 4 >> bl;
3406     AVFrame *f = s->frames[CUR_FRAME].tf.f;
3407     ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3408     int bytesperpixel = s->bytesperpixel;
    // leaf level (8x8): the partition symbol only selects the sub-8x8
    // layout, which decode_b() handles itself
3411         bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3412         decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
    // block fully inside the visible frame horizontally: every partition
    // type is codable
3413     } else if (col + hbs < s->cols) { // FIXME why not <=?
3414         if (row + hbs < s->rows) { // FIXME why not <=?
3415             bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3417             case PARTITION_NONE:
3418                 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
            // PARTITION_H: top half, then bottom half one half-block down
3421                 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3422                 yoff  += hbs * 8 * y_stride;
3423                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3424                 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
            // PARTITION_V: left half, then right half one half-block right
3427                 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3428                 yoff  += hbs * 8 * bytesperpixel;
3429                 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3430                 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3432             case PARTITION_SPLIT:
                // recurse into the four quadrants at the next smaller level
3433                 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3434                 decode_sb(ctx, row, col + hbs, lflvl,
3435                           yoff + 8 * hbs * bytesperpixel,
3436                           uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3437                 yoff  += hbs * 8 * y_stride;
3438                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3439                 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3440                 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3441                           yoff + 8 * hbs * bytesperpixel,
3442                           uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
        // bottom frame edge: the choice collapses to split-or-not, coded as
        // a single branch on the partition probability
3447         } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3448             bp = PARTITION_SPLIT;
3449             decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3450             decode_sb(ctx, row, col + hbs, lflvl,
3451                       yoff + 8 * hbs * bytesperpixel,
3452                       uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3455             decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
    // right frame edge: split vertically-stacked halves or decode as one
3457     } else if (row + hbs < s->rows) { // FIXME why not <=?
3458         if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3459             bp = PARTITION_SPLIT;
3460             decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3461             yoff  += hbs * 8 * y_stride;
3462             uvoff += hbs * 8 * uv_stride >> s->ss_v;
3463             decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3466             decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
    // both edges exceeded: split is the only possibility, nothing is coded
3469         bp = PARTITION_SPLIT;
3470         decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
    // gather statistics for adapt_probs()
3472     s->counts.partition[bl][c][bp]++;
// Second-pass counterpart of decode_sb(): instead of reading partition
// symbols from the bitstream, replay the block layout recorded in s->b
// (b->bl / b->bp) during the first pass, recursing until the stored block
// level is reached.  The H/V branches only decode the second half when it
// lies inside the visible frame.
3475 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3476                           ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3478     VP9Context *s = ctx->priv_data;
3480     ptrdiff_t hbs = 4 >> bl;
3481     AVFrame *f = s->frames[CUR_FRAME].tf.f;
3482     ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3483     int bytesperpixel = s->bytesperpixel;
    // leaf: the stored block must be 8x8 here
3486         av_assert2(b->bl == BL_8X8);
3487         decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
    // the stored partition was decided at this level: decode 1 or 2 halves
3488     } else if (s->b->bl == bl) {
3489         decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3490         if (b->bp == PARTITION_H && row + hbs < s->rows) {
3491             yoff  += hbs * 8 * y_stride;
3492             uvoff += hbs * 8 * uv_stride >> s->ss_v;
3493             decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3494         } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3495             yoff  += hbs * 8 * bytesperpixel;
3496             uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3497             decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
    // stored block is smaller than this level: recurse into the (up to
    // four) quadrants that intersect the visible frame
3500         decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3501         if (col + hbs < s->cols) { // FIXME why not <=?
3502             if (row + hbs < s->rows) {
3503                 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
3504                               uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3505                 yoff  += hbs * 8 * y_stride;
3506                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3507                 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3508                 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3509                               yoff + 8 * hbs * bytesperpixel,
3510                               uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
            // right half exists but the bottom half is outside the frame
3512                 yoff  += hbs * 8 * bytesperpixel;
3513                 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3514                 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
        // only the bottom half exists
3516         } else if (row + hbs < s->rows) {
3517             yoff  += hbs * 8 * y_stride;
3518             uvoff += hbs * 8 * uv_stride >> s->ss_v;
3519             decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
// Apply the in-loop deblocking filter to the vertical edges (edges between
// horizontally neighbouring blocks) of one plane of a 64x64 superblock.
// `mask` is the column-edge mask built by mask_edges(): per 8px row, bit x
// set in mask[y][0/1/2/3] requests a 16/8/4/inner-4 wide filter at that
// column position (see the VP9Filter mask layout).  `lvl` holds per-8x8
// filter levels; E/I/H are the mblim/lim/hev thresholds looked up from the
// level (high nibble of L is the hev threshold).
3524 static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
3525                                                uint8_t *lvl, uint8_t (*mask)[4],
3526                                                uint8_t *dst, ptrdiff_t ls)
3528     int y, x, bytesperpixel = s->bytesperpixel;
3530     // filter edges between columns (e.g. block1 | block2)
3531     for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
        // hmask1/hmask2 are the masks of two vertically adjacent 8px rows
        // so pairs of edges can be handled by one dual (16-wide or "mix2")
        // dsp call where both rows request filtering
3532         uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
3533         unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3534         unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3535         unsigned hm = hm1 | hm2 | hm13 | hm23;
3537         for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
3540                 int L = *l, H = L >> 4;
3541                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
                // both rows want the widest filter at this column: use the
                // combined 16px call, otherwise the 8px wide variant
3543                 if (hmask1[0] & x) {
3544                     if (hmask2[0] & x) {
3545                         av_assert2(l[8 << ss_v] == L);
3546                         s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
3548                         s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
                // the two rows need different filters: pack the second
                // row's thresholds into the high byte for the mix2 call
3550                 } else if (hm2 & x) {
3553                     E |= s->filter.mblim_lut[L] << 8;
3554                     I |= s->filter.lim_lut[L] << 8;
3555                     s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3557                                            [0](ptr, ls, E, I, H);
3559                     s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3560                                         [0](ptr, ls, E, I, H);
            // only the second (lower) row has an edge at this column
3562             } else if (hm2 & x) {
3563                 int L = l[8 << ss_v], H = L >> 4;
3564                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3566                 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3567                                     [0](ptr + 8 * ls, ls, E, I, H);
        // inner 4px edges (mask[3]), 4 pixels into the 8x8 block
3575             int L = *l, H = L >> 4;
3576             int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3581                 E |= s->filter.mblim_lut[L] << 8;
3582                 I |= s->filter.lim_lut[L] << 8;
3583                 s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3585                 s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3587         } else if (hm23 & x) {
3588             int L = l[8 << ss_v], H = L >> 4;
3589             int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3591             s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
// Horizontal-edge counterpart of filter_plane_cols(): walks the row-edge
// masks of one plane of a 64x64 superblock and invokes the loop filters on
// edges between vertically neighbouring blocks.  The (x << (1 + ss_h))
// tests pair the current edge with the one 8 luma pixels to the right so
// dual-edge (16-wide / mix2) dsp calls can be used.
3599 static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
3600                                                uint8_t *lvl, uint8_t (*mask)[4],
3601                                                uint8_t *dst, ptrdiff_t ls)
3603     int y, x, bytesperpixel = s->bytesperpixel;
3606     // filter edges between rows (e.g. ------)
3608     for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
3609         uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
3610         unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
3612         for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
3615                 int L = *l, H = L >> 4;
3616                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
                // this edge and its right neighbour both want the widest
                // filter: use the combined 16px call
3619                 if (vmask[0] & (x << (1 + ss_h))) {
3620                     av_assert2(l[1 + ss_h] == L);
3621                     s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
3623                     s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
            // neighbour edge present with a different level: pack its
            // thresholds into the high byte for the mix2 call
3625             } else if (vm & (x << (1 + ss_h))) {
3628                 E |= s->filter.mblim_lut[L] << 8;
3629                 I |= s->filter.lim_lut[L] << 8;
3630                 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3631                                        [!!(vmask[1] & (x << (1 + ss_h)))]
3632                                        [1](ptr, ls, E, I, H);
3634                 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3635                                     [1](ptr, ls, E, I, H);
            // only the right-neighbour position has an edge
3637             } else if (vm & (x << (1 + ss_h))) {
3638                 int L = l[1 + ss_h], H = L >> 4;
3639                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3641                 s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
3642                                     [1](ptr + 8 * bytesperpixel, ls, E, I, H);
        // inner 4px row edges (vmask[3]), 4 lines below the block edge
3647             int L = *l, H = L >> 4;
3648             int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3650             if (vm3 & (x << (1 + ss_h))) {
3653                 E |= s->filter.mblim_lut[L] << 8;
3654                 I |= s->filter.lim_lut[L] << 8;
3655                 s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
3657                 s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
3659         } else if (vm3 & (x << (1 + ss_h))) {
3660             int L = l[1 + ss_h], H = L >> 4;
3661             int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3663             s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
// Run the loop filter over one 64x64 superblock: vertical (column) edges
// first, then horizontal (row) edges, for luma and both chroma planes.
// The edge masks were prepared per-block by mask_edges() during decoding.
3676 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3677                           int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3679     VP9Context *s = ctx->priv_data;
3680     AVFrame *f = s->frames[CUR_FRAME].tf.f;
3681     uint8_t *dst = f->data[0] + yoff;
3682     ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
    // chroma uses the subsampled mask set whenever either direction is
    // subsampled (mask[1]); 4:4:4 shares the luma-layout masks (mask[0])
3683     uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
3686     // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3687     // if you think of them as acting on a 8x8 block max, we can interleave
3688     // each v/h within the single x loop, but that only works if we work on
3689     // 8 pixel blocks, and we won't always do that (we want at least 16px
3690     // to use SSE2 optimizations, perhaps 32 for AVX2)
3692     filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
3693     filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);
    // both chroma planes share the same level and mask data
3695     for (p = 0; p < 2; p++) {
3696         dst = f->data[1 + p] + uvoff;
3697         filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
3698         filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
// Compute the [start, end) range of tile `idx` out of 2^log2_n tiles over
// n superblocks, then convert from superblock units to 8x8-block units
// (<< 3).  The FFMIN clamps guard against idx ranges that round past n.
3702 static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
3704     int sb_start = ( idx      * n) >> log2_n;
3705     int sb_end   = ((idx + 1) * n) >> log2_n;
3706     *start = FFMIN(sb_start, n) << 3;
3707     *end   = FFMIN(sb_end,   n) << 3;
// Backward-adapt a single binary probability *p from the observed counts
// ct0 (bit==0) and ct1 (bit==1).  The new value is a blend between the old
// probability and the count-derived estimate; the blend weight grows with
// the number of observed symbols, capped at max_count.
3710 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3711                                         int max_count, int update_factor)
3713     unsigned ct = ct0 + ct1, p2, p1;
    // p2 = rounded 8-bit estimate of P(bit == 0), clamped away from 0/256
3719     p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3720     p2 = av_clip(p2, 1, 255);
    // scale the update factor by how many symbols were actually seen
3721     ct = FFMIN(ct, max_count);
3722     update_factor = FASTDIV(update_factor * ct, max_count);
3724     // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3725     *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
// Frame-level backward probability adaptation: after a frame has been fully
// decoded, merge the symbol statistics collected in s->counts into the
// active frame context (s->prob_ctx[s->framectxid]) so the next frame
// starts from better estimates.  Inter-only probabilities are not adapted
// for keyframes/intra-only frames (early return below).
3728 static void adapt_probs(VP9Context *s)
3731     prob_context *p = &s->prob_ctx[s->framectxid].p;
    // coefficient update factor: 112 after key/intra-only frames or when
    // the previous frame was not a keyframe, 128 otherwise
3732     int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
    // coefficient probabilities: [txfm size][plane type][inter?][band][ctx]
3735     for (i = 0; i < 4; i++)
3736         for (j = 0; j < 2; j++)
3737             for (k = 0; k < 2; k++)
3738                 for (l = 0; l < 6; l++)
3739                     for (m = 0; m < 6; m++) {
3740                         uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3741                         unsigned *e = s->counts.eob[i][j][k][l][m];
3742                         unsigned *c = s->counts.coef[i][j][k][l][m];
3744                         if (l == 0 && m >= 3) // dc only has 3 pt
3747                         adapt_prob(&pp[0], e[0], e[1], 24, uf);
3748                         adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3749                         adapt_prob(&pp[2], c[1], c[2], 24, uf);
    // keyframes/intra-only: no inter stats exist; just carry the coded
    // skip/tx probabilities over into the frame context and stop
3752     if (s->keyframe || s->intraonly) {
3753         memcpy(p->skip,  s->prob.p.skip,  sizeof(p->skip));
3754         memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3755         memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3756         memcpy(p->tx8p,  s->prob.p.tx8p,  sizeof(p->tx8p));
    // skip flag
3761     for (i = 0; i < 3; i++)
3762         adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
    // intra/inter flag
3765     for (i = 0; i < 4; i++)
3766         adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
    // compound prediction flag (only coded when switchable)
3769     if (s->comppredmode == PRED_SWITCHABLE) {
3770         for (i = 0; i < 5; i++)
3771             adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
    // reference frame selection
3775     if (s->comppredmode != PRED_SINGLEREF) {
3776         for (i = 0; i < 5; i++)
3777             adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3778                        s->counts.comp_ref[i][1], 20, 128);
3781     if (s->comppredmode != PRED_COMPREF) {
3782         for (i = 0; i < 5; i++) {
3783             uint8_t *pp = p->single_ref[i];
3784             unsigned (*c)[2] = s->counts.single_ref[i];
3786             adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3787             adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3791     // block partitioning
3792     for (i = 0; i < 4; i++)
3793         for (j = 0; j < 4; j++) {
3794             uint8_t *pp = p->partition[i][j];
3795             unsigned *c = s->counts.partition[i][j];
3797             adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3798             adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3799             adapt_prob(&pp[2], c[2], c[3], 20, 128);
    // tx size (only coded when switchable)
3803     if (s->txfmmode == TX_SWITCHABLE) {
3804         for (i = 0; i < 2; i++) {
3805             unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3807             adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3808             adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3809             adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3810             adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3811             adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3812             adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3816     // interpolation filter
3817     if (s->filtermode == FILTER_SWITCHABLE) {
3818         for (i = 0; i < 4; i++) {
3819             uint8_t *pp = p->filter[i];
3820             unsigned *c = s->counts.filter[i];
3822             adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3823             adapt_prob(&pp[1], c[1], c[2], 20, 128);
    // inter prediction modes
3828     for (i = 0; i < 7; i++) {
3829         uint8_t *pp = p->mv_mode[i];
3830         unsigned *c = s->counts.mv_mode[i];
3832         adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3833         adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3834         adapt_prob(&pp[2], c[1], c[3], 20, 128);
    // mv joint distribution (which of the two components are non-zero)
3839         uint8_t *pp = p->mv_joint;
3840         unsigned *c = s->counts.mv_joint;
3842         adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3843         adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3844         adapt_prob(&pp[2], c[2], c[3], 20, 128);
    // mv components, one set per direction (i = 0/1)
3848     for (i = 0; i < 2; i++) {
3850         unsigned *c, (*c2)[2], sum;
3852         adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3853                    s->counts.mv_comp[i].sign[1], 20, 128);
        // magnitude class tree: `sum` tracks the remaining probability
        // mass, with each decided class peeled off between nodes
3855         pp = p->mv_comp[i].classes;
3856         c = s->counts.mv_comp[i].classes;
3857         sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3858         adapt_prob(&pp[0], c[0], sum, 20, 128);
3860         adapt_prob(&pp[1], c[1], sum, 20, 128);
3862         adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3863         adapt_prob(&pp[3], c[2], c[3], 20, 128);
3865         adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3866         adapt_prob(&pp[5], c[4], c[5], 20, 128);
3868         adapt_prob(&pp[6], c[6], sum, 20, 128);
3869         adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3870         adapt_prob(&pp[8], c[7], c[8], 20, 128);
3871         adapt_prob(&pp[9], c[9], c[10], 20, 128);
3873         adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3874                    s->counts.mv_comp[i].class0[1], 20, 128);
3875         pp = p->mv_comp[i].bits;
3876         c2 = s->counts.mv_comp[i].bits;
3877         for (j = 0; j < 10; j++)
3878             adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
        // fractional-pel bits, separately for class0 and the general case
3880         for (j = 0; j < 2; j++) {
3881             pp = p->mv_comp[i].class0_fp[j];
3882             c = s->counts.mv_comp[i].class0_fp[j];
3883             adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3884             adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3885             adapt_prob(&pp[2], c[2], c[3], 20, 128);
3887         pp = p->mv_comp[i].fp;
3888         c = s->counts.mv_comp[i].fp;
3889         adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3890         adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3891         adapt_prob(&pp[2], c[2], c[3], 20, 128);
        // 1/8-pel bits are only coded (and adapted) with high-precision mvs
3893         if (s->highprecisionmvs) {
3894             adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3895                        s->counts.mv_comp[i].class0_hp[1], 20, 128);
3896             adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3897                        s->counts.mv_comp[i].hp[1], 20, 128);
    // y intra modes: walk the intra mode tree; `sum` holds the remaining
    // probability mass (note it excludes c[2], i.e. the first tree symbol)
3902     for (i = 0; i < 4; i++) {
3903         uint8_t *pp = p->y_mode[i];
3904         unsigned *c = s->counts.y_mode[i], sum, s2;
3906         sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3907         adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3908         sum -= c[TM_VP8_PRED];
3909         adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3910         sum -= c[VERT_PRED];
3911         adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3912         s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3914         adapt_prob(&pp[3], s2, sum, 20, 128);
3916         adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3917         adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3918         sum -= c[DIAG_DOWN_LEFT_PRED];
3919         adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3920         sum -= c[VERT_LEFT_PRED];
3921         adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3922         adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
    // uv intra modes: same tree, conditioned on the y mode (10 contexts)
3926     for (i = 0; i < 10; i++) {
3927         uint8_t *pp = p->uv_mode[i];
3928         unsigned *c = s->counts.uv_mode[i], sum, s2;
3930         sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3931         adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3932         sum -= c[TM_VP8_PRED];
3933         adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3934         sum -= c[VERT_PRED];
3935         adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3936         s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3938         adapt_prob(&pp[3], s2, sum, 20, 128);
3940         adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3941         adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3942         sum -= c[DIAG_DOWN_LEFT_PRED];
3943         adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3944         sum -= c[VERT_LEFT_PRED];
3945         adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3946         adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// Release the per-stream scratch buffers (sized from cols/rows); called on
// frame-size changes and at close time.
3950 static void free_buffers(VP9Context *s)
3952     av_freep(&s->intra_pred_data[0]);
3953     av_freep(&s->b_base);
3954     av_freep(&s->block_base);
// Decoder close: release the internal frames and both generations of
// reference slots, then the scratch buffers.
3957 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3959     VP9Context *s = ctx->priv_data;
    // 3 internal frames: CUR_FRAME plus the retained mv-pair and
    // segmentation-map frames (see REF_FRAME_* above)
3962     for (i = 0; i < 3; i++) {
3963         if (s->frames[i].tf.f->data[0])
3964             vp9_unref_frame(ctx, &s->frames[i]);
3965         av_frame_free(&s->frames[i].tf.f);
    // 8 reference slots, current and next generation
3967     for (i = 0; i < 8; i++) {
3968         if (s->refs[i].f->data[0])
3969             ff_thread_release_buffer(ctx, &s->refs[i]);
3970         av_frame_free(&s->refs[i].f);
3971         if (s->next_refs[i].f->data[0])
3972             ff_thread_release_buffer(ctx, &s->next_refs[i]);
3973         av_frame_free(&s->next_refs[i].f);
// Decode one VP9 packet: parse the frame header, set up the new CUR_FRAME
// (plus the retained segmentation-map / mv-pair frames), run the tile
// decode loop (one or two passes), the loop filter, and finally rotate the
// reference frame slots.  Returns bytes consumed or a negative error;
// *got_frame is set when a displayable frame is output.
3983 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3984                             int *got_frame, AVPacket *pkt)
3986     const uint8_t *data = pkt->data;
3987     int size = pkt->size;
3988     VP9Context *s = ctx->priv_data;
3989     int res, tile_row, tile_col, i, ref, row, col;
    // the previous segmentation map must survive if segmentation is on but
    // the map is not being updated this frame
3990     int retain_segmap_ref = s->segmentation.enabled && !s->segmentation.update_map;
3991     ptrdiff_t yoff, uvoff, ls_y, ls_uv;
    // decode_frame_header() returns the header size in bytes, or 0 when the
    // packet merely re-displays an already-decoded reference frame
3995     if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3997     } else if (res == 0) {
3998         if (!s->refs[ref].f->data[0]) {
3999             av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
4000             return AVERROR_INVALIDDATA;
4002         if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
4004         ((AVFrame *)frame)->pkt_pts = pkt->pts;
4005         ((AVFrame *)frame)->pkt_dts = pkt->dts;
        // no new frame was coded: carry the reference slots over unchanged
4006         for (i = 0; i < 8; i++) {
4007             if (s->next_refs[i].f->data[0])
4008                 ff_thread_release_buffer(ctx, &s->next_refs[i]);
4009             if (s->refs[i].f->data[0] &&
4010                 (res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i])) < 0)
    // frame buffer setup: keep references to the previous frame for
    // segmentation-map (REF_FRAME_SEGMAP) and mv (REF_FRAME_MVPAIR)
    // prediction, then allocate the new current frame
4019     if (!retain_segmap_ref) {
4020         if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
4021             vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
4022         if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4023             (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
4026     if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
4027         vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);
4028     if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4029         (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
4031     if (s->frames[CUR_FRAME].tf.f->data[0])
4032         vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
4033     if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
4035     f = s->frames[CUR_FRAME].tf.f;
4036     f->key_frame = s->keyframe;
4037     f->pict_type = (s->keyframe || s->intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
4038     ls_y = f->linesize[0];
4039     ls_uv =f->linesize[1];
    // prepare next_refs: slots flagged in refreshrefmask will point at the
    // new frame, the remaining slots keep their current contents
4042     for (i = 0; i < 8; i++) {
4043         if (s->next_refs[i].f->data[0])
4044             ff_thread_release_buffer(ctx, &s->next_refs[i]);
4045         if (s->refreshrefmask & (1 << i)) {
4046             res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
4047         } else if (s->refs[i].f->data[0]) {
4048             res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
4054     // main tile decode loop
4055     bytesperpixel = s->bytesperpixel;
    // reset the above-row contexts for the new frame
4056     memset(s->above_partition_ctx, 0, s->cols);
4057     memset(s->above_skip_ctx, 0, s->cols);
4058     if (s->keyframe || s->intraonly) {
4059         memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
4061         memset(s->above_mode_ctx, NEARESTMV, s->cols);
4063     memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
4064     memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
4065     memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
4066     memset(s->above_segpred_ctx, 0, s->cols);
    // two-pass decoding is used with frame threading when this frame
    // updates the frame context and parallelmode is off
4067     s->pass = s->frames[CUR_FRAME].uses_2pass =
4068         ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
4069     if ((res = update_block_buffers(ctx)) < 0) {
4070         av_log(ctx, AV_LOG_ERROR,
4071                "Failed to allocate block buffers\n");
    // in parallelmode the (non-adapted) context must be stored before
    // decoding starts so that dependent frame threads can proceed early
4074     if (s->refreshctx && s->parallelmode) {
4077         for (i = 0; i < 4; i++) {
4078             for (j = 0; j < 2; j++)
4079                 for (k = 0; k < 2; k++)
4080                     for (l = 0; l < 6; l++)
4081                         for (m = 0; m < 6; m++)
4082                             memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
4083                                    s->prob.coef[i][j][k][l][m], 3);
4084             if (s->txfmmode == i)
4087         s->prob_ctx[s->framectxid].p = s->prob.p;
4088         ff_thread_finish_setup(ctx);
4089     } else if (!s->refreshctx) {
4090         ff_thread_finish_setup(ctx);
        // per-pass reset of the coefficient/eob output pointers
4096         s->block = s->block_base;
4097         s->uvblock[0] = s->uvblock_base[0];
4098         s->uvblock[1] = s->uvblock_base[1];
4099         s->eob = s->eob_base;
4100         s->uveob[0] = s->uveob_base[0];
4101         s->uveob[1] = s->uveob_base[1];
4103         for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
4104             set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
4105                             tile_row, s->tiling.log2_tile_rows, s->sb_rows);
                // each tile is length-prefixed (32 bit) except the very
                // last one, which runs to the end of the packet; one range
                // decoder is initialized per tile column
4107                 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4110                     if (tile_col == s->tiling.tile_cols - 1 &&
4111                         tile_row == s->tiling.tile_rows - 1) {
4114                         tile_size = AV_RB32(data);
4118                     if (tile_size > size) {
4119                         ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4120                         return AVERROR_INVALIDDATA;
4122                     ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
4123                     if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
4124                         ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4125                         return AVERROR_INVALIDDATA;
            // decode one superblock row at a time, interleaving all tile
            // columns; the range coder state is saved/restored per tile
4132             for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
4133                  row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
4134                 struct VP9Filter *lflvl_ptr = s->lflvl;
4135                 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
4137                 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4138                     set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
4139                                     tile_col, s->tiling.log2_tile_cols, s->sb_cols);
                    // reset left-edge contexts at each tile boundary
4142                     memset(s->left_partition_ctx, 0, 8);
4143                     memset(s->left_skip_ctx, 0, 8);
4144                     if (s->keyframe || s->intraonly) {
4145                         memset(s->left_mode_ctx, DC_PRED, 16);
4147                         memset(s->left_mode_ctx, NEARESTMV, 8);
4149                     memset(s->left_y_nnz_ctx, 0, 16);
4150                     memset(s->left_uv_nnz_ctx, 0, 32);
4151                     memset(s->left_segpred_ctx, 0, 8);
4153                     memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
4156                     for (col = s->tiling.tile_col_start;
4157                          col < s->tiling.tile_col_end;
4158                          col += 8, yoff2 += 64 * bytesperpixel,
4159                          uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4160                         // FIXME integrate with lf code (i.e. zero after each
4161                         // use, similar to invtxfm coefficients, or similar)
4163                             memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
                        // pass 2 replays stored decisions, otherwise decode
                        // the superblock from the bitstream
4167                             decode_sb_mem(ctx, row, col, lflvl_ptr,
4168                                           yoff2, uvoff2, BL_64X64);
4170                             decode_sb(ctx, row, col, lflvl_ptr,
4171                                       yoff2, uvoff2, BL_64X64);
4175                         memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
4183                 // backup pre-loopfilter reconstruction data for intra
4184                 // prediction of next row of sb64s
4185                 if (row + 8 < s->rows) {
4186                     memcpy(s->intra_pred_data[0],
4187                            f->data[0] + yoff + 63 * ls_y,
4188                            8 * s->cols * bytesperpixel);
4189                     memcpy(s->intra_pred_data[1],
4190                            f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4191                            8 * s->cols * bytesperpixel >> s->ss_h);
4192                     memcpy(s->intra_pred_data[2],
4193                            f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4194                            8 * s->cols * bytesperpixel >> s->ss_h);
4197                 // loopfilter one row
4198                 if (s->filter.level) {
4201                     lflvl_ptr = s->lflvl;
4202                     for (col = 0; col < s->cols;
4203                          col += 8, yoff2 += 64 * bytesperpixel,
4204                          uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4205                         loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
4209                 // FIXME maybe we can make this more finegrained by running the
4210                 // loopfilter per-block instead of after each sbrow
4211                 // In fact that would also make intra pred left preparation easier?
4212                 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
    // after pass 1 of a 2-pass decode, adapt the probabilities and let
    // dependent threads start
4216     if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
4218         ff_thread_finish_setup(ctx);
4220     } while (s->pass++ == 1);
4221     ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
    // rotate: next_refs become the reference slots for the next frame
4224     for (i = 0; i < 8; i++) {
4225         if (s->refs[i].f->data[0])
4226             ff_thread_release_buffer(ctx, &s->refs[i]);
4227         ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
    // invisible frames are decoded (and may be referenced) but not output
4230     if (!s->invisible) {
4231         if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
// Flush (e.g. on seek): drop the internal frames and all reference slots
// without tearing down the decoder.
4239 static void vp9_decode_flush(AVCodecContext *ctx)
4241     VP9Context *s = ctx->priv_data;
4244     for (i = 0; i < 3; i++)
4245         vp9_unref_frame(ctx, &s->frames[i]);
4246     for (i = 0; i < 8; i++)
4247         ff_thread_release_buffer(ctx, &s->refs[i]);
// Allocate the AVFrame shells for the 3 internal frames and the 8+8
// reference slots; shared between normal init and thread-copy init.
4250 static int init_frames(AVCodecContext *ctx)
4252     VP9Context *s = ctx->priv_data;
4255     for (i = 0; i < 3; i++) {
4256         s->frames[i].tf.f = av_frame_alloc();
4257         if (!s->frames[i].tf.f) {
            // NOTE(review): vp9_decode_free() dereferences tf.f->data[0]
            // unconditionally; with the just-failed (NULL) frame this looks
            // unsafe — verify against vp9_decode_free()
4258             vp9_decode_free(ctx);
4259             av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4260             return AVERROR(ENOMEM);
4263     for (i = 0; i < 8; i++) {
4264         s->refs[i].f = av_frame_alloc();
4265         s->next_refs[i].f = av_frame_alloc();
4266         if (!s->refs[i].f || !s->next_refs[i].f) {
4267             vp9_decode_free(ctx);
4268             av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4269             return AVERROR(ENOMEM);
// One-time decoder init: enable per-row progress allocation for frame
// threading and allocate the frame/reference AVFrame shells.
4276 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4278     VP9Context *s = ctx->priv_data;
4280     ctx->internal->allocate_progress = 1;
    // NOTE(review): -1 presumably marks the filter limit LUTs as not yet
    // computed for any sharpness — confirm against the header parser
4282     s->filter.sharpness = -1;
4284     return init_frames(ctx);
// Frame-thread worker init: each thread copy only needs its own AVFrame
// shells; the remaining state is synced in update_thread_context().
4287 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4289     return init_frames(avctx);
// Frame threading: copy the decoding state a worker needs from the source
// thread (src) into this thread's context (dst).
4292 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4295     VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4297     // detect size changes in other threads
4298     if (s->intra_pred_data[0] &&
4299         (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
    // take over the source's internal frames
4303     for (i = 0; i < 3; i++) {
4304         if (s->frames[i].tf.f->data[0])
4305             vp9_unref_frame(dst, &s->frames[i]);
4306         if (ssrc->frames[i].tf.f->data[0]) {
4307             if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
    // the source's *next* refs become this thread's current reference slots
4311     for (i = 0; i < 8; i++) {
4312         if (s->refs[i].f->data[0])
4313             ff_thread_release_buffer(dst, &s->refs[i]);
4314         if (ssrc->next_refs[i].f->data[0]) {
4315             if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
    // plain-value state used by the decode loop
4320     s->invisible = ssrc->invisible;
4321     s->keyframe = ssrc->keyframe;
4322     s->ss_v = ssrc->ss_v;
4323     s->ss_h = ssrc->ss_h;
4324     s->segmentation.enabled = ssrc->segmentation.enabled;
4325     s->segmentation.update_map = ssrc->segmentation.update_map;
4326     s->bytesperpixel = ssrc->bytesperpixel;
4328     s->bpp_index = ssrc->bpp_index;
4329     memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4330     memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
    // segmentation features only matter while segmentation is enabled
4331     if (ssrc->segmentation.enabled) {
4332         memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4333                sizeof(s->segmentation.feat));
// Profile list exported via AVCodec.profiles, terminated by
// FF_PROFILE_UNKNOWN.
4339 static const AVProfile profiles[] = {
4340     { FF_PROFILE_VP9_0, "Profile 0" },
4341     { FF_PROFILE_VP9_1, "Profile 1" },
4342     { FF_PROFILE_VP9_2, "Profile 2" },
4343     { FF_PROFILE_VP9_3, "Profile 3" },
4344     { FF_PROFILE_UNKNOWN },
4347 AVCodec ff_vp9_decoder = {
4349 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4350 .type = AVMEDIA_TYPE_VIDEO,
4351 .id = AV_CODEC_ID_VP9,
4352 .priv_data_size = sizeof(VP9Context),
4353 .init = vp9_decode_init,
4354 .close = vp9_decode_free,
4355 .decode = vp9_decode_frame,
4356 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4357 .flush = vp9_decode_flush,
4358 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4359 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
4360 .profiles = NULL_IF_CONFIG_SMALL(profiles),