2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
34 #include "libavutil/pixdesc.h"
36 #define VP9_SYNCCODE 0x498342
// Per-frame decoder state.  extradata is a single refcounted buffer out of
// which vp9_alloc_frame() (below) carves the segmentation map and the
// per-block motion-vector pairs, so both share the frame's lifetime.
// NOTE(review): further members (e.g. the ThreadFrame) are not visible in
// this chunk of the file.
73 typedef struct VP9Frame {
75 AVBufferRef *extradata; // backing storage for the two pointers below
76 uint8_t *segmentation_map; // one segment id per 8x8 block (64 per superblock, see vp9_alloc_frame)
77 struct VP9mvrefPair *mv; // per-8x8-block mv pairs, same granularity as segmentation_map
// Loop-filter edge bitmasks for one 64x64 superblock, indexed by plane
// (y/uv), edge direction (col/row), 8px row within the superblock, and
// transform size.  NOTE(review): the enclosing struct declaration (used via
// s->lflvl, presumably struct VP9Filter) is not visible in this chunk.
83 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
84 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
// Per-block (mode/partition) decode state for the block currently being
// reconstructed; s->b points into an array of these (s->b_base).
87 typedef struct VP9Block {
88 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
89 enum FilterMode filter;
90 VP56mv mv[4 /* b_idx */][2 /* ref */]; // up to 4 sub-block mvs, 2 references each
92 enum TxfmMode tx, uvtx;
94 enum BlockPartition bp;
// Main decoder context (AVCodecContext.priv_data).  Holds bitstream/frame
// header state, per-frame probability models and counts, reference frames,
// tiling info, and the left/above context caches used during block decode.
// NOTE(review): many members and the nested struct declarations they belong
// to (prob_ctx/prob/counts/filter/lf_delta/segmentation/tiling) are elided
// in this chunk; comments below only describe what is visible here.
97 typedef struct VP9Context {
104 VP9Block *b_base, *b; // block array and pointer to the current block
106 int row, row7, col, col7; // current position in 8x8 units; *7 = &7 within superblock
108 ptrdiff_t y_stride, uv_stride;
// frame-header flags parsed in decode_frame_header()
111 uint8_t keyframe, last_keyframe;
112 uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
114 uint8_t use_last_frame_mvs;
119 uint8_t refreshrefmask;
120 uint8_t highprecisionmvs;
121 enum FilterMode filtermode;
122 uint8_t allowcompinter;
125 uint8_t parallelmode;
129 uint8_t varcompref[2]; // the two candidate variable compound references
130 ThreadFrame refs[8], next_refs[8]; // VP9 has 8 reference slots
132 #define REF_FRAME_MVPAIR 1
133 #define REF_FRAME_SEGMAP 2
140 uint8_t mblim_lut[64];
148 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
150 #define MAX_SEGMENT 8
154 uint8_t absolute_vals;
156 uint8_t ignore_refmap;
161 uint8_t skip_enabled;
170 unsigned log2_tile_cols, log2_tile_rows;
171 unsigned tile_cols, tile_rows;
172 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
174 unsigned sb_cols, sb_rows, rows, cols; // in 64x64 superblocks resp. 8x8 blocks
// coefficient probability models; the 3-entry form is the stored model,
// the 11-entry form presumably the expanded per-token model — TODO confirm
177 uint8_t coef[4][2][2][6][6][3];
181 uint8_t coef[4][2][2][6][6][11];
// symbol frequency counts used for backward probability adaptation
186 unsigned y_mode[4][10];
187 unsigned uv_mode[10][10];
188 unsigned filter[4][3];
189 unsigned mv_mode[7][4];
190 unsigned intra[4][2];
192 unsigned single_ref[5][2][2];
193 unsigned comp_ref[5][2];
194 unsigned tx32p[2][4];
195 unsigned tx16p[2][3];
198 unsigned mv_joint[4];
201 unsigned classes[11];
203 unsigned bits[10][2];
204 unsigned class0_fp[2][4];
206 unsigned class0_hp[2];
209 unsigned partition[4][4][4];
210 unsigned coef[4][2][2][6][6][3];
211 unsigned eob[4][2][2][6][6][2];
213 enum TxfmMode txfmmode;
214 enum CompPredMode comppredmode;
216 // contextual (left/above) cache
217 DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
218 DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
219 DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
220 DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
221 DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
222 DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
223 DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
224 DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
225 DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
226 DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
227 DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
228 DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
// "above" context rows span the whole frame width; all carved out of one
// allocation in update_size() via the assign() macro
229 uint8_t *above_partition_ctx;
230 uint8_t *above_mode_ctx;
231 // FIXME maybe merge some of the below in a flags field?
232 uint8_t *above_y_nnz_ctx;
233 uint8_t *above_uv_nnz_ctx[2];
234 uint8_t *above_skip_ctx; // 1bit
235 uint8_t *above_txfm_ctx; // 2bit
236 uint8_t *above_segpred_ctx; // 1bit
237 uint8_t *above_intra_ctx; // 1bit
238 uint8_t *above_comp_ctx; // 1bit
239 uint8_t *above_ref_ctx; // 2bit
240 uint8_t *above_filter_ctx;
241 VP56mv (*above_mv_ctx)[2];
// whole-frame cache
244 uint8_t *intra_pred_data[3]; // last row of the previous superblock row, per plane
245 struct VP9Filter *lflvl;
246 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2];
248 // block reconstruction intermediates
249 int block_alloc_using_2pass;
250 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
251 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
252 struct { int x, y; } min_mv, max_mv; // mv clamping range for the current block
253 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
254 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
255 uint16_t mvscale[3][2]; // 14-bit fixed-point ref->cur scaling, 0 = same size
256 uint8_t mvstep[3][2];
// Per-block-size { width, height } lookup, indexed [unit][block size].
// First table: {16,16} for 64x64 down to {1,1} — i.e. dimensions in units
// of 4 pixels; second table: the same sizes in units of 8 pixels, floored
// at 1 — NOTE(review): unit interpretation inferred from the values, confirm
// against the (elided) users of this table.
259 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
261 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
262 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
264 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
265 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
// Allocate the picture buffer plus one zeroed side buffer holding both the
// segmentation map (1 byte per 8x8 block) and the mv-pair array.  Returns
// 0 on success or a negative AVERROR; on side-buffer failure the picture
// buffer is released again so no partial state leaks.
269 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
271 VP9Context *s = ctx->priv_data;
274 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
276 sz = 64 * s->sb_cols * s->sb_rows; // number of 8x8 blocks in the frame
277 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
278 ff_thread_release_buffer(ctx, &f->tf);
279 return AVERROR(ENOMEM);
// carve the two arrays out of the single allocation
282 f->segmentation_map = f->extradata->data;
283 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
// Release a VP9Frame: drop the threaded picture buffer and the shared
// segmentation/mv side buffer (av_buffer_unref also NULLs the pointer).
288 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
290 ff_thread_release_buffer(ctx, &f->tf);
291 av_buffer_unref(&f->extradata);
// Make dst a new reference to src: ref the picture and the extradata
// buffer, then mirror the derived pointers/flags.  On extradata failure the
// partially-referenced dst is unref'd again.  Returns 0 or negative AVERROR.
294 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
298 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
300 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
301 vp9_unref_frame(ctx, dst);
302 return AVERROR(ENOMEM);
// pointers stay valid because dst now co-owns src's extradata buffer
305 dst->segmentation_map = src->segmentation_map;
307 dst->uses_2pass = src->uses_2pass;
// (Re)initialize per-resolution state: superblock/block counts, the single
// allocation backing intra_pred_data[] and all "above" context rows, and —
// on bit-depth change — the DSP function tables.  No-op when size and pixel
// format are unchanged.  Returns 0 or a negative AVERROR.
312 static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
314 VP9Context *s = ctx->priv_data;
316 int bytesperpixel = s->bytesperpixel;
318 av_assert0(w > 0 && h > 0);
// fast path: nothing changed since the last call
320 if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
326 s->sb_cols = (w + 63) >> 6;
327 s->sb_rows = (h + 63) >> 6;
328 s->cols = (w + 7) >> 3;
329 s->rows = (h + 7) >> 3;
// carve consecutive sub-arrays (each n entries per superblock column) out
// of the single buffer p
331 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
332 av_freep(&s->intra_pred_data[0]);
333 // FIXME we slightly over-allocate here for subsampled chroma, but a little
334 // bit of padding shouldn't affect performance...
335 p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
336 sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
338 return AVERROR(ENOMEM);
339 assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
340 assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
341 assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
342 assign(s->above_y_nnz_ctx, uint8_t *, 16);
343 assign(s->above_mode_ctx, uint8_t *, 16);
344 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
345 assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
346 assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
347 assign(s->above_partition_ctx, uint8_t *, 8);
348 assign(s->above_skip_ctx, uint8_t *, 8);
349 assign(s->above_txfm_ctx, uint8_t *, 8);
350 assign(s->above_segpred_ctx, uint8_t *, 8);
351 assign(s->above_intra_ctx, uint8_t *, 8);
352 assign(s->above_comp_ctx, uint8_t *, 8);
353 assign(s->above_ref_ctx, uint8_t *, 8);
354 assign(s->above_filter_ctx, uint8_t *, 8);
355 assign(s->lflvl, struct VP9Filter *, 1);
358 // these will be re-allocated a little later
359 av_freep(&s->b_base);
360 av_freep(&s->block_base);
// bit depth changed: the DSP tables are depth-specific, re-init them
362 if (s->bpp != s->last_bpp) {
363 ff_vp9dsp_init(&s->dsp, s->bpp);
364 ff_videodsp_init(&s->vdsp, s->bpp);
365 s->last_bpp = s->bpp;
// Allocate the block-mode array (b_base) and the coefficient/EOB scratch
// buffers.  In 2-pass (frame-threaded) mode every superblock needs its own
// scratch slot, otherwise a single slot is reused.  No-op when already
// allocated for the current pass mode.  Returns 0 or AVERROR(ENOMEM).
371 static int update_block_buffers(AVCodecContext *ctx)
373 VP9Context *s = ctx->priv_data;
374 int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
376 if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
380 av_free(s->block_base);
// chroma sizes shrink with each subsampling direction
381 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
382 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
383 if (s->frames[CUR_FRAME].uses_2pass) {
384 int sbs = s->sb_cols * s->sb_rows;
386 s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
387 s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
388 16 * 16 + 2 * chroma_eobs) * sbs);
389 if (!s->b_base || !s->block_base)
390 return AVERROR(ENOMEM);
// slice the one allocation: luma coefs, 2x chroma coefs, then EOB bytes
391 s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
392 s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
393 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
394 s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
395 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
397 s->b_base = av_malloc(sizeof(VP9Block));
398 s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
399 16 * 16 + 2 * chroma_eobs);
400 if (!s->b_base || !s->block_base)
401 return AVERROR(ENOMEM);
402 s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
403 s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
404 s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
405 s->uveob_base[0] = s->eob_base + 16 * 16;
406 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
408 s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
413 // for some reason the sign bit is at the end, not the start, of a bit sequence
414 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
416 int v = get_bits(gb, n);
417 return get_bits1(gb) ? -v : v;
420 static av_always_inline int inv_recenter_nonneg(int v, int m)
422 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
425 // differential forward probability updates
// Decode a subexponentially-coded delta from the range coder and apply it
// to the current probability p (in [1,255]), returning the new probability.
426 static int update_prob(VP56RangeCoder *c, int p)
// maps the decoded VLC index back to the absolute delta; the first 20
// entries are the coarse "cheap" updates mentioned in the comment below
428 static const int inv_map_table[255] = {
429 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
430 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
431 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
432 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
433 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
434 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
435 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
436 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
437 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
438 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
439 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
440 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
441 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
442 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
443 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
444 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
445 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
446 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
451 /* This code is trying to do a differential probability update. For a
452 * current probability A in the range [1, 255], the difference to a new
453 * probability of any value can be expressed differentially as 1-A,255-A
454 * where some part of this (absolute range) exists both in positive as
455 * well as the negative part, whereas another part only exists in one
456 * half. We're trying to code this shared part differentially, i.e.
457 * times two where the value of the lowest bit specifies the sign, and
458 * the single part is then coded on top of this. This absolute difference
459 * then again has a value of [0,254], but a bigger value in this range
460 * indicates that we're further away from the original value A, so we
461 * can code this as a VLC code, since higher values are increasingly
462 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
463 * updates vs. the 'fine, exact' updates further down the range, which
464 * adds one extra dimension to this differential update model. */
// 4-level VLC: buckets of 16, 16, 32, then the remaining fine range
466 if (!vp8_rac_get(c)) {
467 d = vp8_rac_get_uint(c, 4) + 0;
468 } else if (!vp8_rac_get(c)) {
469 d = vp8_rac_get_uint(c, 4) + 16;
470 } else if (!vp8_rac_get(c)) {
471 d = vp8_rac_get_uint(c, 5) + 32;
473 d = vp8_rac_get_uint(c, 7);
475 d = (d << 1) - 65 + vp8_rac_get(c); // fold sign bit into the fine range
477 av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
// re-center the delta around p, mirroring for the upper half so the
// result stays within [1, 255]
480 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
481 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
// Parse bit depth, colorspace, color range and (for odd profiles) chroma
// subsampling from the uncompressed header.  Sets s->bpp/bytesperpixel,
// s->ss_h/ss_v and ctx->colorspace/color_range, and returns the resulting
// AVPixelFormat or a negative AVERROR on invalid combinations.
484 static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
486 static const enum AVColorSpace colorspaces[8] = {
487 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
488 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
490 VP9Context *s = ctx->priv_data;
491 enum AVPixelFormat res;
// profiles 0/1 are always 8-bit; profiles 2/3 signal 10 vs 12 bit
492 int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
495 s->bpp = 8 + bits * 2;
496 s->bytesperpixel = (7 + s->bpp) >> 3;
497 ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
498 if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
499 static const enum AVPixelFormat pix_fmt_rgb[3] = {
500 AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
502 if (ctx->profile & 1) {
503 s->ss_h = s->ss_v = 1;
504 res = pix_fmt_rgb[bits];
505 ctx->color_range = AVCOL_RANGE_JPEG;
507 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
509 return AVERROR_INVALIDDATA;
// YUV: pick the format from bit depth and the two subsampling flags
512 static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
513 { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
514 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
515 { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
516 { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
517 { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
518 { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
520 ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
521 if (ctx->profile & 1) {
// odd profiles carry explicit subsampling bits; 4:2:0 is reserved here
522 s->ss_h = get_bits1(&s->gb);
523 s->ss_v = get_bits1(&s->gb);
524 if ((res = pix_fmt_for_ss[bits][s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
525 av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
527 return AVERROR_INVALIDDATA;
528 } else if (get_bits1(&s->gb)) {
529 av_log(ctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
531 return AVERROR_INVALIDDATA;
// even profiles are implicitly 4:2:0
534 s->ss_h = s->ss_v = 1;
535 res = pix_fmt_for_ss[bits][1][1];
// Parse one VP9 frame header: the uncompressed part (profile, frame type,
// sizes, references, loop filter, quantizers, segmentation, tiling) from
// the bit reader, then the compressed (range-coded) part holding the
// forward probability updates.  On success returns the total number of
// header bytes consumed, (data2 - data) + size2; on failure a negative
// AVERROR.  *ref is set when the packet is a show-existing-frame packet
// (the early return for that path is elided in this chunk of the file).
542 static int decode_frame_header(AVCodecContext *ctx,
543 const uint8_t *data, int size, int *ref)
545 VP9Context *s = ctx->priv_data;
546 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
547 enum AVPixelFormat fmt = ctx->pix_fmt;
549 const uint8_t *data2;
/* general frame header (uncompressed part) */
552 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
553 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
556 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
557 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
558 return AVERROR_INVALIDDATA;
// profile is two bits plus one reserved/extension bit when both are set
560 ctx->profile = get_bits1(&s->gb);
561 ctx->profile |= get_bits1(&s->gb) << 1;
562 if (ctx->profile == 3) ctx->profile += get_bits1(&s->gb);
563 if (ctx->profile > 3) {
564 av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
565 return AVERROR_INVALIDDATA;
// show_existing_frame: only a 3-bit reference index follows
567 if (get_bits1(&s->gb)) {
568 *ref = get_bits(&s->gb, 3);
571 s->last_keyframe = s->keyframe;
572 s->keyframe = !get_bits1(&s->gb);
573 last_invisible = s->invisible;
574 s->invisible = !get_bits1(&s->gb);
575 s->errorres = get_bits1(&s->gb);
576 s->use_last_frame_mvs = !s->errorres && !last_invisible;
/* keyframe path: sync code, colorspace, size */
578 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
579 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
580 return AVERROR_INVALIDDATA;
582 if ((fmt = read_colorspace_details(ctx)) < 0)
584 // for profile 1, here follows the subsampling bits
585 s->refreshrefmask = 0xff; // keyframes refresh all 8 reference slots
586 w = get_bits(&s->gb, 16) + 1;
587 h = get_bits(&s->gb, 16) + 1;
588 if (get_bits1(&s->gb)) // display size
589 skip_bits(&s->gb, 32);
/* non-keyframe path */
591 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
592 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
// intra-only frames repeat the sync code and color details
594 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
595 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
596 return AVERROR_INVALIDDATA;
598 if (ctx->profile >= 1) {
599 if ((fmt = read_colorspace_details(ctx)) < 0)
602 s->ss_h = s->ss_v = 1;
// profile-0 intra-only frames are implicitly 8-bit 4:2:0 BT.470BG/JPEG
605 s->bytesperpixel = 1;
606 fmt = AV_PIX_FMT_YUV420P;
607 ctx->colorspace = AVCOL_SPC_BT470BG;
608 ctx->color_range = AVCOL_RANGE_JPEG;
610 s->refreshrefmask = get_bits(&s->gb, 8);
611 w = get_bits(&s->gb, 16) + 1;
612 h = get_bits(&s->gb, 16) + 1;
613 if (get_bits1(&s->gb)) // display size
614 skip_bits(&s->gb, 32);
/* inter frame: read the three active reference slots and sign biases */
616 s->refreshrefmask = get_bits(&s->gb, 8);
617 s->refidx[0] = get_bits(&s->gb, 3);
618 s->signbias[0] = get_bits1(&s->gb) && !s->errorres;
619 s->refidx[1] = get_bits(&s->gb, 3);
620 s->signbias[1] = get_bits1(&s->gb) && !s->errorres;
621 s->refidx[2] = get_bits(&s->gb, 3);
622 s->signbias[2] = get_bits1(&s->gb) && !s->errorres;
623 if (!s->refs[s->refidx[0]].f->data[0] ||
624 !s->refs[s->refidx[1]].f->data[0] ||
625 !s->refs[s->refidx[2]].f->data[0]) {
626 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
627 return AVERROR_INVALIDDATA;
// frame size: either inherited from one of the references or explicit
629 if (get_bits1(&s->gb)) {
630 w = s->refs[s->refidx[0]].f->width;
631 h = s->refs[s->refidx[0]].f->height;
632 } else if (get_bits1(&s->gb)) {
633 w = s->refs[s->refidx[1]].f->width;
634 h = s->refs[s->refidx[1]].f->height;
635 } else if (get_bits1(&s->gb)) {
636 w = s->refs[s->refidx[2]].f->width;
637 h = s->refs[s->refidx[2]].f->height;
639 w = get_bits(&s->gb, 16) + 1;
640 h = get_bits(&s->gb, 16) + 1;
642 // Note that in this code, "CUR_FRAME" is actually before we
643 // have formally allocated a frame, and thus actually represents
645 s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
646 s->frames[CUR_FRAME].tf.f->height == h;
647 if (get_bits1(&s->gb)) // display size
648 skip_bits(&s->gb, 32);
649 s->highprecisionmvs = get_bits1(&s->gb);
650 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
// compound prediction is only possible when the references disagree in
// sign bias; pick the fixed vs. variable compound reference accordingly
652 s->allowcompinter = (s->signbias[0] != s->signbias[1] ||
653 s->signbias[0] != s->signbias[2]);
654 if (s->allowcompinter) {
655 if (s->signbias[0] == s->signbias[1]) {
657 s->varcompref[0] = 0;
658 s->varcompref[1] = 1;
659 } else if (s->signbias[0] == s->signbias[2]) {
661 s->varcompref[0] = 0;
662 s->varcompref[1] = 2;
665 s->varcompref[0] = 1;
666 s->varcompref[1] = 2;
// validate each reference against the new frame size and precompute the
// 14-bit fixed-point mv scaling factors (0,0 == same size, no scaling)
670 for (i = 0; i < 3; i++) {
671 AVFrame *ref = s->refs[s->refidx[i]].f;
672 int refw = ref->width, refh = ref->height;
674 if (ref->format != fmt) {
675 av_log(ctx, AV_LOG_ERROR,
676 "Ref pixfmt (%s) did not match current frame (%s)",
677 av_get_pix_fmt_name(ref->format),
678 av_get_pix_fmt_name(fmt));
679 return AVERROR_INVALIDDATA;
680 } else if (refw == w && refh == h) {
681 s->mvscale[i][0] = s->mvscale[i][1] = 0;
// spec limits: ref may be at most 2x larger or 16x smaller per dimension
683 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
684 av_log(ctx, AV_LOG_ERROR,
685 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
687 return AVERROR_INVALIDDATA;
689 s->mvscale[i][0] = (refw << 14) / w;
690 s->mvscale[i][1] = (refh << 14) / h;
691 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
692 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
697 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
698 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
699 s->framectxid = c = get_bits(&s->gb, 2);
701 /* loopfilter header data */
702 if (s->keyframe || s->errorres || s->intraonly) {
703 // reset loopfilter defaults
704 s->lf_delta.ref[0] = 1;
705 s->lf_delta.ref[1] = 0;
706 s->lf_delta.ref[2] = -1;
707 s->lf_delta.ref[3] = -1;
708 s->lf_delta.mode[0] = 0;
709 s->lf_delta.mode[1] = 0;
711 s->filter.level = get_bits(&s->gb, 6);
712 sharp = get_bits(&s->gb, 3);
713 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
714 // the old cache values since they are still valid
715 if (s->filter.sharpness != sharp)
716 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
717 s->filter.sharpness = sharp;
718 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
719 if (get_bits1(&s->gb)) {
720 for (i = 0; i < 4; i++)
721 if (get_bits1(&s->gb))
722 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
723 for (i = 0; i < 2; i++)
724 if (get_bits1(&s->gb))
725 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
729 /* quantization header data */
730 s->yac_qi = get_bits(&s->gb, 8);
731 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
732 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
733 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
734 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
735 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
737 ctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
739 /* segmentation header info */
740 s->segmentation.ignore_refmap = 0;
741 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
742 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
743 for (i = 0; i < 7; i++)
744 s->prob.seg[i] = get_bits1(&s->gb) ?
745 get_bits(&s->gb, 8) : 255;
746 if ((s->segmentation.temporal = get_bits1(&s->gb))) {
747 for (i = 0; i < 3; i++)
748 s->prob.segpred[i] = get_bits1(&s->gb) ?
749 get_bits(&s->gb, 8) : 255;
// a segmentation map carried over across a size change cannot be used;
// warn and ignore it instead of erroring out
752 if ((!s->segmentation.update_map || s->segmentation.temporal) &&
753 (w != s->frames[CUR_FRAME].tf.f->width ||
754 h != s->frames[CUR_FRAME].tf.f->height)) {
755 av_log(ctx, AV_LOG_WARNING,
756 "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
757 s->segmentation.temporal, s->segmentation.update_map);
758 s->segmentation.ignore_refmap = 1;
759 //return AVERROR_INVALIDDATA;
// per-segment feature data (quantizer, loop filter, reference, skip)
762 if (get_bits1(&s->gb)) {
763 s->segmentation.absolute_vals = get_bits1(&s->gb);
764 for (i = 0; i < 8; i++) {
765 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
766 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
767 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
768 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
769 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
770 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
771 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
775 s->segmentation.feat[0].q_enabled = 0;
776 s->segmentation.feat[0].lf_enabled = 0;
777 s->segmentation.feat[0].skip_enabled = 0;
778 s->segmentation.feat[0].ref_enabled = 0;
781 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
782 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
783 int qyac, qydc, quvac, quvdc, lflvl, sh;
785 if (s->segmentation.feat[i].q_enabled) {
786 if (s->segmentation.absolute_vals)
787 qyac = s->segmentation.feat[i].q_val;
789 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
793 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
794 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
795 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
796 qyac = av_clip_uintp2(qyac, 8);
798 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
799 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
800 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
801 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];
// resolve the final per-segment/per-ref/per-mode loop-filter levels
803 sh = s->filter.level >= 32;
804 if (s->segmentation.feat[i].lf_enabled) {
805 if (s->segmentation.absolute_vals)
806 lflvl = av_clip_uintp2(s->segmentation.feat[i].lf_val, 6);
808 lflvl = av_clip_uintp2(s->filter.level + s->segmentation.feat[i].lf_val, 6);
810 lflvl = s->filter.level;
812 if (s->lf_delta.enabled) {
813 s->segmentation.feat[i].lflvl[0][0] =
814 s->segmentation.feat[i].lflvl[0][1] =
815 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
816 for (j = 1; j < 4; j++) {
817 s->segmentation.feat[i].lflvl[j][0] =
818 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
819 s->lf_delta.mode[0]) * (1 << sh)), 6);
820 s->segmentation.feat[i].lflvl[j][1] =
821 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
822 s->lf_delta.mode[1]) * (1 << sh)), 6);
825 memset(s->segmentation.feat[i].lflvl, lflvl,
826 sizeof(s->segmentation.feat[i].lflvl));
/* tiling info */
831 if ((res = update_size(ctx, w, h, fmt)) < 0) {
832 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
// min log2_tile_cols so that each tile is at most 64 superblocks wide,
// max so that each tile is at least 4 superblocks wide
835 for (s->tiling.log2_tile_cols = 0;
836 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
837 s->tiling.log2_tile_cols++) ;
838 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
839 max = FFMAX(0, max - 1);
840 while (max > s->tiling.log2_tile_cols) {
841 if (get_bits1(&s->gb))
842 s->tiling.log2_tile_cols++;
846 s->tiling.log2_tile_rows = decode012(&s->gb);
847 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
848 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
849 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
// one range coder per tile column
850 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
851 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
853 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
854 return AVERROR(ENOMEM);
// keyframes/error-resilient/intra-only frames reset all 4 probability
// contexts to the spec defaults
858 if (s->keyframe || s->errorres || s->intraonly) {
859 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
860 s->prob_ctx[3].p = vp9_default_probs;
861 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
862 sizeof(vp9_default_coef_probs));
863 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
864 sizeof(vp9_default_coef_probs));
865 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
866 sizeof(vp9_default_coef_probs));
867 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
868 sizeof(vp9_default_coef_probs));
871 // next 16 bits is size of the rest of the header (arith-coded)
872 size2 = get_bits(&s->gb, 16);
873 data2 = align_get_bits(&s->gb);
874 if (size2 > size - (data2 - data)) {
875 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
876 return AVERROR_INVALIDDATA;
878 ff_vp56_init_range_decoder(&s->c, data2, size2);
879 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
880 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
881 return AVERROR_INVALIDDATA;
// reset adaptation counts (only coef/eob are used on intra frames)
884 if (s->keyframe || s->intraonly) {
885 memset(s->counts.coef, 0, sizeof(s->counts.coef));
886 memset(s->counts.eob, 0, sizeof(s->counts.eob));
888 memset(&s->counts, 0, sizeof(s->counts));
890 // FIXME is it faster to not copy here, but do it down in the fw updates
891 // as explicit copies if the fw update is missing (and skip the copy upon
893 s->prob.p = s->prob_ctx[c].p;
/* txfm updates */
897 s->txfmmode = TX_4X4;
899 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
900 if (s->txfmmode == 3)
901 s->txfmmode += vp8_rac_get(&s->c);
903 if (s->txfmmode == TX_SWITCHABLE) {
904 for (i = 0; i < 2; i++)
905 if (vp56_rac_get_prob_branchy(&s->c, 252))
906 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
907 for (i = 0; i < 2; i++)
908 for (j = 0; j < 2; j++)
909 if (vp56_rac_get_prob_branchy(&s->c, 252))
910 s->prob.p.tx16p[i][j] =
911 update_prob(&s->c, s->prob.p.tx16p[i][j]);
912 for (i = 0; i < 2; i++)
913 for (j = 0; j < 3; j++)
914 if (vp56_rac_get_prob_branchy(&s->c, 252))
915 s->prob.p.tx32p[i][j] =
916 update_prob(&s->c, s->prob.p.tx32p[i][j]);
/* coef probability updates, one set per transform size */
921 for (i = 0; i < 4; i++) {
922 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
923 if (vp8_rac_get(&s->c)) {
924 for (j = 0; j < 2; j++)
925 for (k = 0; k < 2; k++)
926 for (l = 0; l < 6; l++)
927 for (m = 0; m < 6; m++) {
928 uint8_t *p = s->prob.coef[i][j][k][l][m];
929 uint8_t *r = ref[j][k][l][m];
930 if (m >= 3 && l == 0) // dc only has 3 pt
932 for (n = 0; n < 3; n++) {
933 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
934 p[n] = update_prob(&s->c, r[n]);
942 for (j = 0; j < 2; j++)
943 for (k = 0; k < 2; k++)
944 for (l = 0; l < 6; l++)
945 for (m = 0; m < 6; m++) {
946 uint8_t *p = s->prob.coef[i][j][k][l][m];
947 uint8_t *r = ref[j][k][l][m];
948 if (m > 3 && l == 0) // dc only has 3 pt
954 if (s->txfmmode == i)
/* mode/partition/mv probability updates (inter frames only below) */
959 for (i = 0; i < 3; i++)
960 if (vp56_rac_get_prob_branchy(&s->c, 252))
961 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
962 if (!s->keyframe && !s->intraonly) {
963 for (i = 0; i < 7; i++)
964 for (j = 0; j < 3; j++)
965 if (vp56_rac_get_prob_branchy(&s->c, 252))
966 s->prob.p.mv_mode[i][j] =
967 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
969 if (s->filtermode == FILTER_SWITCHABLE)
970 for (i = 0; i < 4; i++)
971 for (j = 0; j < 2; j++)
972 if (vp56_rac_get_prob_branchy(&s->c, 252))
973 s->prob.p.filter[i][j] =
974 update_prob(&s->c, s->prob.p.filter[i][j]);
976 for (i = 0; i < 4; i++)
977 if (vp56_rac_get_prob_branchy(&s->c, 252))
978 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
980 if (s->allowcompinter) {
981 s->comppredmode = vp8_rac_get(&s->c);
983 s->comppredmode += vp8_rac_get(&s->c);
984 if (s->comppredmode == PRED_SWITCHABLE)
985 for (i = 0; i < 5; i++)
986 if (vp56_rac_get_prob_branchy(&s->c, 252))
988 update_prob(&s->c, s->prob.p.comp[i]);
990 s->comppredmode = PRED_SINGLEREF;
993 if (s->comppredmode != PRED_COMPREF) {
994 for (i = 0; i < 5; i++) {
995 if (vp56_rac_get_prob_branchy(&s->c, 252))
996 s->prob.p.single_ref[i][0] =
997 update_prob(&s->c, s->prob.p.single_ref[i][0]);
998 if (vp56_rac_get_prob_branchy(&s->c, 252))
999 s->prob.p.single_ref[i][1] =
1000 update_prob(&s->c, s->prob.p.single_ref[i][1]);
1004 if (s->comppredmode != PRED_SINGLEREF) {
1005 for (i = 0; i < 5; i++)
1006 if (vp56_rac_get_prob_branchy(&s->c, 252))
1007 s->prob.p.comp_ref[i] =
1008 update_prob(&s->c, s->prob.p.comp_ref[i]);
1011 for (i = 0; i < 4; i++)
1012 for (j = 0; j < 9; j++)
1013 if (vp56_rac_get_prob_branchy(&s->c, 252))
1014 s->prob.p.y_mode[i][j] =
1015 update_prob(&s->c, s->prob.p.y_mode[i][j]);
1017 for (i = 0; i < 4; i++)
1018 for (j = 0; j < 4; j++)
1019 for (k = 0; k < 3; k++)
1020 if (vp56_rac_get_prob_branchy(&s->c, 252))
1021 s->prob.p.partition[3 - i][j][k] =
1022 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
1024 // mv fields don't use the update_prob subexp model for some reason
1025 for (i = 0; i < 3; i++)
1026 if (vp56_rac_get_prob_branchy(&s->c, 252))
1027 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1029 for (i = 0; i < 2; i++) {
1030 if (vp56_rac_get_prob_branchy(&s->c, 252))
1031 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1033 for (j = 0; j < 10; j++)
1034 if (vp56_rac_get_prob_branchy(&s->c, 252))
1035 s->prob.p.mv_comp[i].classes[j] =
1036 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1038 if (vp56_rac_get_prob_branchy(&s->c, 252))
1039 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1041 for (j = 0; j < 10; j++)
1042 if (vp56_rac_get_prob_branchy(&s->c, 252))
1043 s->prob.p.mv_comp[i].bits[j] =
1044 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1047 for (i = 0; i < 2; i++) {
1048 for (j = 0; j < 2; j++)
1049 for (k = 0; k < 3; k++)
1050 if (vp56_rac_get_prob_branchy(&s->c, 252))
1051 s->prob.p.mv_comp[i].class0_fp[j][k] =
1052 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1054 for (j = 0; j < 3; j++)
1055 if (vp56_rac_get_prob_branchy(&s->c, 252))
1056 s->prob.p.mv_comp[i].fp[j] =
1057 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1060 if (s->highprecisionmvs) {
1061 for (i = 0; i < 2; i++) {
1062 if (vp56_rac_get_prob_branchy(&s->c, 252))
1063 s->prob.p.mv_comp[i].class0_hp =
1064 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1066 if (vp56_rac_get_prob_branchy(&s->c, 252))
1067 s->prob.p.mv_comp[i].hp =
1068 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// total header size: uncompressed part up to data2 plus compressed part
1073 return (data2 - data) + size2;
// Clamp a motion vector into the valid range for the current block
// (s->min_mv/s->max_mv, set up elsewhere per block).  NOTE(review): the
// parameter-list continuation of this signature is elided in this chunk.
1076 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
1079 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
1080 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
// Build a predicted motion vector (*pmv) for reference 'ref' of the current
// block by scanning candidate positions: current-block sub-MVs (sb >= 0),
// above/left neighbour context, a block-size-dependent neighbourhood table,
// and the co-located position in the previous frame. 'idx' selects whether
// the first or second distinct candidate is wanted; 'z' selects which of the
// two per-block references is being predicted.
// NOTE(review): the RETURN_* macros perform early 'return' out of this
// function — control flow is non-local by design.
1083 static void find_ref_mvs(VP9Context *s,
1084                          VP56mv *pmv, int ref, int z, int idx, int sb)
// Per-block-size candidate offsets {col,row} relative to the current block.
1086     static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
1087         [BS_64x64] = {{  3, -1 }, { -1,  3 }, {  4, -1 }, { -1,  4 },
1088                       { -1, -1 }, {  0, -1 }, { -1,  0 }, {  6, -1 }},
1089         [BS_64x32] = {{  0, -1 }, { -1,  0 }, {  4, -1 }, { -1,  2 },
1090                       { -1, -1 }, {  0, -3 }, { -3,  0 }, {  2, -1 }},
1091         [BS_32x64] = {{ -1,  0 }, {  0, -1 }, { -1,  4 }, {  2, -1 },
1092                       { -1, -1 }, { -3,  0 }, {  0, -3 }, { -1,  2 }},
1093         [BS_32x32] = {{  1, -1 }, { -1,  1 }, {  2, -1 }, { -1,  2 },
1094                       { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
1095         [BS_32x16] = {{  0, -1 }, { -1,  0 }, {  2, -1 }, { -1, -1 },
1096                       { -1,  1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
1097         [BS_16x32] = {{ -1,  0 }, {  0, -1 }, { -1,  2 }, { -1, -1 },
1098                       {  1, -1 }, { -3,  0 }, {  0, -3 }, { -3, -3 }},
1099         [BS_16x16] = {{  0, -1 }, { -1,  0 }, {  1, -1 }, { -1,  1 },
1100                       { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
1101         [BS_16x8]  = {{  0, -1 }, { -1,  0 }, {  1, -1 }, { -1, -1 },
1102                       {  0, -2 }, { -2,  0 }, { -2, -1 }, { -1, -2 }},
1103         [BS_8x16]  = {{ -1,  0 }, {  0, -1 }, { -1,  1 }, { -1, -1 },
1104                       { -2,  0 }, {  0, -2 }, { -1, -2 }, { -2, -1 }},
1105         [BS_8x8]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
1106                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1107         [BS_8x4]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
1108                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1109         [BS_4x8]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
1110                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1111         [BS_4x4]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
1112                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
// 'mem' remembers the first accepted candidate (as a packed 32-bit MV) so a
// second, different candidate can be detected; INVALID_MV marks "none yet".
1115     int row = s->row, col = s->col, row7 = s->row7;
1116     const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
1117 #define INVALID_MV 0x80008000U
1118     uint32_t mem = INVALID_MV, mem_sub8x8 = INVALID_MV;
1121 #define RETURN_DIRECT_MV(mv) \
1123         uint32_t m = AV_RN32A(&mv); \
1127         } else if (mem == INVALID_MV) { \
1129         } else if (m != mem) { \
1136     if (sb == 2 || sb == 1) {
1137         RETURN_DIRECT_MV(b->mv[0][z]);
1138     } else if (sb == 3) {
1139         RETURN_DIRECT_MV(b->mv[2][z]);
1140         RETURN_DIRECT_MV(b->mv[1][z]);
1141         RETURN_DIRECT_MV(b->mv[0][z]);
1144 #define RETURN_MV(mv) \
1149             av_assert2(idx == 1); \
1150             av_assert2(mem != INVALID_MV); \
1151             if (mem_sub8x8 == INVALID_MV) { \
1152                 clamp_mv(&tmp, &mv, s); \
1153                 m = AV_RN32A(&tmp); \
1158                 mem_sub8x8 = AV_RN32A(&mv); \
1159             } else if (mem_sub8x8 != AV_RN32A(&mv)) { \
1160                 clamp_mv(&tmp, &mv, s); \
1161                 m = AV_RN32A(&tmp); \
1165                 /* BUG I'm pretty sure this isn't the intention */ \
1171             uint32_t m = AV_RN32A(&mv); \
1173                 clamp_mv(pmv, &mv, s); \
1175             } else if (mem == INVALID_MV) { \
1177             } else if (m != mem) { \
1178                 clamp_mv(pmv, &mv, s); \
1185         struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1186         if (mv->ref[0] == ref) {
1187             RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1188         } else if (mv->ref[1] == ref) {
1189             RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
// left neighbour (only if we are not at the tile's left edge)
1192     if (col > s->tiling.tile_col_start) {
1193         struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1194         if (mv->ref[0] == ref) {
1195             RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1196         } else if (mv->ref[1] == ref) {
1197             RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1205     // previously coded MVs in this neighbourhood, using same reference frame
1206     for (; i < 8; i++) {
1207         int c = p[i][0] + col, r = p[i][1] + row;
1209         if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1210             struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1212             if (mv->ref[0] == ref) {
1213                 RETURN_MV(mv->mv[0]);
1214             } else if (mv->ref[1] == ref) {
1215                 RETURN_MV(mv->mv[1]);
1220     // MV at this position in previous frame, using same reference frame
1221     if (s->use_last_frame_mvs) {
1222         struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
// wait for the frame-MT decoder of the previous frame to reach this row
1224         if (!s->frames[REF_FRAME_MVPAIR].uses_2pass)
1225             ff_thread_await_progress(&s->frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
1226         if (mv->ref[0] == ref) {
1227             RETURN_MV(mv->mv[0]);
1228         } else if (mv->ref[1] == ref) {
1229             RETURN_MV(mv->mv[1]);
// As RETURN_MV, but optionally negates the MV when the candidate's reference
// frame and the requested one have opposite sign bias.
1233 #define RETURN_SCALE_MV(mv, scale) \
1236             VP56mv mv_temp = { -mv.x, -mv.y }; \
1237             RETURN_MV(mv_temp); \
1243     // previously coded MVs in this neighbourhood, using different reference frame
1244     for (i = 0; i < 8; i++) {
1245         int c = p[i][0] + col, r = p[i][1] + row;
1247         if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1248             struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1250             if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1251                 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1253             if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1254                 // BUG - libvpx has this condition regardless of whether
1255                 // we used the first ref MV and pre-scaling
1256                 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1257                 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1262     // MV at this position in previous frame, using different reference frame
1263     if (s->use_last_frame_mvs) {
1264         struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1266         // no need to await_progress, because we already did that above
1267         if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1268             RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1270         if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1271             // BUG - libvpx has this condition regardless of whether
1272             // we used the first ref MV and pre-scaling
1273             AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1274             RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
// no early return fired: clamp whatever is currently in *pmv
1279     clamp_mv(pmv, pmv, s);
1282 #undef RETURN_SCALE_MV
// Decode one MV component delta from the range coder: sign, magnitude class,
// then either the class0 (small-magnitude) path or per-bit integer bits,
// followed by fractional (fp) and optional half-precision (hp) bits.
// All decoded symbols are tallied in s->counts for backward adaptation.
// Returns the signed component value; 'hp' enables the high-precision bit.
1285 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1287     int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1288     int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1289                                 s->prob.p.mv_comp[idx].classes);
1291     s->counts.mv_comp[idx].sign[sign]++;
1292     s->counts.mv_comp[idx].classes[c]++;
// class >= 1: read c magnitude bits individually
1296         for (n = 0, m = 0; m < c; m++) {
1297             bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1299             s->counts.mv_comp[idx].bits[m][bit]++;
1302         bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1304         s->counts.mv_comp[idx].fp[bit]++;
1306             bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1307             s->counts.mv_comp[idx].hp[bit]++;
1311             // bug in libvpx - we count for bw entropy purposes even if the
1313             s->counts.mv_comp[idx].hp[1]++;
// class 0: single class0 bit plus fp/hp refinement
1317         n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1318         s->counts.mv_comp[idx].class0[n]++;
1319         bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1320                                s->prob.p.mv_comp[idx].class0_fp[n]);
1321         s->counts.mv_comp[idx].class0_fp[n][bit]++;
1322         n = (n << 3) | (bit << 1);
1324             bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1325             s->counts.mv_comp[idx].class0_hp[bit]++;
1329             // bug in libvpx - we count for bw entropy purposes even if the
1331             s->counts.mv_comp[idx].class0_hp[1]++;
1335     return sign ? -(n + 1) : (n + 1);
// Fill mv[0] (and mv[1] for compound prediction) for the current (sub)block:
// predict via find_ref_mvs(), then for NEWMV read the joint + per-component
// deltas from the bitstream. 'sb' is the sub-block index (-1 for whole-block).
1338 static void fill_mv(VP9Context *s,
1339                     VP56mv *mv, int mode, int sb)
1343     if (mode == ZEROMV) {
1348         // FIXME cache this value and reuse for other subblocks
1349         find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1350                      mode == NEWMV ? -1 : sb);
1351         // FIXME maybe move this code into find_ref_mvs()
// hp is disabled when the predictor is large (|component| >= 64), matching
// the use_hp rule in the VP9 spec
1352         if ((mode == NEWMV || sb == -1) &&
1353             !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1367         if (mode == NEWMV) {
1368             enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1369                                               s->prob.p.mv_joint);
1371             s->counts.mv_joint[j]++;
1372             if (j >= MV_JOINT_V)
1373                 mv[0].y += read_mv_component(s, 0, hp);
1375                 mv[0].x += read_mv_component(s, 1, hp);
// second reference of a compound block: same procedure for mv[1]
1379             // FIXME cache this value and reuse for other subblocks
1380             find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1381                          mode == NEWMV ? -1 : sb);
1382             if ((mode == NEWMV || sb == -1) &&
1383                 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1397             if (mode == NEWMV) {
1398                 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1399                                                   s->prob.p.mv_joint);
1401                 s->counts.mv_joint[j]++;
1402                 if (j >= MV_JOINT_V)
1403                     mv[1].y += read_mv_component(s, 0, hp);
1405                     mv[1].x += read_mv_component(s, 1, hp);
// Fill a w x h byte rectangle at 'ptr' (row pitch 'stride') with value 'v',
// using the widest aligned store available for each width.
1411 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1412                                        ptrdiff_t stride, int v)
1422         int v16 = v * 0x0101;
1430         uint32_t v32 = v * 0x01010101;
1439         uint64_t v64 = v * 0x0101010101010101ULL;
// NOTE(review): this path presumably covers builds without fast 64-bit
// stores — confirm against the surrounding #if in the full file
1445         uint32_t v32 = v * 0x01010101;
1448             AV_WN32A(ptr + 4, v32);
// Decode all mode information for the current block: segment id, skip flag,
// intra/inter decision, transform size, intra modes or (for inter) reference
// frames + interpolation filter + per-subblock inter modes and MVs. Finally
// propagates the decoded state into the above/left context arrays and the
// per-frame MV/reference buffers used by later blocks and frames.
1457 static void decode_mode(AVCodecContext *ctx)
1459     static const uint8_t left_ctx[N_BS_SIZES] = {
1460         0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1462     static const uint8_t above_ctx[N_BS_SIZES] = {
1463         0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
1465     static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1466         TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1467         TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1469     VP9Context *s = ctx->priv_data;
1471     int row = s->row, col = s->col, row7 = s->row7;
1472     enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
// w4/h4 are clipped to the visible frame area; have_a/have_l say whether an
// above/left neighbour exists within the current tile
1473     int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1474     int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
1475     int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1476     int vref, filter_id;
// --- segment id: explicit, temporally predicted, or from the ref segmap ---
1478     if (!s->segmentation.enabled) {
1480     } else if (s->keyframe || s->intraonly) {
1481         b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
1482     } else if (!s->segmentation.update_map ||
1483                (s->segmentation.temporal &&
1484                 vp56_rac_get_prob_branchy(&s->c,
1485                     s->prob.segpred[s->above_segpred_ctx[col] +
1486                                     s->left_segpred_ctx[row7]]))) {
1487         if (!s->errorres && !s->segmentation.ignore_refmap) {
1489             uint8_t *refsegmap = s->frames[REF_FRAME_SEGMAP].segmentation_map;
1491             if (!s->frames[REF_FRAME_SEGMAP].uses_2pass)
1492                 ff_thread_await_progress(&s->frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
// predicted seg_id = minimum of the co-located reference segmap values
1493             for (y = 0; y < h4; y++) {
1494                 int idx_base = (y + row) * 8 * s->sb_cols + col;
1495                 for (x = 0; x < w4; x++)
1496                     pred = FFMIN(pred, refsegmap[idx_base + x]);
1498             av_assert1(pred < 8);
1504         memset(&s->above_segpred_ctx[col], 1, w4);
1505         memset(&s->left_segpred_ctx[row7], 1, h4);
1507         b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1510         memset(&s->above_segpred_ctx[col], 0, w4);
1511         memset(&s->left_segpred_ctx[row7], 0, h4);
1513     if (s->segmentation.enabled &&
1514         (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1515         setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1516                   bw4, bh4, 8 * s->sb_cols, b->seg_id);
// --- skip flag (segment feature or coded with above+left context) ---
1519     b->skip = s->segmentation.enabled &&
1520         s->segmentation.feat[b->seg_id].skip_enabled;
1522         int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1523         b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1524         s->counts.skip[c][b->skip]++;
// --- intra/inter decision ---
1527     if (s->keyframe || s->intraonly) {
1529     } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1530         b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1534         if (have_a && have_l) {
1535             c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1538             c = have_a ? 2 * s->above_intra_ctx[col] :
1539                 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1541         bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1542         s->counts.intra[c][bit]++;
// --- transform size (coded only when switchable and not a skipped inter) ---
1546     if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1550                 c = (s->above_skip_ctx[col] ? max_tx :
1551                      s->above_txfm_ctx[col]) +
1552                     (s->left_skip_ctx[row7] ? max_tx :
1553                      s->left_txfm_ctx[row7]) > max_tx;
1555                 c = s->above_skip_ctx[col] ? 1 :
1556                     (s->above_txfm_ctx[col] * 2 > max_tx);
1558         } else if (have_l) {
1559             c = s->left_skip_ctx[row7] ? 1 :
1560                 (s->left_txfm_ctx[row7] * 2 > max_tx);
1566             b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1568                 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1570                     b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1572             s->counts.tx32p[c][b->tx]++;
1575             b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1577                 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1578             s->counts.tx16p[c][b->tx]++;
1581             b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1582             s->counts.tx8p[c][b->tx]++;
1589         b->tx = FFMIN(max_tx, s->txfmmode);
// --- intra modes: keyframe path uses fixed KF probabilities keyed on the
// above/left mode context ---
1592     if (s->keyframe || s->intraonly) {
1593         uint8_t *a = &s->above_mode_ctx[col * 2];
1594         uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1597         if (b->bs > BS_8x8) {
1598             // FIXME the memory storage intermediates here aren't really
1599             // necessary, they're just there to make the code slightly
1601             b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1602                                     vp9_default_kf_ymode_probs[a[0]][l[0]]);
1603             if (b->bs != BS_8x4) {
1604                 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1605                                  vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1606                 l[0] = a[1] = b->mode[1];
1608                 l[0] = a[1] = b->mode[1] = b->mode[0];
1610             if (b->bs != BS_4x8) {
1611                 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1612                                         vp9_default_kf_ymode_probs[a[0]][l[1]]);
1613                 if (b->bs != BS_8x4) {
1614                     b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1615                                      vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1616                     l[1] = a[1] = b->mode[3];
1618                     l[1] = a[1] = b->mode[3] = b->mode[2];
1621                 b->mode[2] = b->mode[0];
1622                 l[1] = a[1] = b->mode[3] = b->mode[1];
1625             b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1626                                           vp9_default_kf_ymode_probs[*a][*l]);
1627             b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1628             // FIXME this can probably be optimized
1629             memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1630             memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1632         b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1633                                      vp9_default_kf_uvmode_probs[b->mode[3]]);
// --- intra modes on inter frames: adaptive y_mode/uv_mode probabilities ---
1634     } else if (b->intra) {
1636         if (b->bs > BS_8x8) {
1637             b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1638                                           s->prob.p.y_mode[0]);
1639             s->counts.y_mode[0][b->mode[0]]++;
1640             if (b->bs != BS_8x4) {
1641                 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1642                                               s->prob.p.y_mode[0]);
1643                 s->counts.y_mode[0][b->mode[1]]++;
1645                 b->mode[1] = b->mode[0];
1647             if (b->bs != BS_4x8) {
1648                 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1649                                               s->prob.p.y_mode[0]);
1650                 s->counts.y_mode[0][b->mode[2]]++;
1651                 if (b->bs != BS_8x4) {
1652                     b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1653                                                   s->prob.p.y_mode[0]);
1654                     s->counts.y_mode[0][b->mode[3]]++;
1656                     b->mode[3] = b->mode[2];
1659                 b->mode[2] = b->mode[0];
1660                 b->mode[3] = b->mode[1];
1663             static const uint8_t size_group[10] = {
1664                 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1666             int sz = size_group[b->bs];
1668             b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1669                                           s->prob.p.y_mode[sz]);
1670             b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1671             s->counts.y_mode[sz][b->mode[3]]++;
1673         b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1674                                      s->prob.p.uv_mode[b->mode[3]]);
1675         s->counts.uv_mode[b->mode[3]][b->uvmode]++;
// --- inter block: references, filter, modes, MVs ---
// maps (above_mode, left_mode) to the inter-mode probability context
1677         static const uint8_t inter_mode_ctx_lut[14][14] = {
1678             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1679             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1680             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1681             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1682             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1683             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1684             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1685             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1686             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1687             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1688             { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1689             { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1690             { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1691             { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
1694         if (s->segmentation.feat[b->seg_id].ref_enabled) {
1695             av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1697             b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1699             // read comp_pred flag
1700             if (s->comppredmode != PRED_SWITCHABLE) {
1701                 b->comp = s->comppredmode == PRED_COMPREF;
1705                 // FIXME add intra as ref=0xff (or -1) to make these easier?
1708                         if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1710                         } else if (s->above_comp_ctx[col]) {
1711                             c = 2 + (s->left_intra_ctx[row7] ||
1712                                      s->left_ref_ctx[row7] == s->fixcompref);
1713                         } else if (s->left_comp_ctx[row7]) {
1714                             c = 2 + (s->above_intra_ctx[col] ||
1715                                      s->above_ref_ctx[col] == s->fixcompref);
1717                             c = (!s->above_intra_ctx[col] &&
1718                                  s->above_ref_ctx[col] == s->fixcompref) ^
1719                                 (!s->left_intra_ctx[row7] &&
1720                                  s->left_ref_ctx[row & 7] == s->fixcompref);
1723                         c = s->above_comp_ctx[col] ? 3 :
1724                         (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1726                 } else if (have_l) {
1727                     c = s->left_comp_ctx[row7] ? 3 :
1728                     (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1732                 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1733                 s->counts.comp[c][b->comp]++;
1736             // read actual references
1737             // FIXME probably cache a few variables here to prevent repetitive
1738             // memory accesses below
1739             if (b->comp) /* two references */ {
1740                 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1742                 b->ref[fix_idx] = s->fixcompref;
1743                 // FIXME can this codeblob be replaced by some sort of LUT?
1746                     if (s->above_intra_ctx[col]) {
1747                         if (s->left_intra_ctx[row7]) {
1750                             c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1752                     } else if (s->left_intra_ctx[row7]) {
1753                         c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1755                         int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1757                         if (refl == refa && refa == s->varcompref[1]) {
1759                         } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1760                             if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1761                                 (refl == s->fixcompref && refa == s->varcompref[0])) {
1764                                 c = (refa == refl) ? 3 : 1;
1766                         } else if (!s->left_comp_ctx[row7]) {
1767                             if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1770                                 c = (refl == s->varcompref[1] &&
1771                                      refa != s->varcompref[1]) ? 2 : 4;
1773                         } else if (!s->above_comp_ctx[col]) {
1774                             if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1777                                 c = (refa == s->varcompref[1] &&
1778                                      refl != s->varcompref[1]) ? 2 : 4;
1781                             c = (refl == refa) ? 4 : 2;
1785                     if (s->above_intra_ctx[col]) {
1787                     } else if (s->above_comp_ctx[col]) {
1788                         c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1790                         c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1793                 } else if (have_l) {
1794                     if (s->left_intra_ctx[row7]) {
1796                     } else if (s->left_comp_ctx[row7]) {
1797                         c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1799                         c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1804                 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1805                 b->ref[var_idx] = s->varcompref[bit];
1806                 s->counts.comp_ref[c][bit]++;
1807             } else /* single reference */ {
1810                 if (have_a && !s->above_intra_ctx[col]) {
1811                     if (have_l && !s->left_intra_ctx[row7]) {
1812                         if (s->left_comp_ctx[row7]) {
1813                             if (s->above_comp_ctx[col]) {
1814                                 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1815                                          !s->above_ref_ctx[col]);
1817                                 c = (3 * !s->above_ref_ctx[col]) +
1818                                     (!s->fixcompref || !s->left_ref_ctx[row7]);
1820                         } else if (s->above_comp_ctx[col]) {
1821                             c = (3 * !s->left_ref_ctx[row7]) +
1822                                 (!s->fixcompref || !s->above_ref_ctx[col]);
1824                             c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1826                     } else if (s->above_intra_ctx[col]) {
1828                     } else if (s->above_comp_ctx[col]) {
1829                         c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1831                         c = 4 * (!s->above_ref_ctx[col]);
1833                 } else if (have_l && !s->left_intra_ctx[row7]) {
1834                     if (s->left_intra_ctx[row7]) {
1836                     } else if (s->left_comp_ctx[row7]) {
1837                         c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1839                         c = 4 * (!s->left_ref_ctx[row7]);
1844                 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1845                 s->counts.single_ref[c][0][bit]++;
1849                     // FIXME can this codeblob be replaced by some sort of LUT?
1852                             if (s->left_intra_ctx[row7]) {
1853                                 if (s->above_intra_ctx[col]) {
1855                                 } else if (s->above_comp_ctx[col]) {
1856                                     c = 1 + 2 * (s->fixcompref == 1 ||
1857                                                  s->above_ref_ctx[col] == 1);
1858                                 } else if (!s->above_ref_ctx[col]) {
1861                                     c = 4 * (s->above_ref_ctx[col] == 1);
1863                             } else if (s->above_intra_ctx[col]) {
1864                                 if (s->left_intra_ctx[row7]) {
1866                                 } else if (s->left_comp_ctx[row7]) {
1867                                     c = 1 + 2 * (s->fixcompref == 1 ||
1868                                                  s->left_ref_ctx[row7] == 1);
1869                                 } else if (!s->left_ref_ctx[row7]) {
1872                                     c = 4 * (s->left_ref_ctx[row7] == 1);
1874                             } else if (s->above_comp_ctx[col]) {
1875                                 if (s->left_comp_ctx[row7]) {
1876                                     if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1877                                         c = 3 * (s->fixcompref == 1 ||
1878                                                  s->left_ref_ctx[row7] == 1);
1882                                 } else if (!s->left_ref_ctx[row7]) {
1883                                     c = 1 + 2 * (s->fixcompref == 1 ||
1884                                                  s->above_ref_ctx[col] == 1);
1886                                     c = 3 * (s->left_ref_ctx[row7] == 1) +
1887                                     (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1889                             } else if (s->left_comp_ctx[row7]) {
1890                                 if (!s->above_ref_ctx[col]) {
1891                                     c = 1 + 2 * (s->fixcompref == 1 ||
1892                                                  s->left_ref_ctx[row7] == 1);
1894                                     c = 3 * (s->above_ref_ctx[col] == 1) +
1895                                     (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1897                             } else if (!s->above_ref_ctx[col]) {
1898                                 if (!s->left_ref_ctx[row7]) {
1901                                     c = 4 * (s->left_ref_ctx[row7] == 1);
1903                             } else if (!s->left_ref_ctx[row7]) {
1904                                 c = 4 * (s->above_ref_ctx[col] == 1);
1906                                 c = 2 * (s->left_ref_ctx[row7] == 1) +
1907                                     2 * (s->above_ref_ctx[col] == 1);
1910                             if (s->above_intra_ctx[col] ||
1911                                 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1913                             } else if (s->above_comp_ctx[col]) {
1914                                 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1916                                 c = 4 * (s->above_ref_ctx[col] == 1);
1919                     } else if (have_l) {
1920                         if (s->left_intra_ctx[row7] ||
1921                             (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1923                         } else if (s->left_comp_ctx[row7]) {
1924                             c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1926                             c = 4 * (s->left_ref_ctx[row7] == 1);
1931                     bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1932                     s->counts.single_ref[c][1][bit]++;
1933                     b->ref[0] = 1 + bit;
// --- inter modes + interpolation filter ---
1938         if (b->bs <= BS_8x8) {
1939             if (s->segmentation.feat[b->seg_id].skip_enabled) {
1940                 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1942                 static const uint8_t off[10] = {
1943                     3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1946                 // FIXME this needs to use the LUT tables from find_ref_mvs
1947                 // because not all are -1,0/0,-1
1948                 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1949                                           [s->left_mode_ctx[row7 + off[b->bs]]];
1951                 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1952                                               s->prob.p.mv_mode[c]);
1953                 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1954                 s->counts.mv_mode[c][b->mode[0] - 10]++;
1958         if (s->filtermode == FILTER_SWITCHABLE) {
1961             if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1962                 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1963                     c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1964                         s->left_filter_ctx[row7] : 3;
1966                     c = s->above_filter_ctx[col];
1968             } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1969                 c = s->left_filter_ctx[row7];
1974             filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1975                                          s->prob.p.filter[c]);
1976             s->counts.filter[c][filter_id]++;
1977             b->filter = vp9_filter_lut[filter_id];
1979             b->filter = s->filtermode;
// sub-8x8 blocks code one mode + MV pair per sub-block
1982         if (b->bs > BS_8x8) {
1983             int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1985             b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1986                                           s->prob.p.mv_mode[c]);
1987             s->counts.mv_mode[c][b->mode[0] - 10]++;
1988             fill_mv(s, b->mv[0], b->mode[0], 0);
1990             if (b->bs != BS_8x4) {
1991                 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1992                                               s->prob.p.mv_mode[c]);
1993                 s->counts.mv_mode[c][b->mode[1] - 10]++;
1994                 fill_mv(s, b->mv[1], b->mode[1], 1);
1996                 b->mode[1] = b->mode[0];
1997                 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1998                 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2001             if (b->bs != BS_4x8) {
2002                 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
2003                                               s->prob.p.mv_mode[c]);
2004                 s->counts.mv_mode[c][b->mode[2] - 10]++;
2005                 fill_mv(s, b->mv[2], b->mode[2], 2);
2007                 if (b->bs != BS_8x4) {
2008                     b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
2009                                                   s->prob.p.mv_mode[c]);
2010                     s->counts.mv_mode[c][b->mode[3] - 10]++;
2011                     fill_mv(s, b->mv[3], b->mode[3], 3);
2013                     b->mode[3] = b->mode[2];
2014                     AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
2015                     AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
2018                 b->mode[2] = b->mode[0];
2019                 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2020                 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2021                 b->mode[3] = b->mode[1];
2022                 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
2023                 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
2026             fill_mv(s, b->mv[0], b->mode[0], -1);
2027             AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
2028             AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2029             AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
2030             AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2031             AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2032             AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
// reference used for context propagation (variable ref of a compound pair)
2035     vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
// --- splat decoded state into the above/left context arrays ---
2039 #define SPLAT_CTX(var, val, n) \
2041     case 1:  var = val;                                    break; \
2042     case 2:  AV_WN16A(&var, val *             0x0101);     break; \
2043     case 4:  AV_WN32A(&var, val *         0x01010101);     break; \
2044     case 8:  AV_WN64A(&var, val * 0x0101010101010101ULL);  break; \
2046         uint64_t v64 = val * 0x0101010101010101ULL; \
2047         AV_WN64A(              &var,     v64); \
2048         AV_WN64A(&((uint8_t *) &var)[8], v64); \
2053 #define SPLAT_CTX(var, val, n) \
2055     case 1:  var = val;                         break; \
2056     case 2:  AV_WN16A(&var, val *     0x0101);  break; \
2057     case 4:  AV_WN32A(&var, val * 0x01010101);  break; \
2059         uint32_t v32 = val * 0x01010101; \
2060         AV_WN32A(              &var,     v32); \
2061         AV_WN32A(&((uint8_t *) &var)[4], v32); \
2065         uint32_t v32 = val * 0x01010101; \
2066         AV_WN32A(              &var,      v32); \
2067         AV_WN32A(&((uint8_t *) &var)[4],  v32); \
2068         AV_WN32A(&((uint8_t *) &var)[8],  v32); \
2069         AV_WN32A(&((uint8_t *) &var)[12], v32); \
2075     switch (bwh_tab[1][b->bs][0]) {
2076 #define SET_CTXS(dir, off, n) \
2078         SPLAT_CTX(s->dir##_skip_ctx[off],      b->skip,          n); \
2079         SPLAT_CTX(s->dir##_txfm_ctx[off],      b->tx,            n); \
2080         SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2081         if (!s->keyframe && !s->intraonly) { \
2082             SPLAT_CTX(s->dir##_intra_ctx[off], b->intra,   n); \
2083             SPLAT_CTX(s->dir##_comp_ctx[off],  b->comp,    n); \
2084             SPLAT_CTX(s->dir##_mode_ctx[off],  b->mode[3], n); \
2086                 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2087                 if (s->filtermode == FILTER_SWITCHABLE) { \
2088                     SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2093     case 1: SET_CTXS(above, col, 1); break;
2094     case 2: SET_CTXS(above, col, 2); break;
2095     case 4: SET_CTXS(above, col, 4); break;
2096     case 8: SET_CTXS(above, col, 8); break;
2098     switch (bwh_tab[1][b->bs][1]) {
2099     case 1: SET_CTXS(left, row7, 1); break;
2100     case 2: SET_CTXS(left, row7, 2); break;
2101     case 4: SET_CTXS(left, row7, 4); break;
2102     case 8: SET_CTXS(left, row7, 8); break;
// --- store MVs into the above/left MV context for later blocks ---
2107     if (!s->keyframe && !s->intraonly) {
2108         if (b->bs > BS_8x8) {
2109             int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2111             AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2112             AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2113             AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2114             AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2115             AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2116             AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2117             AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2118             AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2120             int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2122             for (n = 0; n < w4 * 2; n++) {
2123                 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2124                 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2126             for (n = 0; n < h4 * 2; n++) {
2127                 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2128                 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
// --- store refs/MVs into the per-frame buffer (used by future frames) ---
2134     for (y = 0; y < h4; y++) {
2135         int x, o = (row + y) * s->sb_cols * 8 + col;
2136         struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
2139             for (x = 0; x < w4; x++) {
2143         } else if (b->comp) {
2144             for (x = 0; x < w4; x++) {
2145                 mv[x].ref[0] = b->ref[0];
2146                 mv[x].ref[1] = b->ref[1];
2147                 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2148                 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2151             for (x = 0; x < w4; x++) {
2152                 mv[x].ref[0] = b->ref[0];
2154                 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2160 // FIXME merge cnt/eob arguments?
// Decode one block of transform coefficients from the range coder.
// Template parameters (constant-folded by the always_inline wrappers below):
// is_tx32x32 selects the halved-dequant 32x32 path, is8bitsperpixel selects
// 16- vs 32-bit coefficient storage, bpp the extra-bit count for high bitdepth.
// 'nnz' is the nonzero context from neighbouring blocks, 'scan'/'nb' the scan
// order and its neighbour table, 'band_counts' coefficients per band, 'qmul'
// the DC/AC dequantizers. Updates cnt/eob counters for backward adaptation.
2161 static av_always_inline int
2162 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2163                         int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
2164                         unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2165                         int nnz, const int16_t *scan, const int16_t (*nb)[2],
2166                         const int16_t *band_counts, const int16_t *qmul)
2168     int i = 0, band = 0, band_left = band_counts[band];
// tp points at the 11 probabilities for the current (band, nnz) context;
// 'cache' keeps per-position token magnitudes to derive the next nnz context
2169     uint8_t *tp = p[0][nnz];
2170     uint8_t cache[1024];
2175         val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2176         eob[band][nnz][val]++;
2181         if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2182             cnt[band][nnz][0]++;
2184                 band_left = band_counts[++band];
2186             nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2188             if (++i == n_coeffs)
2189                 break; //invalid input; blocks should end with EOB
2194         if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2195             cnt[band][nnz][1]++;
2199             // fill in p[3-10] (model fill) - only once per frame for each pos
2201                 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2203             cnt[band][nnz][2]++;
2204             if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2205                 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2206                     cache[rc] = val = 2;
2208                     val = 3 + vp56_rac_get_prob(c, tp[5]);
2211             } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2213                 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2214                     val = 5 + vp56_rac_get_prob(c, 159);
2216                     val  = 7 + (vp56_rac_get_prob(c, 165) << 1);
2217                     val += vp56_rac_get_prob(c, 145);
// cat 3-6: progressively longer escape codes with fixed probabilities
2221                 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2222                     if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2223                         val  = 11 + (vp56_rac_get_prob(c, 173) << 2);
2224                         val +=      (vp56_rac_get_prob(c, 148) << 1);
2225                         val +=       vp56_rac_get_prob(c, 140);
2227                         val  = 19 + (vp56_rac_get_prob(c, 176) << 3);
2228                         val +=      (vp56_rac_get_prob(c, 155) << 2);
2229                         val +=      (vp56_rac_get_prob(c, 140) << 1);
2230                         val +=       vp56_rac_get_prob(c, 135);
2232                 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2233                     val  = 35 + (vp56_rac_get_prob(c, 180) << 4);
2234                     val +=      (vp56_rac_get_prob(c, 157) << 3);
2235                     val +=      (vp56_rac_get_prob(c, 141) << 2);
2236                     val +=      (vp56_rac_get_prob(c, 134) << 1);
2237                     val +=       vp56_rac_get_prob(c, 130);
// cat6 carries extra high bits for >8-bit content (bpp-dependent)
2240                     if (!is8bitsperpixel) {
2242                             val += vp56_rac_get_prob(c, 255) << 17;
2243                         val += vp56_rac_get_prob(c, 255) << 16;
2245                         val += (vp56_rac_get_prob(c, 255) << 15);
2246                         val += (vp56_rac_get_prob(c, 255) << 14);
2248                     val += (vp56_rac_get_prob(c, 254) << 13);
2249                     val += (vp56_rac_get_prob(c, 254) << 12);
2250                     val += (vp56_rac_get_prob(c, 254) << 11);
2251                     val += (vp56_rac_get_prob(c, 252) << 10);
2252                     val += (vp56_rac_get_prob(c, 249) << 9);
2253                     val += (vp56_rac_get_prob(c, 243) << 8);
2254                     val += (vp56_rac_get_prob(c, 230) << 7);
2255                     val += (vp56_rac_get_prob(c, 196) << 6);
2256                     val += (vp56_rac_get_prob(c, 177) << 5);
2257                     val += (vp56_rac_get_prob(c, 153) << 4);
2258                     val += (vp56_rac_get_prob(c, 140) << 3);
2259                     val += (vp56_rac_get_prob(c, 133) << 2);
2260                     val += (vp56_rac_get_prob(c, 130) << 1);
2261                     val += vp56_rac_get_prob(c, 129);
2265 #define STORE_COEF(c, i, v) do { \
2266     if (is8bitsperpixel) { \
2269         AV_WN32A(&c[i * 2], v); \
2273             band_left = band_counts[++band];
2275             STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
2277             STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
2278         nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2280     } while (++i < n_coeffs);
// 8bpp, non-32x32 coefficient decoding: specializes the generic decoder with
// compile-time-constant is_tx32x32 = 0, is8bitsperpixel = 1, bpp = 8.
2285 static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2286                                 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2287                                 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2288                                 const int16_t (*nb)[2], const int16_t *band_counts,
2289                                 const int16_t *qmul)
2291     return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
2292                                    nnz, scan, nb, band_counts, qmul);
2295 static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2296 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2297 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2298 const int16_t (*nb)[2], const int16_t *band_counts,
2299 const int16_t *qmul)
2301 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
2302 nnz, scan, nb, band_counts, qmul);
2305 static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2306 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2307 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2308 const int16_t (*nb)[2], const int16_t *band_counts,
2309 const int16_t *qmul)
2311 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
2312 nnz, scan, nb, band_counts, qmul);
2315 static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2316 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2317 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2318 const int16_t (*nb)[2], const int16_t *band_counts,
2319 const int16_t *qmul)
2321 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
2322 nnz, scan, nb, band_counts, qmul);
// Decode the residual coefficients for the whole current block: luma first,
// then both chroma planes, updating the above/left non-zero-coefficient
// contexts as it goes.  is8bitsperpixel selects the 8bpp vs. high-bit-depth
// coefficient decoder wrappers at compile time (this function is
// av_always_inline and instantiated twice).  Presumably returns total_coeff
// (whether any non-zero coefficient was seen) — the return statement is not
// visible in this excerpt; TODO confirm against the full file.
// NOTE(review): this excerpt elides several original lines (switch headers,
// closing braces); code below is left byte-identical.
2325 static av_always_inline int decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
2327 VP9Context *s = ctx->priv_data;
2329 int row = s->row, col = s->col;
// Probability, count and EOB tables are selected per tx size, per plane
// (0 = luma here) and per intra/inter.
2330 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2331 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2332 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2333 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
// Clip the 4x4-unit loop bounds so we never decode past the frame edge.
2334 int end_x = FFMIN(2 * (s->cols - col), w4);
2335 int end_y = FFMIN(2 * (s->rows - row), h4);
2336 int n, pl, x, y, res;
2337 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
// Lossless mode uses a separate (WHT) scan-table group, offset by 4.
2338 int tx = 4 * s->lossless + b->tx;
2339 const int16_t * const *yscans = vp9_scans[tx];
2340 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2341 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2342 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2343 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2344 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
// Per-band coefficient counts, one row per tx size (4x4..32x32); the last
// entry is written as total-minus-previous for readability.
2345 static const int16_t band_counts[4][8] = {
2346 { 1, 2, 3, 4, 3, 16 - 13 },
2347 { 1, 2, 3, 4, 11, 64 - 21 },
2348 { 1, 2, 3, 4, 11, 256 - 21 },
2349 { 1, 2, 3, 4, 11, 1024 - 21 },
2351 const int16_t *y_band_counts = band_counts[b->tx];
2352 const int16_t *uv_band_counts = band_counts[b->uvtx];
2353 int bytesperpixel = is8bitsperpixel ? 1 : 2;
2354 int total_coeff = 0;
// MERGE/MERGE_CTX collapse per-4x4 nnz context flags down to one flag per
// transform unit before decoding with a larger tx size.
2356 #define MERGE(la, end, step, rd) \
2357 for (n = 0; n < end; n += step) \
2358 la[n] = !!rd(&la[n])
2359 #define MERGE_CTX(step, rd) \
2361 MERGE(l, end_y, step, rd); \
2362 MERGE(a, end_x, step, rd); \
// Decode one luma coefficient block per transform unit; 'v' selects the
// plain vs. "32" decoder wrapper, mode_index picks the per-sub-block intra
// mode used to derive the scan order (txtp).
2365 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2366 for (n = 0, y = 0; y < end_y; y += step) { \
2367 for (x = 0; x < end_x; x += step, n += step * step) { \
2368 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2369 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2370 (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
2371 c, e, p, a[x] + l[y], yscans[txtp], \
2372 ynbs[txtp], y_band_counts, qmul[0]); \
2373 a[x] = l[y] = !!res; \
2374 total_coeff |= !!res; \
2376 AV_WN16A(&s->eob[n], res); \
// SPLAT/SPLAT_CTX re-expand the merged per-tx-unit nnz flag back out to the
// per-4x4 context arrays; wide stores are used when the run fits.
2383 #define SPLAT(la, end, step, cond) \
2385 for (n = 1; n < end; n += step) \
2386 la[n] = la[n - 1]; \
2387 } else if (step == 4) { \
2389 for (n = 0; n < end; n += step) \
2390 AV_WN32A(&la[n], la[n] * 0x01010101); \
2392 for (n = 0; n < end; n += step) \
2393 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2395 } else /* step == 8 */ { \
2397 if (HAVE_FAST_64BIT) { \
2398 for (n = 0; n < end; n += step) \
2399 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2401 for (n = 0; n < end; n += step) { \
2402 uint32_t v32 = la[n] * 0x01010101; \
2403 AV_WN32A(&la[n], v32); \
2404 AV_WN32A(&la[n + 4], v32); \
2408 for (n = 0; n < end; n += step) \
2409 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2412 #define SPLAT_CTX(step) \
2414 SPLAT(a, end_x, step, end_x == w4); \
2415 SPLAT(l, end_y, step, end_y == h4); \
// Luma: dispatch on b->tx (TX_4X4..TX_32X32); the surrounding switch/case
// lines are elided in this excerpt.
2421 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2424 MERGE_CTX(2, AV_RN16A);
2425 DECODE_Y_COEF_LOOP(2, 0,);
2429 MERGE_CTX(4, AV_RN32A);
2430 DECODE_Y_COEF_LOOP(4, 0,);
2434 MERGE_CTX(8, AV_RN64A);
2435 DECODE_Y_COEF_LOOP(8, 0, 32);
// Chroma variant of the decode loop: always DCT_DCT scan, qmul[1], and the
// per-plane uveob array.
2440 #define DECODE_UV_COEF_LOOP(step, v) \
2441 for (n = 0, y = 0; y < end_y; y += step) { \
2442 for (x = 0; x < end_x; x += step, n += step * step) { \
2443 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2444 (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
2445 16 * step * step, c, e, p, a[x] + l[y], \
2446 uvscan, uvnb, uv_band_counts, qmul[1]); \
2447 a[x] = l[y] = !!res; \
2448 total_coeff |= !!res; \
2450 AV_WN16A(&s->uveob[pl][n], res); \
2452 s->uveob[pl][n] = res; \
// Switch tables to the chroma (uv) probability/count sets.
2457 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2458 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2459 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2464 for (pl = 0; pl < 2; pl++) {
2465 a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2466 l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2469 DECODE_UV_COEF_LOOP(1,);
2472 MERGE_CTX(2, AV_RN16A);
2473 DECODE_UV_COEF_LOOP(2,);
2477 MERGE_CTX(4, AV_RN32A);
2478 DECODE_UV_COEF_LOOP(4,);
2482 MERGE_CTX(8, AV_RN64A);
2483 DECODE_UV_COEF_LOOP(8, 32);
2492 static int decode_coeffs_8bpp(AVCodecContext *ctx)
2494 return decode_coeffs(ctx, 1);
2497 static int decode_coeffs_16bpp(AVCodecContext *ctx)
2499 return decode_coeffs(ctx, 0);
// Prepare the top/left edge pixel arrays for one intra-predicted transform
// block and return the (possibly substituted) prediction mode.  When the
// needed neighbours are unavailable (frame/tile edge), the mode is remapped
// via mode_conv[] to a DC variant, and missing edge pixels are synthesized
// by replication or by bpp-scaled constants.  *a is updated to point at the
// edge buffer actually used by the caller's dsp.intra_pred call.
// NOTE(review): several original lines (closing braces, some conditions)
// are elided in this excerpt; code left byte-identical.
2502 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2503 uint8_t *dst_edge, ptrdiff_t stride_edge,
2504 uint8_t *dst_inner, ptrdiff_t stride_inner,
2505 uint8_t *l, int col, int x, int w,
2506 int row, int y, enum TxfmMode tx,
2507 int p, int ss_h, int ss_v, int bytesperpixel)
// Neighbour availability: top row of the frame / left edge of the tile have
// no decoded neighbours.
2509 int have_top = row > 0 || y > 0;
2510 int have_left = col > s->tiling.tile_col_start || x > 0;
2511 int have_right = x < w - 1;
// Remap the 10 "real" intra modes when left/top neighbours are missing;
// e.g. VERT without a top row degenerates to DC_127.
2513 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2514 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2515 { DC_127_PRED, VERT_PRED } },
2516 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2517 { HOR_PRED, HOR_PRED } },
2518 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2519 { LEFT_DC_PRED, DC_PRED } },
2520 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2521 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2522 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2523 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2524 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2525 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2526 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2527 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2528 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2529 { DC_127_PRED, VERT_LEFT_PRED } },
2530 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2531 { HOR_UP_PRED, HOR_UP_PRED } },
2532 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2533 { HOR_PRED, TM_VP8_PRED } },
// Which edge pixels each (post-remap) mode actually reads.
2535 static const struct {
2536 uint8_t needs_left:1;
2537 uint8_t needs_top:1;
2538 uint8_t needs_topleft:1;
2539 uint8_t needs_topright:1;
2540 uint8_t invert_left:1;
2541 } edges[N_INTRA_PRED_MODES] = {
2542 [VERT_PRED] = { .needs_top = 1 },
2543 [HOR_PRED] = { .needs_left = 1 },
2544 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2545 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2546 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2547 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2548 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2549 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2550 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2551 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2552 [LEFT_DC_PRED] = { .needs_left = 1 },
2553 [TOP_DC_PRED] = { .needs_top = 1 },
2554 [DC_128_PRED] = { 0 },
2555 [DC_127_PRED] = { 0 },
2556 [DC_129_PRED] = { 0 }
2559 av_assert2(mode >= 0 && mode < 10);
2560 mode = mode_conv[mode][have_left][have_top];
2561 if (edges[mode].needs_top) {
2562 uint8_t *top, *topleft;
// n_px_have: decoded pixels available to the right of this block's top edge.
2563 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2564 int n_px_need_tr = 0;
2566 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2569 // if top of sb64-row, use s->intra_pred_data[] instead of
2570 // dst[-stride] for intra prediction (it contains pre- instead of
2571 // post-loopfilter data)
2573 top = !(row & 7) && !y ?
2574 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2575 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2577 topleft = !(row & 7) && !y ?
2578 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2579 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2580 &dst_inner[-stride_inner];
// Fast path (condition head elided here): when all needed edge pixels are
// contiguous and available, the caller can read them in place.
2584 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2585 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2586 n_px_need + n_px_need_tr <= n_px_have) {
// Otherwise copy what exists into the edge buffer and replicate the rest.
2590 if (n_px_need <= n_px_have) {
2591 memcpy(*a, top, n_px_need * bytesperpixel);
// bpp-aware helpers: operate on bytes at 8bpp, on 16-bit lanes otherwise.
2593 #define memset_bpp(c, i1, v, i2, num) do { \
2594 if (bytesperpixel == 1) { \
2595 memset(&(c)[(i1)], (v)[(i2)], (num)); \
2597 int n, val = AV_RN16A(&(v)[(i2) * 2]); \
2598 for (n = 0; n < (num); n++) { \
2599 AV_WN16A(&(c)[((i1) + n) * 2], val); \
2603 memcpy(*a, top, n_px_have * bytesperpixel);
2604 memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
2607 #define memset_val(c, val, num) do { \
2608 if (bytesperpixel == 1) { \
2609 memset((c), (val), (num)); \
2612 for (n = 0; n < (num); n++) { \
2613 AV_WN16A(&(c)[n * 2], (val)); \
// No top row at all: fill with the bpp-scaled "127" constant.
2617 memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
2619 if (edges[mode].needs_topleft) {
2620 if (have_left && have_top) {
2621 #define assign_bpp(c, i1, v, i2) do { \
2622 if (bytesperpixel == 1) { \
2623 (c)[(i1)] = (v)[(i2)]; \
2625 AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
2628 assign_bpp(*a, -1, topleft, -1);
2630 #define assign_val(c, i, v) do { \
2631 if (bytesperpixel == 1) { \
2634 AV_WN16A(&(c)[(i) * 2], (v)); \
// Synthetic topleft: 129 if a top row exists, 127 otherwise (bpp-scaled).
2637 assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
2640 if (tx == TX_4X4 && edges[mode].needs_topright) {
2641 if (have_top && have_right &&
2642 n_px_need + n_px_need_tr <= n_px_have) {
2643 memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
// Top-right unavailable: replicate the last top pixel.
2645 memset_bpp(*a, 4, *a, 3, 4);
2650 if (edges[mode].needs_left) {
2652 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2653 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2654 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
// invert_left (HOR_UP only): store the left column top-to-bottom instead of
// bottom-to-top.
2656 if (edges[mode].invert_left) {
2657 if (n_px_need <= n_px_have) {
2658 for (i = 0; i < n_px_need; i++)
2659 assign_bpp(l, i, &dst[i * stride], -1);
2661 for (i = 0; i < n_px_have; i++)
2662 assign_bpp(l, i, &dst[i * stride], -1);
2663 memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
2666 if (n_px_need <= n_px_have) {
2667 for (i = 0; i < n_px_need; i++)
2668 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2670 for (i = 0; i < n_px_have; i++)
2671 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2672 memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
// No left column: fill with the bpp-scaled "129" constant.
2676 memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
// Reconstruct one intra-coded block: for every luma transform unit, fix up
// the prediction edges (check_intra_mode), run the dsp intra predictor, and
// add the inverse-transformed residual when the unit has coefficients; then
// repeat for both chroma planes.  Writes into s->dst[] (possibly the
// emu-edge temporary buffers set up by decode_b) while edge pixels are read
// from the real frame planes (ptr_r).
// NOTE(review): some lines (closing braces, the "if (eob)" guards) are
// elided in this excerpt; code left byte-identical.
2683 static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
2684 ptrdiff_t uv_off, int bytesperpixel)
2686 VP9Context *s = ctx->priv_data;
2688 int row = s->row, col = s->col;
2689 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2690 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2691 int end_x = FFMIN(2 * (s->cols - col), w4);
2692 int end_y = FFMIN(2 * (s->rows - row), h4);
2693 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2694 int uvstep1d = 1 << b->uvtx, p;
2695 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
// Aligned scratch for the top edge (a_buf, predictor reads &a_buf[32]) and
// the left column (l).
2696 LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
2697 LOCAL_ALIGNED_32(uint8_t, l, [64]);
2699 for (n = 0, y = 0; y < end_y; y += step1d) {
2700 uint8_t *ptr = dst, *ptr_r = dst_r;
2701 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
2702 ptr_r += 4 * step1d * bytesperpixel, n += step) {
// For sub-8x8 blocks with 4x4 tx, each 4x4 unit carries its own mode.
2703 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2705 uint8_t *a = &a_buf[32];
2706 enum TxfmType txtp = vp9_intra_txfm_type[mode];
2707 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2709 mode = check_intra_mode(s, mode, &a, ptr_r,
2710 s->frames[CUR_FRAME].tf.f->linesize[0],
2711 ptr, s->y_stride, l,
2712 col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
2713 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2715 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2716 s->block + 16 * n * bytesperpixel, eob);
2718 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2719 dst += 4 * step1d * s->y_stride;
// Chroma: same walk with uv tx size; prediction mode is the single b->uvmode
// and the residual transform is always DCT_DCT.
2726 step = 1 << (b->uvtx * 2);
2727 for (p = 0; p < 2; p++) {
2728 dst = s->dst[1 + p];
2729 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2730 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2731 uint8_t *ptr = dst, *ptr_r = dst_r;
2732 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
2733 ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
2734 int mode = b->uvmode;
2735 uint8_t *a = &a_buf[32];
2736 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2738 mode = check_intra_mode(s, mode, &a, ptr_r,
2739 s->frames[CUR_FRAME].tf.f->linesize[1],
2740 ptr, s->uv_stride, l, col, x, w4, row, y,
2741 b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
2742 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2744 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2745 s->uvblock[p] + 16 * n * bytesperpixel, eob);
2747 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2748 dst += 4 * uvstep1d * s->uv_stride;
2753 static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2755 intra_recon(ctx, y_off, uv_off, 1);
2758 static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2760 intra_recon(ctx, y_off, uv_off, 2);
// Scaled luma motion compensation: clip the MV against the allowed range,
// rescale MV and block position into reference-frame coordinates (14-bit
// fixed-point scale factors, 4-bit subpel step), wait for the reference
// sbrow to be decoded (frame threading), fall back to emulated_edge_mc when
// the filter footprint leaves the reference frame, then run the scaled MC
// dsp function.  NOTE(review): a few lines (VP56mv mv; mx/my/th decls,
// closing braces) are elided in this excerpt; code left byte-identical.
2763 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2764 uint8_t *dst, ptrdiff_t dst_stride,
2765 const uint8_t *ref, ptrdiff_t ref_stride,
2766 ThreadFrame *ref_frame,
2767 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2768 int px, int py, int pw, int ph,
2769 int bw, int bh, int w, int h, int bytesperpixel,
2770 const uint16_t *scale, const uint8_t *step)
// scale[] is Q14 per-dimension; the int64_t cast guards the multiply.
2772 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2774 int refbw_m1, refbh_m1;
// Clip the MV so prediction stays within 128px of the visible frame.
2778 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2779 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2780 // BUG libvpx seems to scale the two components separately. This introduces
2781 // rounding errors but we have to reproduce them to be exactly compatible
2782 // with the output from libvpx...
2783 mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
2784 my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
2788 ref += y * ref_stride + x * bytesperpixel;
// Last reference row/col touched by the scaled filter, relative to (x, y).
2791 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2792 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2793 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2794 // we use +7 because the last 7 pixels of each sbrow can be changed in
2795 // the longest loopfilter of the next sbrow
2796 th = (y + refbh_m1 + 4 + 7) >> 6;
2797 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
// 8-tap filter reads 3 pixels before and 4 after: emulate edges if needed.
2798 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2799 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2800 ref - 3 * ref_stride - 3 * bytesperpixel,
2802 refbw_m1 + 8, refbh_m1 + 8,
2803 x - 3, y - 3, w, h);
2804 ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2807 smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
// Scaled chroma motion compensation for both U and V planes; same structure
// as mc_luma_scaled but with subsampling-dependent MV clipping/scaling and
// two source planes sharing one edge-emulation buffer.
// NOTE(review): some lines (declarations, else/brace lines around the
// ss_h/ss_v branches) are elided in this excerpt; code left byte-identical.
2810 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2811 uint8_t *dst_u, uint8_t *dst_v,
2812 ptrdiff_t dst_stride,
2813 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2814 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2815 ThreadFrame *ref_frame,
2816 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2817 int px, int py, int pw, int ph,
2818 int bw, int bh, int w, int h, int bytesperpixel,
2819 const uint16_t *scale, const uint8_t *step)
2822 int refbw_m1, refbh_m1;
// Subsampled-x path replicates a known libvpx rounding quirk:
2827 // BUG https://code.google.com/p/webm/issues/detail?id=820
2828 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 4, (s->cols * 4 - x + px + 3) << 4);
2829 mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
// Non-subsampled-x path (the surrounding else is elided here):
2831 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2832 mx = scale_mv(mv.x << 1, 0) + scale_mv(x * 16, 0);
2835 // BUG https://code.google.com/p/webm/issues/detail?id=820
2836 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 4, (s->rows * 4 - y + py + 3) << 4);
2837 my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2839 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2840 my = scale_mv(mv.y << 1, 1) + scale_mv(y * 16, 1);
2845 ref_u += y * src_stride_u + x * bytesperpixel;
2846 ref_v += y * src_stride_v + x * bytesperpixel;
2849 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2850 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2851 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2852 // we use +7 because the last 7 pixels of each sbrow can be changed in
2853 // the longest loopfilter of the next sbrow
// Progress granularity shifts by the vertical subsampling factor.
2854 th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2855 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2856 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
// Edge case: each plane is emulated (stride 288) and filtered in turn.
2857 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2858 ref_u - 3 * src_stride_u - 3 * bytesperpixel,
2860 refbw_m1 + 8, refbh_m1 + 8,
2861 x - 3, y - 3, w, h);
2862 ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2863 smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
2865 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2866 ref_v - 3 * src_stride_v - 3 * bytesperpixel,
2868 refbw_m1 + 8, refbh_m1 + 8,
2869 x - 3, y - 3, w, h);
2870 ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2871 smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
// Fast path: filter both planes directly from the reference frame.
2873 smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2874 smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
// Template instantiation for *scaled* inter prediction: mc_luma_dir /
// mc_chroma_dir are bound to the scaled MC helpers (note the s##mc token
// paste selecting the "s"-prefixed dsp function table entry), then
// vp9_mc_template.c is included twice to generate the _scaled_8bpp and
// _scaled_16bpp inter_pred functions.
2878 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2879 px, py, pw, ph, bw, bh, w, h, i) \
2880 mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
2881 mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2882 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2883 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2884 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2885 mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2886 row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2887 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2889 #define FN(x) x##_scaled_8bpp
2890 #define BYTES_PER_PIXEL 1
2891 #include "vp9_mc_template.c"
2893 #undef BYTES_PER_PIXEL
2894 #define FN(x) x##_scaled_16bpp
2895 #define BYTES_PER_PIXEL 2
2896 #include "vp9_mc_template.c"
2898 #undef mc_chroma_dir
2900 #undef BYTES_PER_PIXEL
// Unscaled (same-resolution reference) luma motion compensation: wait for
// the reference sbrow, emulate frame edges only when the subpel filter
// footprint actually crosses them (the !!mx / !!my factors drop the margin
// for full-pel components), then run the block-size/subpel dsp function.
// NOTE(review): a few lines (mv clipping, closing braces, the edge-case
// ref_stride override) are elided in this excerpt; code left byte-identical.
2903 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2904 uint8_t *dst, ptrdiff_t dst_stride,
2905 const uint8_t *ref, ptrdiff_t ref_stride,
2906 ThreadFrame *ref_frame,
2907 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2908 int bw, int bh, int w, int h, int bytesperpixel)
2910 int mx = mv->x, my = mv->y, th;
2914 ref += y * ref_stride + x * bytesperpixel;
2917 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2918 // we use +7 because the last 7 pixels of each sbrow can be changed in
2919 // the longest loopfilter of the next sbrow
2920 th = (y + bh + 4 * !!my + 7) >> 6;
2921 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2922 if (x < !!mx * 3 || y < !!my * 3 ||
2923 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2924 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2925 ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
2927 bw + !!mx * 7, bh + !!my * 7,
2928 x - !!mx * 3, y - !!my * 3, w, h);
// 160 is the emu buffer stride used by the unscaled path.
2929 ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2932 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
// Unscaled chroma motion compensation for U and V: the luma MV is promoted
// to chroma precision by the subsampling shifts, then each plane is
// motion-compensated, with per-plane edge emulation (stride 160) when the
// filter footprint leaves the reference frame.
// NOTE(review): some lines (clipping of mx/my, closing braces) are elided
// in this excerpt; code left byte-identical.
2935 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2936 uint8_t *dst_u, uint8_t *dst_v,
2937 ptrdiff_t dst_stride,
2938 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2939 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2940 ThreadFrame *ref_frame,
2941 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2942 int bw, int bh, int w, int h, int bytesperpixel)
// Chroma MVs get an extra bit of precision per non-subsampled dimension.
2944 int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
2948 ref_u += y * src_stride_u + x * bytesperpixel;
2949 ref_v += y * src_stride_v + x * bytesperpixel;
2952 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2953 // we use +7 because the last 7 pixels of each sbrow can be changed in
2954 // the longest loopfilter of the next sbrow
2955 th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2956 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2957 if (x < !!mx * 3 || y < !!my * 3 ||
2958 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2959 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2960 ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
2962 bw + !!mx * 7, bh + !!my * 7,
2963 x - !!mx * 3, y - !!my * 3, w, h);
2964 ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2965 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
2967 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2968 ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
2970 bw + !!mx * 7, bh + !!my * 7,
2971 x - !!mx * 3, y - !!my * 3, w, h);
2972 ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2973 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
// Fast path: both planes compensated straight from the reference frame.
2975 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2976 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
2980 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2981 px, py, pw, ph, bw, bh, w, h, i) \
2982 mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2983 mv, bw, bh, w, h, bytesperpixel)
2984 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2985 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2986 mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2987 row, col, mv, bw, bh, w, h, bytesperpixel)
2989 #define FN(x) x##_8bpp
2990 #define BYTES_PER_PIXEL 1
2991 #include "vp9_mc_template.c"
2993 #undef BYTES_PER_PIXEL
2994 #define FN(x) x##_16bpp
2995 #define BYTES_PER_PIXEL 2
2996 #include "vp9_mc_template.c"
2997 #undef mc_luma_dir_dir
2998 #undef mc_chroma_dir_dir
3000 #undef BYTES_PER_PIXEL
// Reconstruct one inter-coded block: run motion-compensated prediction
// (scaled variant if either used reference has a non-unity mvscale factor),
// then add the inverse-transformed residuals (always DCT_DCT for inter)
// for luma and both chroma planes.  NOTE(review): the "if (!b->skip)" guard
// and several brace/else lines are elided in this excerpt; code left
// byte-identical.
3003 static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
3005 VP9Context *s = ctx->priv_data;
3007 int row = s->row, col = s->col;
// mvscale[ref][0] non-zero means the reference has a different resolution.
3009 if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
3010 if (bytesperpixel == 1) {
3011 inter_pred_scaled_8bpp(ctx);
3013 inter_pred_scaled_16bpp(ctx);
3016 if (bytesperpixel == 1) {
3017 inter_pred_8bpp(ctx);
3019 inter_pred_16bpp(ctx);
3023 /* mostly copied intra_recon() */
3025 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
3026 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
3027 int end_x = FFMIN(2 * (s->cols - col), w4);
3028 int end_y = FFMIN(2 * (s->rows - row), h4);
3029 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
3030 int uvstep1d = 1 << b->uvtx, p;
3031 uint8_t *dst = s->dst[0];
// Luma residual add, one transform unit at a time.
3034 for (n = 0, y = 0; y < end_y; y += step1d) {
3036 for (x = 0; x < end_x; x += step1d,
3037 ptr += 4 * step1d * bytesperpixel, n += step) {
3038 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
3041 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
3042 s->block + 16 * n * bytesperpixel, eob);
3044 dst += 4 * s->y_stride * step1d;
// Chroma residual add for both planes.
3050 step = 1 << (b->uvtx * 2);
3051 for (p = 0; p < 2; p++) {
3052 dst = s->dst[p + 1];
3053 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
3055 for (x = 0; x < end_x; x += uvstep1d,
3056 ptr += 4 * uvstep1d * bytesperpixel, n += step) {
3057 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
3060 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
3061 s->uvblock[p] + 16 * n * bytesperpixel, eob);
3063 dst += 4 * uvstep1d * s->uv_stride;
3069 static void inter_recon_8bpp(AVCodecContext *ctx)
3071 inter_recon(ctx, 1);
3074 static void inter_recon_16bpp(AVCodecContext *ctx)
3076 inter_recon(ctx, 2);
// Build the loopfilter edge bitmasks for one block inside its 64x64
// superblock.  mask[0] holds column (vertical-edge) masks, mask[1] row
// (horizontal-edge) masks, per 8-pixel row and per filter width class
// (16/8/4/inner-4).  row_and_7/col_and_7 are the block position within the
// sb64; ss_h/ss_v are set for the chroma (subsampled) mask set.
// NOTE(review): several lines (the TX_4X4 chroma special case around 3097,
// assorted else/brace lines) are elided in this excerpt; code left
// byte-identical.
3079 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
3080 int row_and_7, int col_and_7,
3081 int w, int h, int col_end, int row_end,
3082 enum TxfmMode tx, int skip_inter)
// Bit patterns selecting positions that lie on 32px-wide filter edges.
3084 static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
3085 static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
3087 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
3088 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
3089 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
3090 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
3092 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
3093 // edges. This means that for UV, we work on two subsampled blocks at
3094 // a time, and we only use the topleft block's mode information to set
3095 // things like block strength. Thus, for any block size smaller than
3096 // 16x16, ignore the odd portion of the block.
3097 if (tx == TX_4X4 && (ss_v | ss_h)) {
// Non-skipped 4x4-tx inter blocks: every 4px edge gets filtered, with the
// 8px-wide filter used on 32px-aligned edges.
3112 if (tx == TX_4X4 && !skip_inter) {
3113 int t = 1 << col_and_7, m_col = (t << w) - t, y;
3114 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
3115 int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
3117 for (y = row_and_7; y < h + row_and_7; y++) {
3118 int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
3120 mask[0][y][1] |= m_row_8;
3121 mask[0][y][2] |= m_row_4;
3122 // for odd lines, if the odd col is not being filtered,
3123 // skip odd row also:
3130 // if a/c are even row/col and b/d are odd, and d is skipped,
3131 // e.g. right edge of size-66x66.webm, then skip b also (bug)
3132 if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
3133 mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
3135 mask[1][y][col_mask_id] |= m_col;
// Inner 4px edges within the block.
3138 mask[0][y][3] |= m_col;
3140 if (ss_h && (col_end & 1))
3141 mask[1][y][3] |= (t << (w - 1)) - t;
3143 mask[1][y][3] |= m_col;
// Larger tx sizes (or skipped 4x4): only the block boundary edges filter.
3147 int y, t = 1 << col_and_7, m_col = (t << w) - t;
3150 int mask_id = (tx == TX_8X8);
3151 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
3152 int l2 = tx + ss_h - 1, step1d;
3153 int m_row = m_col & masks[l2];
3155 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
3156 // 8wd loopfilter to prevent going off the visible edge.
3157 if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
3158 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
3159 int m_row_8 = m_row - m_row_16;
3161 for (y = row_and_7; y < h + row_and_7; y++) {
3162 mask[0][y][0] |= m_row_16;
3163 mask[0][y][1] |= m_row_8;
3166 for (y = row_and_7; y < h + row_and_7; y++)
3167 mask[0][y][mask_id] |= m_row;
// Horizontal (row) edges, same odd-edge special case vertically.
3172 if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
3173 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
3174 mask[1][y][0] |= m_col;
3175 if (y - row_and_7 == h - 1)
3176 mask[1][y][1] |= m_col;
3178 for (y = row_and_7; y < h + row_and_7; y += step1d)
3179 mask[1][y][mask_id] |= m_col;
3181 } else if (tx != TX_4X4) {
// tx8/16/32 with a single row/col in this plane.
3184 mask_id = (tx == TX_8X8) || (h == ss_v);
3185 mask[1][row_and_7][mask_id] |= m_col;
3186 mask_id = (tx == TX_8X8) || (w == ss_h);
3187 for (y = row_and_7; y < h + row_and_7; y++)
3188 mask[0][y][mask_id] |= t;
// Skipped 4x4-tx inter blocks: only outer edges, width chosen by alignment.
3190 int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
3192 for (y = row_and_7; y < h + row_and_7; y++) {
3193 mask[0][y][2] |= t4;
3194 mask[0][y][1] |= t8;
3196 mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
3201 static void decode_b(AVCodecContext *ctx, int row, int col,
3202 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
3203 enum BlockLevel bl, enum BlockPartition bp)
3205 VP9Context *s = ctx->priv_data;
3207 enum BlockSize bs = bl * 3 + bp;
3208 int bytesperpixel = s->bytesperpixel;
3209 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
3211 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3217 s->min_mv.x = -(128 + col * 64);
3218 s->min_mv.y = -(128 + row * 64);
3219 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
3220 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
3226 b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
3227 (s->ss_v && h4 * 2 == (1 << b->tx)));
3232 if (bytesperpixel == 1) {
3233 has_coeffs = decode_coeffs_8bpp(ctx);
3235 has_coeffs = decode_coeffs_16bpp(ctx);
3237 if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
3239 memset(&s->above_skip_ctx[col], 1, w4);
3240 memset(&s->left_skip_ctx[s->row7], 1, h4);
3245 #define SPLAT_ZERO_CTX(v, n) \
3247 case 1: v = 0; break; \
3248 case 2: AV_ZERO16(&v); break; \
3249 case 4: AV_ZERO32(&v); break; \
3250 case 8: AV_ZERO64(&v); break; \
3251 case 16: AV_ZERO128(&v); break; \
3253 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3255 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3256 if (s->ss_##dir2) { \
3257 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3258 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3260 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3261 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3266 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
3267 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
3268 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
3269 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
3272 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
3273 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
3274 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
3275 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
3280 s->block += w4 * h4 * 64 * bytesperpixel;
3281 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3282 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3283 s->eob += 4 * w4 * h4;
3284 s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3285 s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3291 // emulated overhangs if the stride of the target buffer can't hold. This
3292 // makes it possible to support emu-edge and so on even if we have large block
3294 emu[0] = (col + w4) * 8 > f->linesize[0] ||
3295 (row + h4) > s->rows;
3296 emu[1] = (col + w4) * 4 > f->linesize[1] ||
3297 (row + h4) > s->rows;
3299 s->dst[0] = s->tmp_y;
3302 s->dst[0] = f->data[0] + yoff;
3303 s->y_stride = f->linesize[0];
3306 s->dst[1] = s->tmp_uv[0];
3307 s->dst[2] = s->tmp_uv[1];
3310 s->dst[1] = f->data[1] + uvoff;
3311 s->dst[2] = f->data[2] + uvoff;
3312 s->uv_stride = f->linesize[1];
3316 intra_recon_16bpp(ctx, yoff, uvoff);
3318 intra_recon_8bpp(ctx, yoff, uvoff);
3322 inter_recon_16bpp(ctx);
3324 inter_recon_8bpp(ctx);
3328 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3330 for (n = 0; o < w; n++) {
3335 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3336 s->tmp_y + o, 128, h, 0, 0);
3337 o += bw * bytesperpixel;
3342 int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
3343 int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
3345 for (n = s->ss_h; o < w; n++) {
3350 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3351 s->tmp_uv[0] + o, 128, h, 0, 0);
3352 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3353 s->tmp_uv[1] + o, 128, h, 0, 0);
3354 o += bw * bytesperpixel;
3359 // pick filter level and find edges to apply filter to
3360 if (s->filter.level &&
3361 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3362 [b->mode[3] != ZEROMV]) > 0) {
3363 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3364 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3366 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3367 mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3368 if (s->ss_h || s->ss_v)
3369 mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
3370 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3371 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3372 b->uvtx, skip_inter);
3374 if (!s->filter.lim_lut[lvl]) {
3375 int sharp = s->filter.sharpness;
3379 limit >>= (sharp + 3) >> 2;
3380 limit = FFMIN(limit, 9 - sharp);
3382 limit = FFMAX(limit, 1);
3384 s->filter.lim_lut[lvl] = limit;
3385 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
3391 s->block += w4 * h4 * 64 * bytesperpixel;
3392 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3393 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3394 s->eob += 4 * w4 * h4;
3395 s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3396 s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
// Recursively decode one node of the superblock partition tree, reading the
// partition syntax from the range coder and dispatching to decode_b() for
// each coded block.
// (row, col) are in 8x8-block units; yoff/uvoff are byte offsets into the
// luma/chroma planes of the current frame; bl is the current split depth.
// NOTE(review): several original lines (opening braces, some branch/switch
// headers) are elided from this listing; comments describe visible code only.
3400 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3401 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3403 VP9Context *s = ctx->priv_data;
// Partition probability context: bit 0 from the above-partition context,
// bit 1 from the left-partition context, each sampled at depth 'bl'.
3404 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3405 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
// Keyframes / intra-only frames use the fixed default partition probability
// tables; inter frames use the adaptive per-frame set.
3406 const uint8_t *p = s->keyframe || s->intraonly ? vp9_default_kf_partition_probs[bl][c] :
3407 s->prob.p.partition[bl][c];
3408 enum BlockPartition bp;
// Half the block size at this level, in 8x8-block units (4, 2, 1).
3409 ptrdiff_t hbs = 4 >> bl;
3410 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3411 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3412 int bytesperpixel = s->bytesperpixel;
// Read a full partition symbol and decode the block(s).
3415 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3416 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// Right half of a potential split still lies inside the frame.
3417 } else if (col + hbs < s->cols) { // FIXME why not <=?
3418 if (row + hbs < s->rows) { // FIXME why not <=?
3419 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3421 case PARTITION_NONE:
3422 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// PARTITION_H: top block, then step one half-block down for the bottom one.
3425 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3426 yoff += hbs * 8 * y_stride;
3427 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3428 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
// PARTITION_V: left block, then step one half-block to the right.
3431 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3432 yoff += hbs * 8 * bytesperpixel;
3433 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3434 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3436 case PARTITION_SPLIT:
// Recurse into all four quadrants at the next depth.
3437 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3438 decode_sb(ctx, row, col + hbs, lflvl,
3439 yoff + 8 * hbs * bytesperpixel,
3440 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3441 yoff += hbs * 8 * y_stride;
3442 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3443 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3444 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3445 yoff + 8 * hbs * bytesperpixel,
3446 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// Bottom edge clipped: only a split-vs-not decision (probability p[1]) is
// coded, and only the top two quadrants exist.
3451 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3452 bp = PARTITION_SPLIT;
3453 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3454 decode_sb(ctx, row, col + hbs, lflvl,
3455 yoff + 8 * hbs * bytesperpixel,
3456 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3459 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// Right edge clipped: only a split-vs-not decision (probability p[2]) is
// coded, and only the left two quadrants exist.
3461 } else if (row + hbs < s->rows) { // FIXME why not <=?
3462 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3463 bp = PARTITION_SPLIT;
3464 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3465 yoff += hbs * 8 * y_stride;
3466 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3467 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3470 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// Both edges clipped: split is implied, only the top-left quadrant exists.
3473 bp = PARTITION_SPLIT;
3474 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
// Record the coded partition symbol for backward probability adaptation.
3476 s->counts.partition[bl][c][bp]++;
// Second-pass variant of decode_sb(): instead of reading partition syntax
// from the bitstream it replays the block layout stored during the first
// pass (b->bl / b->bp), recursing until the stored depth is reached.
// NOTE(review): the declaration of 'b' (presumably VP9Block *b = s->b; see
// the s->b->bl comparison below) and several braces are elided from this
// listing — confirm against the full source.
3479 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3480 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3482 VP9Context *s = ctx->priv_data;
// Half the block size at this level, in 8x8-block units.
3484 ptrdiff_t hbs = 4 >> bl;
3485 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3486 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3487 int bytesperpixel = s->bytesperpixel;
// Smallest level: the stored block must be an 8x8 leaf.
3490 av_assert2(b->bl == BL_8X8);
3491 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
// Stored block lives exactly at this depth: replay it (and its H/V partner
// when the partner is inside the frame).
3492 } else if (s->b->bl == bl) {
3493 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3494 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3495 yoff += hbs * 8 * y_stride;
3496 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3497 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3498 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3499 yoff += hbs * 8 * bytesperpixel;
3500 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3501 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// Stored block is deeper: recurse into whichever quadrants exist inside
// the visible frame area (mirrors the edge clipping in decode_sb()).
3504 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3505 if (col + hbs < s->cols) { // FIXME why not <=?
3506 if (row + hbs < s->rows) {
3507 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
3508 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3509 yoff += hbs * 8 * y_stride;
3510 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3511 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3512 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3513 yoff + 8 * hbs * bytesperpixel,
3514 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// Bottom edge clipped: only the top-right quadrant remains.
3516 yoff += hbs * 8 * bytesperpixel;
3517 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3518 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
// Right edge clipped: only the bottom-left quadrant remains.
3520 } else if (row + hbs < s->rows) {
3521 yoff += hbs * 8 * y_stride;
3522 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3523 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
// Apply the vertical-edge (column) loop filter to one plane of a 64x64
// superblock. 'lvl' holds per-8x8 filter levels, 'mask' the per-row edge
// bitmasks built by mask_edges(); per the VP9Filter declaration, mask[..][i]
// selects filter size: 0=16px, 1=8px, 2=4px, 3=inner-4px edges.
// E/I are taken from the mblim/lim lookup tables; H is the high nibble of
// the level byte (presumably the high-edge-variance threshold — confirm).
// NOTE(review): several lines (braces, some else-branch headers) are elided
// from this listing.
3528 static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
3529 uint8_t *lvl, uint8_t (*mask)[4],
3530 uint8_t *dst, ptrdiff_t ls)
3532 int y, x, bytesperpixel = s->bytesperpixel;
3534 // filter edges between columns (e.g. block1 | block2)
// Walk rows two (or, subsampled, four) 8px rows at a time so vertically
// adjacent edges can be filtered with one 16-wide call where possible.
3535 for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
3536 uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
// Combined masks for the two rows; index 3 is the inner-4px edge set.
3537 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3538 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3539 unsigned hm = hm1 | hm2 | hm13 | hm23;
// Scan edge columns left to right; stop once no mask bit remains at or
// beyond the current column.
3541 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
3544 int L = *l, H = L >> 4;
3545 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
// 16px edge in the upper row; pair with the lower row for a single
// 16-high call when it has the same edge, else an 8-high wide filter.
3547 if (hmask1[0] & x) {
3548 if (hmask2[0] & x) {
3549 av_assert2(l[8 << ss_v] == L);
3550 s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
3552 s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
// Upper row has a smaller edge; if the lower row also filters here, pack
// both levels into E/I (low/high byte) and use the mix2 kernel.
3554 } else if (hm2 & x) {
3557 E |= s->filter.mblim_lut[L] << 8;
3558 I |= s->filter.lim_lut[L] << 8;
3559 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3561 [0](ptr, ls, E, I, H);
3563 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3564 [0](ptr, ls, E, I, H);
// Only the lower row filters at this column.
3566 } else if (hm2 & x) {
3567 int L = l[8 << ss_v], H = L >> 4;
3568 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3570 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3571 [0](ptr + 8 * ls, ls, E, I, H);
// Inner-4px edges (mask index 3), offset half a block into the column.
3579 int L = *l, H = L >> 4;
3580 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3585 E |= s->filter.mblim_lut[L] << 8;
3586 I |= s->filter.lim_lut[L] << 8;
3587 s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3589 s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3591 } else if (hm23 & x) {
3592 int L = l[8 << ss_v], H = L >> 4;
3593 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3595 s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
// Apply the horizontal-edge (row) loop filter to one plane of a 64x64
// superblock; counterpart of filter_plane_cols(). mask[..][i] again selects
// filter size (0=16px, 1=8px, 2=4px, 3=inner-4px), but here two horizontally
// adjacent edges are examined per step (x and x << (1 + ss_h)) so they can
// share one wide call.
// NOTE(review): several lines (braces, some else-branch headers) are elided
// from this listing.
3603 static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
3604 uint8_t *lvl, uint8_t (*mask)[4],
3605 uint8_t *dst, ptrdiff_t ls)
3607 int y, x, bytesperpixel = s->bytesperpixel;
3610 // filter edges between rows (e.g. ------)
3612 for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
3613 uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
// Combined mask for this edge row; vm3 is the inner-4px edge set.
3614 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
// Step two (or four, when subsampled) columns at a time, stopping once no
// mask bit remains at or beyond the current column.
3616 for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
3619 int L = *l, H = L >> 4;
3620 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
// 16px edge here; merge with the neighbouring column into one 16-wide
// call if it has the same edge, else use the 8-wide 16px kernel.
3623 if (vmask[0] & (x << (1 + ss_h))) {
3624 av_assert2(l[1 + ss_h] == L);
3625 s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
3627 s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
// Neighbouring column also filters: pack both levels into E/I (low/high
// byte) and dispatch to the mix2 kernel chosen by both edge sizes.
3629 } else if (vm & (x << (1 + ss_h))) {
3632 E |= s->filter.mblim_lut[L] << 8;
3633 I |= s->filter.lim_lut[L] << 8;
3634 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3635 [!!(vmask[1] & (x << (1 + ss_h)))]
3636 [1](ptr, ls, E, I, H);
3638 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3639 [1](ptr, ls, E, I, H);
// Only the neighbouring column filters at this position.
3641 } else if (vm & (x << (1 + ss_h))) {
3642 int L = l[1 + ss_h], H = L >> 4;
3643 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3645 s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
3646 [1](ptr + 8 * bytesperpixel, ls, E, I, H);
// Inner-4px edges (vm3), offset half a block down (ls * 4).
3651 int L = *l, H = L >> 4;
3652 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3654 if (vm3 & (x << (1 + ss_h))) {
3657 E |= s->filter.mblim_lut[L] << 8;
3658 I |= s->filter.lim_lut[L] << 8;
3659 s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
3661 s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
3663 } else if (vm3 & (x << (1 + ss_h))) {
3664 int L = l[1 + ss_h], H = L >> 4;
3665 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3667 s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
// Run the loop filter over one 64x64 superblock: first the luma plane's
// column then row edges, then both chroma planes with the edge masks built
// for the frame's chroma subsampling.
// NOTE(review): the declaration of 'p' (loop variable over the two chroma
// planes) is elided from this listing.
3680 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3681 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3683 VP9Context *s = ctx->priv_data;
3684 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3685 uint8_t *dst = f->data[0] + yoff;
3686 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
// Chroma uses mask set 1 when either dimension is subsampled, else set 0.
3687 uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
3690 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3691 // if you think of them as acting on a 8x8 block max, we can interleave
3692 // each v/h within the single x loop, but that only works if we work on
3693 // 8 pixel blocks, and we won't always do that (we want at least 16px
3694 // to use SSE2 optimizations, perhaps 32 for AVX2)
3696 filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
3697 filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);
// Chroma: same column-then-row order for Cb (p=0) and Cr (p=1).
3699 for (p = 0; p < 2; p++) {
3700 dst = f->data[1 + p] + uvoff;
3701 filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
3702 filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
/* Compute the range [*start, *end) covered by tile 'idx' when a dimension
 * of 'n' superblocks is split into 2^log2_n tiles. Tile boundaries sit at
 * (idx * n) >> log2_n superblocks, clamped to n; the << 3 converts
 * superblocks to the 8x8-block units used by the row/col decode loops. */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_lo = (idx * n) >> log2_n;
    int sb_hi = ((idx + 1) * n) >> log2_n;

    if (sb_lo > n)
        sb_lo = n;
    if (sb_hi > n)
        sb_hi = n;

    *start = sb_lo << 3;
    *end   = sb_hi << 3;
}
// Backward-adapt a single binary probability *p towards the maximum-
// likelihood estimate derived from the bit counts (ct0, ct1) gathered while
// decoding the frame; the blend weight grows with the amount of evidence,
// saturating at max_count.
// NOTE(review): the zero-count early return and the 'p1 = *p' load are not
// visible in this listing (lines elided) — p1 is used below.
3714 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3715 int max_count, int update_factor)
3717 unsigned ct = ct0 + ct1, p2, p1;
// Rounded ML probability of a 0-bit, clipped to [1,255] so the range coder
// never sees a degenerate probability.
3723 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3724 p2 = av_clip(p2, 1, 255);
// Scale the update factor by how much evidence was observed.
3725 ct = FFMIN(ct, max_count);
3726 update_factor = FASTDIV(update_factor * ct, max_count);
3728 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3729 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
// End-of-frame backward adaptation: blend every probability in the current
// frame context (s->prob_ctx[s->framectxid]) towards the symbol counts
// gathered in s->counts, via adapt_prob().
// NOTE(review): the declarations of the loop variables (i, j, k, l, m) and
// various braces/early returns are elided from this listing.
3732 static void adapt_probs(VP9Context *s)
3735 prob_context *p = &s->prob_ctx[s->framectxid].p;
// Coefficients use a smaller update factor (112) right after a keyframe or
// intra-only frame; 128 otherwise.
3736 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
// Coefficient / EOB probabilities, indexed [txsz][plane][inter][band][ctx].
3739 for (i = 0; i < 4; i++)
3740 for (j = 0; j < 2; j++)
3741 for (k = 0; k < 2; k++)
3742 for (l = 0; l < 6; l++)
3743 for (m = 0; m < 6; m++) {
3744 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3745 unsigned *e = s->counts.eob[i][j][k][l][m];
3746 unsigned *c = s->counts.coef[i][j][k][l][m];
3748 if (l == 0 && m >= 3) // dc only has 3 pt
// Coefficients use max_count 24 and the frame-dependent update factor.
3751 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3752 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3753 adapt_prob(&pp[2], c[1], c[2], 24, uf);
// Intra frames: the mode probabilities below are not adapted, only copied
// into the frame context; the function stops after the copies.
3756 if (s->keyframe || s->intraonly) {
3757 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3758 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3759 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3760 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
// Skip flag.
3765 for (i = 0; i < 3; i++)
3766 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
// Intra/inter flag.
3769 for (i = 0; i < 4; i++)
3770 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
// Compound prediction mode selection (only coded when switchable).
3773 if (s->comppredmode == PRED_SWITCHABLE) {
3774 for (i = 0; i < 5; i++)
3775 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
// Compound reference selection.
3779 if (s->comppredmode != PRED_SINGLEREF) {
3780 for (i = 0; i < 5; i++)
3781 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3782 s->counts.comp_ref[i][1], 20, 128);
// Single-reference selection (two binary decisions).
3785 if (s->comppredmode != PRED_COMPREF) {
3786 for (i = 0; i < 5; i++) {
3787 uint8_t *pp = p->single_ref[i];
3788 unsigned (*c)[2] = s->counts.single_ref[i];
3790 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3791 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3795 // block partitioning
3796 for (i = 0; i < 4; i++)
3797 for (j = 0; j < 4; j++) {
3798 uint8_t *pp = p->partition[i][j];
3799 unsigned *c = s->counts.partition[i][j];
3801 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3802 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3803 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// Transform size trees (only when the frame uses switchable tx sizes).
3807 if (s->txfmmode == TX_SWITCHABLE) {
3808 for (i = 0; i < 2; i++) {
3809 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3811 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3812 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3813 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3814 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3815 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3816 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3820 // interpolation filter
3821 if (s->filtermode == FILTER_SWITCHABLE) {
3822 for (i = 0; i < 4; i++) {
3823 uint8_t *pp = p->filter[i];
3824 unsigned *c = s->counts.filter[i];
3826 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3827 adapt_prob(&pp[1], c[1], c[2], 20, 128);
// Inter prediction mode tree.
3832 for (i = 0; i < 7; i++) {
3833 uint8_t *pp = p->mv_mode[i];
3834 unsigned *c = s->counts.mv_mode[i];
3836 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3837 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3838 adapt_prob(&pp[2], c[1], c[3], 20, 128);
// Motion vector joint tree (which components are non-zero).
3843 uint8_t *pp = p->mv_joint;
3844 unsigned *c = s->counts.mv_joint;
3846 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3847 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3848 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// Per-component (row/col) motion vector probabilities.
3852 for (i = 0; i < 2; i++) {
3854 unsigned *c, (*c2)[2], sum;
3856 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3857 s->counts.mv_comp[i].sign[1], 20, 128);
// Magnitude class tree over 11 classes; 'sum' is the tail mass remaining
// at each node, reduced as the tree is descended.
3859 pp = p->mv_comp[i].classes;
3860 c = s->counts.mv_comp[i].classes;
3861 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3862 adapt_prob(&pp[0], c[0], sum, 20, 128);
3864 adapt_prob(&pp[1], c[1], sum, 20, 128);
3866 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3867 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3869 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3870 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3872 adapt_prob(&pp[6], c[6], sum, 20, 128);
3873 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3874 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3875 adapt_prob(&pp[9], c[9], c[10], 20, 128);
// Class-0 flag and the 10 integer magnitude bits.
3877 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3878 s->counts.mv_comp[i].class0[1], 20, 128);
3879 pp = p->mv_comp[i].bits;
3880 c2 = s->counts.mv_comp[i].bits;
3881 for (j = 0; j < 10; j++)
3882 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
// Fractional-pel trees (class-0 and general).
3884 for (j = 0; j < 2; j++) {
3885 pp = p->mv_comp[i].class0_fp[j];
3886 c = s->counts.mv_comp[i].class0_fp[j];
3887 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3888 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3889 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3891 pp = p->mv_comp[i].fp;
3892 c = s->counts.mv_comp[i].fp;
3893 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3894 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3895 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// High-precision (1/8-pel) bits, only when the frame allows them.
3897 if (s->highprecisionmvs) {
3898 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3899 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3900 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3901 s->counts.mv_comp[i].hp[1], 20, 128);
// Luma intra mode tree (per block-size group); 'sum' again tracks the
// remaining tail mass as the tree is walked.
3906 for (i = 0; i < 4; i++) {
3907 uint8_t *pp = p->y_mode[i];
3908 unsigned *c = s->counts.y_mode[i], sum, s2;
3910 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3911 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3912 sum -= c[TM_VP8_PRED];
3913 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3914 sum -= c[VERT_PRED];
3915 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3916 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3918 adapt_prob(&pp[3], s2, sum, 20, 128);
3920 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3921 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3922 sum -= c[DIAG_DOWN_LEFT_PRED];
3923 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3924 sum -= c[VERT_LEFT_PRED];
3925 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3926 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// Chroma intra mode tree (conditioned on the luma mode) — same tree shape.
3930 for (i = 0; i < 10; i++) {
3931 uint8_t *pp = p->uv_mode[i];
3932 unsigned *c = s->counts.uv_mode[i], sum, s2;
3934 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3935 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3936 sum -= c[TM_VP8_PRED];
3937 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3938 sum -= c[VERT_PRED];
3939 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3940 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3942 adapt_prob(&pp[3], s2, sum, 20, 128);
3944 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3945 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3946 sum -= c[DIAG_DOWN_LEFT_PRED];
3947 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3948 sum -= c[VERT_LEFT_PRED];
3949 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3950 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3954 static void free_buffers(VP9Context *s)
3956 av_freep(&s->intra_pred_data[0]);
3957 av_freep(&s->b_base);
3958 av_freep(&s->block_base);
// Codec close callback: unreference and free the three internal frame
// slots and all current/next reference slots.
// NOTE(review): the declaration of 'i' and the trailing cleanup lines
// (after 3977) are elided from this listing.
3961 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3963 VP9Context *s = ctx->priv_data;
// Internal frames: drop any held buffer, then free the AVFrame container.
3966 for (i = 0; i < 3; i++) {
3967 if (s->frames[i].tf.f->data[0])
3968 vp9_unref_frame(ctx, &s->frames[i]);
3969 av_frame_free(&s->frames[i].tf.f);
// Reference slots: same for both the current and the pending ("next") set.
3971 for (i = 0; i < 8; i++) {
3972 if (s->refs[i].f->data[0])
3973 ff_thread_release_buffer(ctx, &s->refs[i]);
3974 av_frame_free(&s->refs[i].f);
3975 if (s->next_refs[i].f->data[0])
3976 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3977 av_frame_free(&s->next_refs[i].f);
// Main per-packet decode entry point: parse the frame header, rotate the
// internal frame buffers, decode all tiles (optionally in two passes for
// frame threading), run the loop filter row by row, then rotate the
// reference slots and output the frame if visible.
// NOTE(review): many original lines (braces, declarations such as 'f',
// 'bytesperpixel', 'j/k/l/m', 'tile_size', goto/error paths, the 'do {'
// opening the pass loop) are elided from this listing; comments below
// describe the visible code only.
3987 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3988 int *got_frame, AVPacket *pkt)
3990 const uint8_t *data = pkt->data;
3991 int size = pkt->size;
3992 VP9Context *s = ctx->priv_data;
3993 int res, tile_row, tile_col, i, ref, row, col;
// Keep the previous segmentation map alive when the header says the map is
// not updated this frame.
3994 int retain_segmap_ref = s->segmentation.enabled && !s->segmentation.update_map
3995 && s->frames[REF_FRAME_SEGMAP].segmentation_map;
3996 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
4000 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
// res == 0: "show existing frame" — output reference 'ref' directly,
// carry over the refs into next_refs, and return without decoding.
4002 } else if (res == 0) {
4003 if (!s->refs[ref].f->data[0]) {
4004 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
4005 return AVERROR_INVALIDDATA;
4007 if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
4009 ((AVFrame *)frame)->pkt_pts = pkt->pts;
4010 ((AVFrame *)frame)->pkt_dts = pkt->dts;
4011 for (i = 0; i < 8; i++) {
4012 if (s->next_refs[i].f->data[0])
4013 ff_thread_release_buffer(ctx, &s->next_refs[i]);
4014 if (s->refs[i].f->data[0] &&
4015 (res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i])) < 0)
// Rotate internal frames: previous CUR_FRAME becomes the segmentation-map
// and mv-pair reference (unless retained / not applicable), then allocate
// a fresh CUR_FRAME.
4024 if (!retain_segmap_ref) {
4025 if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
4026 vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
4027 if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4028 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
4031 if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
4032 vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);
4033 if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4034 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
4036 if (s->frames[CUR_FRAME].tf.f->data[0])
4037 vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
4038 if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
4040 f = s->frames[CUR_FRAME].tf.f;
4041 f->key_frame = s->keyframe;
4042 f->pict_type = (s->keyframe || s->intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
4043 ls_y = f->linesize[0];
4044 ls_uv =f->linesize[1];
// Pre-fill next_refs: slots flagged in refreshrefmask take the new frame,
// the rest keep their current reference.
4047 for (i = 0; i < 8; i++) {
4048 if (s->next_refs[i].f->data[0])
4049 ff_thread_release_buffer(ctx, &s->next_refs[i]);
4050 if (s->refreshrefmask & (1 << i)) {
4051 res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
4052 } else if (s->refs[i].f->data[0]) {
4053 res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
4059 // main tile decode loop
// Reset the "above" context lines for the whole frame width.
4060 bytesperpixel = s->bytesperpixel;
4061 memset(s->above_partition_ctx, 0, s->cols);
4062 memset(s->above_skip_ctx, 0, s->cols);
4063 if (s->keyframe || s->intraonly) {
4064 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
4066 memset(s->above_mode_ctx, NEARESTMV, s->cols);
4068 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
4069 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
4070 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
4071 memset(s->above_segpred_ctx, 0, s->cols);
// Two-pass decoding is used with frame threading when this frame updates
// the probability context non-parallel (pass 1 parses, pass 2 reconstructs).
4072 s->pass = s->frames[CUR_FRAME].uses_2pass =
4073 ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
4074 if ((res = update_block_buffers(ctx)) < 0) {
4075 av_log(ctx, AV_LOG_ERROR,
4076 "Failed to allocate block buffers\n");
// Parallel mode: commit the (unadapted) probabilities now so other frame
// threads can proceed immediately.
4079 if (s->refreshctx && s->parallelmode) {
4082 for (i = 0; i < 4; i++) {
4083 for (j = 0; j < 2; j++)
4084 for (k = 0; k < 2; k++)
4085 for (l = 0; l < 6; l++)
4086 for (m = 0; m < 6; m++)
4087 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
4088 s->prob.coef[i][j][k][l][m], 3);
4089 if (s->txfmmode == i)
4092 s->prob_ctx[s->framectxid].p = s->prob.p;
4093 ff_thread_finish_setup(ctx);
4094 } else if (!s->refreshctx) {
4095 ff_thread_finish_setup(ctx);
// Rewind the coefficient/eob cursors to the start of the block buffers.
4101 s->block = s->block_base;
4102 s->uvblock[0] = s->uvblock_base[0];
4103 s->uvblock[1] = s->uvblock_base[1];
4104 s->eob = s->eob_base;
4105 s->uveob[0] = s->uveob_base[0];
4106 s->uveob[1] = s->uveob_base[1];
// Set up one range decoder per tile column; the last tile in the packet
// has no explicit 32-bit size prefix.
4108 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
4109 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
4110 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
4112 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4115 if (tile_col == s->tiling.tile_cols - 1 &&
4116 tile_row == s->tiling.tile_rows - 1) {
4119 tile_size = AV_RB32(data);
4123 if (tile_size > size) {
4124 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4125 return AVERROR_INVALIDDATA;
4127 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
4128 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
4129 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4130 return AVERROR_INVALIDDATA;
// Decode superblock rows (64px tall; row/col are in 8x8-block units).
4137 for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
4138 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
4139 struct VP9Filter *lflvl_ptr = s->lflvl;
4140 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
4142 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4143 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
4144 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
// Reset the "left" context at the start of every tile column.
4147 memset(s->left_partition_ctx, 0, 8);
4148 memset(s->left_skip_ctx, 0, 8);
4149 if (s->keyframe || s->intraonly) {
4150 memset(s->left_mode_ctx, DC_PRED, 16);
4152 memset(s->left_mode_ctx, NEARESTMV, 8);
4154 memset(s->left_y_nnz_ctx, 0, 16);
4155 memset(s->left_uv_nnz_ctx, 0, 32);
4156 memset(s->left_segpred_ctx, 0, 8);
// Swap in this tile's range decoder state (and save it back afterwards),
// since tiles are interleaved per superblock row.
4158 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
4161 for (col = s->tiling.tile_col_start;
4162 col < s->tiling.tile_col_end;
4163 col += 8, yoff2 += 64 * bytesperpixel,
4164 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4165 // FIXME integrate with lf code (i.e. zero after each
4166 // use, similar to invtxfm coefficients, or similar)
4168 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
// Pass 2 replays the stored block layout; pass 0/1 parse the bitstream.
4172 decode_sb_mem(ctx, row, col, lflvl_ptr,
4173 yoff2, uvoff2, BL_64X64);
4175 decode_sb(ctx, row, col, lflvl_ptr,
4176 yoff2, uvoff2, BL_64X64);
4180 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
4188 // backup pre-loopfilter reconstruction data for intra
4189 // prediction of next row of sb64s
4190 if (row + 8 < s->rows) {
4191 memcpy(s->intra_pred_data[0],
4192 f->data[0] + yoff + 63 * ls_y,
4193 8 * s->cols * bytesperpixel);
4194 memcpy(s->intra_pred_data[1],
4195 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4196 8 * s->cols * bytesperpixel >> s->ss_h);
4197 memcpy(s->intra_pred_data[2],
4198 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4199 8 * s->cols * bytesperpixel >> s->ss_h);
4202 // loopfilter one row
4203 if (s->filter.level) {
4206 lflvl_ptr = s->lflvl;
4207 for (col = 0; col < s->cols;
4208 col += 8, yoff2 += 64 * bytesperpixel,
4209 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4210 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
4214 // FIXME maybe we can make this more finegrained by running the
4215 // loopfilter per-block instead of after each sbrow
4216 // In fact that would also make intra pred left preparation easier?
// Let consumer frame threads know this superblock row is finished.
4217 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
// After pass 1: adapt probabilities (elided here) and release setup.
4221 if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
4223 ff_thread_finish_setup(ctx);
4225 } while (s->pass++ == 1);
4226 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
// Commit next_refs into the active reference slots.
4229 for (i = 0; i < 8; i++) {
4230 if (s->refs[i].f->data[0])
4231 ff_thread_release_buffer(ctx, &s->refs[i]);
4232 ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
// Only visible frames are returned to the caller.
4235 if (!s->invisible) {
4236 if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
4244 static void vp9_decode_flush(AVCodecContext *ctx)
4246 VP9Context *s = ctx->priv_data;
4249 for (i = 0; i < 3; i++)
4250 vp9_unref_frame(ctx, &s->frames[i]);
4251 for (i = 0; i < 8; i++)
4252 ff_thread_release_buffer(ctx, &s->refs[i]);
// Allocate the AVFrame containers for the three internal frame slots and
// the eight current/next reference slots. On any allocation failure the
// already-allocated frames are torn down via vp9_decode_free().
// NOTE(review): the declaration of 'i' and the final 'return 0;' are
// elided from this listing.
4255 static int init_frames(AVCodecContext *ctx)
4257 VP9Context *s = ctx->priv_data;
4260 for (i = 0; i < 3; i++) {
4261 s->frames[i].tf.f = av_frame_alloc();
4262 if (!s->frames[i].tf.f) {
4263 vp9_decode_free(ctx);
4264 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4265 return AVERROR(ENOMEM);
4268 for (i = 0; i < 8; i++) {
4269 s->refs[i].f = av_frame_alloc();
4270 s->next_refs[i].f = av_frame_alloc();
4271 if (!s->refs[i].f || !s->next_refs[i].f) {
4272 vp9_decode_free(ctx);
4273 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4274 return AVERROR(ENOMEM);
// Codec init callback: enable per-frame progress allocation for frame
// threading, mark the loop-filter sharpness as "unset" (-1 forces the
// limit LUTs to be rebuilt on first use), and allocate the frame
// containers.
// NOTE(review): a line between the visible statements is elided from this
// listing — confirm against the full source.
4281 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4283 VP9Context *s = ctx->priv_data;
4285 ctx->internal->allocate_progress = 1;
4287 s->filter.sharpness = -1;
4289 return init_frames(ctx);
4292 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4294 return init_frames(avctx);
// Frame-threading sync: copy the decoding state a future frame needs from
// the source thread 'src' into 'dst' — frame/reference buffers, frame
// geometry flags, probability contexts and filter deltas.
// NOTE(review): declarations of 'i'/'res' and the free/realloc block after
// the size-change check are elided from this listing.
4297 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4300 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4302 // detect size changes in other threads
4303 if (s->intra_pred_data[0] &&
4304 (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
// Re-reference the three internal frame slots from the source thread.
4308 for (i = 0; i < 3; i++) {
4309 if (s->frames[i].tf.f->data[0])
4310 vp9_unref_frame(dst, &s->frames[i]);
4311 if (ssrc->frames[i].tf.f->data[0]) {
4312 if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
// Our refs become the source thread's next_refs (its post-frame state).
4316 for (i = 0; i < 8; i++) {
4317 if (s->refs[i].f->data[0])
4318 ff_thread_release_buffer(dst, &s->refs[i]);
4319 if (ssrc->next_refs[i].f->data[0]) {
4320 if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
// Scalar state needed to decode the next frame.
4325 s->invisible = ssrc->invisible;
4326 s->keyframe = ssrc->keyframe;
4327 s->ss_v = ssrc->ss_v;
4328 s->ss_h = ssrc->ss_h;
4329 s->segmentation.enabled = ssrc->segmentation.enabled;
4330 s->segmentation.update_map = ssrc->segmentation.update_map;
4331 s->bytesperpixel = ssrc->bytesperpixel;
4333 s->bpp_index = ssrc->bpp_index;
4334 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4335 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
4336 if (ssrc->segmentation.enabled) {
4337 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4338 sizeof(s->segmentation.feat));
// Supported VP9 profiles, terminated by FF_PROFILE_UNKNOWN.
4344 static const AVProfile profiles[] = {
4345 { FF_PROFILE_VP9_0, "Profile 0" },
4346 { FF_PROFILE_VP9_1, "Profile 1" },
4347 { FF_PROFILE_VP9_2, "Profile 2" },
4348 { FF_PROFILE_VP9_3, "Profile 3" },
4349 { FF_PROFILE_UNKNOWN },
4352 AVCodec ff_vp9_decoder = {
4354 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4355 .type = AVMEDIA_TYPE_VIDEO,
4356 .id = AV_CODEC_ID_VP9,
4357 .priv_data_size = sizeof(VP9Context),
4358 .init = vp9_decode_init,
4359 .close = vp9_decode_free,
4360 .decode = vp9_decode_frame,
4361 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
4362 .flush = vp9_decode_flush,
4363 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4364 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
4365 .profiles = NULL_IF_CONFIG_SMALL(profiles),