2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
34 #include "libavutil/pixdesc.h"
36 #define VP9_SYNCCODE 0x498342
/* Edge bitmasks. Per the inline index annotations the dimensions are:
 * plane (0=y, 1=uv), direction (0=col, 1=row), row (8 of them) and size
 * class (0=16, 1=8, 2=4, 3=inner4). Each bit of an entry stands for a column. */
40 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
41 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
/* Per-block state: segment id, prediction flags/modes, references, motion
 * vectors, transform sizes and partition type. */
44 typedef struct VP9Block {
45 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
46 enum FilterMode filter;
/* one motion vector per sub-block index and per reference */
47 VP56mv mv[4 /* b_idx */][2 /* ref */];
/* transform mode for luma (tx) and chroma (uvtx) -- presumably; confirm */
49 enum TxfmMode tx, uvtx;
51 enum BlockPartition bp;
/* Decoder private context (reached through AVCodecContext.priv_data). */
54 typedef struct VP9Context {
65 int row, row7, col, col7;
67 ptrdiff_t y_stride, uv_stride;
/* bpp is 8 + 2 * bits and bytesperpixel is (7 + bpp) >> 3, both set by
 * read_colorspace_details(); last_bpp is the depth the DSP contexts were
 * last initialised for (see update_size()) */
70 uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
/* keyframe flag of the previously parsed frame header */
71 uint8_t last_keyframe;
72 ThreadFrame next_refs[8];
76 uint8_t mblim_lut[64];
78 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
/* frame size in 64-pixel units (sb_*) and in 8-pixel units (rows/cols),
 * both rounded up; computed in update_size() */
79 unsigned sb_cols, sb_rows, rows, cols;
/* NOTE(review): the two coef tables below look like the saved per-context
 * probabilities (3 entries) and the working probabilities (11 entries);
 * confirm against the enclosing member names */
82 uint8_t coef[4][2][2][6][6][3];
86 uint8_t coef[4][2][2][6][6][11];
/* unsigned tables below: presumably symbol counters (the code clears
 * counts.coef and counts.eob as unsigned arrays of these shapes) -- confirm */
89 unsigned y_mode[4][10];
90 unsigned uv_mode[10][10];
91 unsigned filter[4][3];
92 unsigned mv_mode[7][4];
95 unsigned single_ref[5][2][2];
96 unsigned comp_ref[5][2];
101 unsigned mv_joint[4];
104 unsigned classes[11];
106 unsigned bits[10][2];
107 unsigned class0_fp[2][4];
109 unsigned class0_hp[2];
112 unsigned partition[4][4][4];
113 unsigned coef[4][2][2][6][6][3];
114 unsigned eob[4][2][2][6][6][2];
117 // contextual (left/above) cache
118 DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
119 DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
120 DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
121 DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
122 DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
123 DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
124 DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
125 DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
126 DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
127 DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
128 DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
129 DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
/* The above_* pointers are not separate allocations: update_size() carves
 * them out of the single buffer whose base is intra_pred_data[0]. */
130 uint8_t *above_partition_ctx;
131 uint8_t *above_mode_ctx;
132 // FIXME maybe merge some of the below in a flags field?
133 uint8_t *above_y_nnz_ctx;
134 uint8_t *above_uv_nnz_ctx[2];
135 uint8_t *above_skip_ctx; // 1bit
136 uint8_t *above_txfm_ctx; // 2bit
137 uint8_t *above_segpred_ctx; // 1bit
138 uint8_t *above_intra_ctx; // 1bit
139 uint8_t *above_comp_ctx; // 1bit
140 uint8_t *above_ref_ctx; // 2bit
141 uint8_t *above_filter_ctx;
142 VP56mv (*above_mv_ctx)[2];
/* intra_pred_data[0] is the base of the pooled allocation and is the
 * pointer that update_size() frees */
145 uint8_t *intra_pred_data[3];
146 struct VP9Filter *lflvl;
147 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2];
149 // block reconstruction intermediates
/* uses_2pass value the block buffers were allocated for; compared in
 * update_block_buffers() to decide whether they can be reused */
150 int block_alloc_using_2pass;
151 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
152 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
/* clamping bounds applied by clamp_mv() */
153 struct { int x, y; } min_mv, max_mv;
154 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
155 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
/* per-reference scaling, filled in by decode_frame_header(): mvscale is
 * the reference/current size ratio in 14-bit fixed point (0 when the sizes
 * match), mvstep is that ratio applied to 16 */
156 uint16_t mvscale[3][2];
157 uint8_t mvstep[3][2];
/* Two tables of { width, height } pairs, one pair per block size
 * (N_BS_SIZES entries each). Every value in the second table is the matching
 * value of the first table halved, with a floor of 1.
 * NOTE(review): presumably the first table is in 4-pixel units and the
 * second in 8-pixel units -- confirm against the users of this table. */
160 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
162 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
163 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
165 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
166 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
/*
 * Acquire a picture buffer for f and attach its per-frame side storage.
 *
 * The side storage is one zero-initialised AVBuffer (f->extradata) that holds
 * sz bytes of segmentation map followed by sz VP9mvrefPair entries, where
 * sz = 64 * sb_cols * sb_rows.
 *
 * Returns AVERROR(ENOMEM) if the side storage cannot be allocated; in that
 * case the picture buffer obtained first is released again.
 */
170 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
172 VP9Context *s = ctx->priv_data;
175 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
177 sz = 64 * s->sb_cols * s->sb_rows;
/* 1 byte of segmentation map plus one VP9mvrefPair per unit */
178 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
179 ff_thread_release_buffer(ctx, &f->tf);
180 return AVERROR(ENOMEM);
/* both views alias the single extradata buffer */
183 f->segmentation_map = f->extradata->data;
184 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
/* Release everything f holds: the picture buffer and the extradata buffer.
 * segmentation_map pointed into extradata, so it is cleared as well. */
189 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
191 ff_thread_release_buffer(ctx, &f->tf);
192 av_buffer_unref(&f->extradata);
193 f->segmentation_map = NULL;
/*
 * Make dst a new reference to src: the picture buffer and the extradata
 * buffer are both reference-counted, and the segmentation_map pointer and
 * uses_2pass value are copied over.
 *
 * If referencing the extradata fails, dst is unreferenced again and
 * AVERROR(ENOMEM) is returned.
 */
196 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
200 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
202 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
203 vp9_unref_frame(ctx, dst);
204 return AVERROR(ENOMEM);
207 dst->segmentation_map = src->segmentation_map;
209 dst->uses_2pass = src->uses_2pass;
/*
 * Configure the decoder for frames of w x h pixels in format fmt.
 *
 * Sets the codec dimensions, recomputes the superblock/block grid, rebuilds
 * the pooled buffer that backs the intra-prediction rows, all above_*
 * contexts and the loop-filter level array, drops the block buffers so they
 * get re-allocated later, and re-initialises the DSP contexts when the bit
 * depth differs from the one last used.
 *
 * w and h must be positive (asserted). Returns AVERROR(ENOMEM) when the
 * pooled buffer cannot be allocated.
 */
214 static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
216 VP9Context *s = ctx->priv_data;
218 int bytesperpixel = s->bytesperpixel, res;
220 av_assert0(w > 0 && h > 0);
/* buffers already exist and neither the geometry nor the format changed */
222 if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
225 if ((res = ff_set_dimensions(ctx, w, h)) < 0)
/* round up to whole 64-pixel superblocks and whole 8-pixel blocks */
228 s->sb_cols = (w + 63) >> 6;
229 s->sb_rows = (h + 63) >> 6;
230 s->cols = (w + 7) >> 3;
231 s->rows = (h + 7) >> 3;
/* assign(): point var at the cursor p, then advance p by sb_cols * n
 * elements of *var */
233 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
/* intra_pred_data[0] is the first slice handed out below, i.e. the base of
 * the previous pool */
234 av_freep(&s->intra_pred_data[0]);
235 // FIXME we slightly over-allocate here for subsampled chroma, but a little
236 // bit of padding shouldn't affect performance...
/* per superblock column: 128 bytes of one-byte contexts (4 * 16 + 8 * 8),
 * 3 * 64 * bytesperpixel of intra-prediction row, one lflvl entry and 16
 * above_mv_ctx entries -- the same amounts the assign() calls hand out */
237 p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
238 sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
240 return AVERROR(ENOMEM);
241 assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
242 assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
243 assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
244 assign(s->above_y_nnz_ctx, uint8_t *, 16);
245 assign(s->above_mode_ctx, uint8_t *, 16);
246 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
247 assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
248 assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
249 assign(s->above_partition_ctx, uint8_t *, 8);
250 assign(s->above_skip_ctx, uint8_t *, 8);
251 assign(s->above_txfm_ctx, uint8_t *, 8);
252 assign(s->above_segpred_ctx, uint8_t *, 8);
253 assign(s->above_intra_ctx, uint8_t *, 8);
254 assign(s->above_comp_ctx, uint8_t *, 8);
255 assign(s->above_ref_ctx, uint8_t *, 8);
256 assign(s->above_filter_ctx, uint8_t *, 8);
257 assign(s->lflvl, struct VP9Filter *, 1);
260 // these will be re-allocated a little later
261 av_freep(&s->b_base);
262 av_freep(&s->block_base);
/* the DSP function tables depend on the bit depth */
264 if (s->bpp != s->last_bpp) {
265 ff_vp9dsp_init(&s->dsp, s->bpp, ctx->flags & AV_CODEC_FLAG_BITEXACT);
266 ff_videodsp_init(&s->vdsp, s->bpp);
267 s->last_bpp = s->bpp;
/*
 * Make sure the block array (b_base) and the coefficient/eob buffer
 * (block_base) exist and match the current frame's uses_2pass setting.
 *
 * With 2-pass decoding there is one VP9Block per 8x8 block of the frame and
 * coefficient storage for every superblock; otherwise a single VP9Block and
 * storage for one superblock suffice. The coefficient buffer is zeroed and
 * laid out as: luma coefficients, chroma plane 0, chroma plane 1, luma eob
 * bytes, chroma eob bytes for plane 0 and plane 1.
 *
 * Returns AVERROR(ENOMEM) if either allocation fails.
 */
273 static int update_block_buffers(AVCodecContext *ctx)
275 VP9Context *s = ctx->priv_data;
276 int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
/* existing buffers were allocated for the same mode and can be kept */
278 if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
282 av_free(s->block_base);
/* chroma counts per superblock shrink by one power of two per
 * subsampled direction */
283 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
284 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
285 if (s->s.frames[CUR_FRAME].uses_2pass) {
286 int sbs = s->sb_cols * s->sb_rows;
288 s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
289 s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
290 16 * 16 + 2 * chroma_eobs) * sbs);
291 if (!s->b_base || !s->block_base)
292 return AVERROR(ENOMEM);
/* offsets are in int16_t elements until the cast to uint8_t for eob */
293 s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
294 s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
295 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
296 s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
297 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
/* single-superblock variant of the allocation and layout above */
299 s->b_base = av_malloc(sizeof(VP9Block));
300 s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
301 16 * 16 + 2 * chroma_eobs);
302 if (!s->b_base || !s->block_base)
303 return AVERROR(ENOMEM);
304 s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
305 s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
306 s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
307 s->uveob_base[0] = s->eob_base + 16 * 16;
308 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
/* remember which mode the buffers were sized for */
310 s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;
315 // for some reason the sign bit is at the end, not the start, of a bit sequence
/* Read an n-bit magnitude followed by one sign bit from gb.
 * Returns the magnitude, negated when the sign bit is set. */
316 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
318 int v = get_bits(gb, n);
319 return get_bits1(gb) ? -v : v;
/* Map the folded offset v back to a value around the centre m:
 *   v > 2 * m -> v itself,
 *   v odd     -> m - (v + 1) / 2,
 *   v even    -> m + v / 2. */
322 static av_always_inline int inv_recenter_nonneg(int v, int m)
324 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
327 // differential forward probability updates
/*
 * Decode one differential probability update from range coder c.
 *
 * p is the current probability. A delta index d is read with a tiered
 * prefix code (4 bits + 0, 4 bits + 16, 5 bits + 32, or a 7-bit based value),
 * translated through inv_map_table[] and recentred around p with
 * inv_recenter_nonneg(). Returns the updated probability.
 */
328 static int update_prob(VP56RangeCoder *c, int p)
330 static const int inv_map_table[255] = {
331 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
332 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
333 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
334 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
335 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
336 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
337 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
338 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
339 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
340 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
341 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
342 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
343 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
344 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
345 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
346 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
347 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
348 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
353 /* This code is trying to do a differential probability update. For a
354 * current probability A in the range [1, 255], the difference to a new
355 * probability of any value can be expressed differentially as 1-A,255-A
356 * where some part of this (absolute range) exists both in positive as
357 * well as the negative part, whereas another part only exists in one
358 * half. We're trying to code this shared part differentially, i.e.
359 * times two where the value of the lowest bit specifies the sign, and
360 * the single part is then coded on top of this. This absolute difference
361 * then again has a value of [0,254], but a bigger value in this range
362 * indicates that we're further away from the original value A, so we
363 * can code this as a VLC code, since higher values are increasingly
364 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
365 * updates vs. the 'fine, exact' updates further down the range, which
366 * adds one extra dimension to this differential update model. */
/* each leading 1 bit moves on to the next, larger tier */
368 if (!vp8_rac_get(c)) {
369 d = vp8_rac_get_uint(c, 4) + 0;
370 } else if (!vp8_rac_get(c)) {
371 d = vp8_rac_get_uint(c, 4) + 16;
372 } else if (!vp8_rac_get(c)) {
373 d = vp8_rac_get_uint(c, 5) + 32;
375 d = vp8_rac_get_uint(c, 7);
377 d = (d << 1) - 65 + vp8_rac_get(c);
379 av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
/* recentre within whichever side of the range p lies on */
382 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
383 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
/*
 * Parse bit depth, colorspace, color range and chroma subsampling from the
 * frame header bit reader (s->gb).
 *
 * Updates s->bpp, s->bytesperpixel, s->ss_h, s->ss_v, ctx->colorspace and
 * ctx->color_range. Returns the matching pixel format, or a negative
 * AVERROR code (callers test the result with < 0) when the combination is
 * not allowed for the current profile or a reserved bit is set.
 */
386 static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
/* indexed by the 3-bit colorspace code read from the bitstream */
388 static const enum AVColorSpace colorspaces[8] = {
389 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
390 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
392 VP9Context *s = ctx->priv_data;
393 enum AVPixelFormat res;
/* profiles 0 and 1 carry no depth bit; higher profiles read one */
394 int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
397 s->bpp = 8 + bits * 2;
398 s->bytesperpixel = (7 + s->bpp) >> 3;
399 ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
400 if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
401 static const enum AVPixelFormat pix_fmt_rgb[3] = {
402 AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
/* RGB is accepted only for odd profiles; it is never subsampled */
404 if (ctx->profile & 1) {
405 s->ss_h = s->ss_v = 0;
406 res = pix_fmt_rgb[bits];
407 ctx->color_range = AVCOL_RANGE_JPEG;
408 if (get_bits1(&s->gb)) {
409 av_log(ctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
410 return AVERROR_INVALIDDATA;
413 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
415 return AVERROR_INVALIDDATA;
/* indexed by depth index, vertical subsampling, horizontal subsampling */
418 static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
419 { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
420 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
421 { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
422 { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
423 { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
424 { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
426 ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
/* odd profiles signal the subsampling explicitly */
427 if (ctx->profile & 1) {
428 s->ss_h = get_bits1(&s->gb);
429 s->ss_v = get_bits1(&s->gb);
430 if ((res = pix_fmt_for_ss[bits][s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
431 av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
433 return AVERROR_INVALIDDATA;
434 } else if (get_bits1(&s->gb)) {
435 av_log(ctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
437 return AVERROR_INVALIDDATA;
/* even profiles: both chroma directions subsampled */
440 s->ss_h = s->ss_v = 1;
441 res = pix_fmt_for_ss[bits][1][1];
/*
 * Parse a frame header: the bit-packed uncompressed part followed by the
 * range-coded compressed part with the forward probability updates.
 *
 * ctx   codec context; its priv_data (VP9Context) is updated in place.
 * data  start of the frame payload.
 * size  number of bytes available at data.
 * ref   receives a 3-bit reference index when the bit read right after the
 *       profile is set.
 *
 * Returns (data2 - data) + size2, i.e. the bytes of the uncompressed header
 * plus the compressed header size, or a negative AVERROR code on invalid
 * data or allocation failure.
 */
448 static int decode_frame_header(AVCodecContext *ctx,
449 const uint8_t *data, int size, int *ref)
451 VP9Context *s = ctx->priv_data;
452 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
453 enum AVPixelFormat fmt = ctx->pix_fmt;
455 const uint8_t *data2;
458 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
459 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
462 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
463 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
464 return AVERROR_INVALIDDATA;
/* profile: low bit first, then high bit; value 3 reads one extra bit */
466 ctx->profile = get_bits1(&s->gb);
467 ctx->profile |= get_bits1(&s->gb) << 1;
468 if (ctx->profile == 3) ctx->profile += get_bits1(&s->gb);
469 if (ctx->profile > 3) {
470 av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
471 return AVERROR_INVALIDDATA;
473 s->s.h.profile = ctx->profile;
474 if (get_bits1(&s->gb)) {
475 *ref = get_bits(&s->gb, 3);
/* remember the previous frame's flags before overwriting them */
478 s->last_keyframe = s->s.h.keyframe;
479 s->s.h.keyframe = !get_bits1(&s->gb);
480 last_invisible = s->s.h.invisible;
481 s->s.h.invisible = !get_bits1(&s->gb);
482 s->s.h.errorres = get_bits1(&s->gb);
483 s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
484 if (s->s.h.keyframe) {
485 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
486 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
487 return AVERROR_INVALIDDATA;
489 if ((fmt = read_colorspace_details(ctx)) < 0)
491 // for profile 1, here follows the subsampling bits
492 s->s.h.refreshrefmask = 0xff;
493 w = get_bits(&s->gb, 16) + 1;
494 h = get_bits(&s->gb, 16) + 1;
495 if (get_bits1(&s->gb)) // display size
496 skip_bits(&s->gb, 32);
498 s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
499 s->s.h.resetctx = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
500 if (s->s.h.intraonly) {
501 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
502 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
503 return AVERROR_INVALIDDATA;
505 if (ctx->profile >= 1) {
506 if ((fmt = read_colorspace_details(ctx)) < 0)
509 s->ss_h = s->ss_v = 1;
512 s->bytesperpixel = 1;
513 fmt = AV_PIX_FMT_YUV420P;
514 ctx->colorspace = AVCOL_SPC_BT470BG;
515 ctx->color_range = AVCOL_RANGE_JPEG;
517 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
518 w = get_bits(&s->gb, 16) + 1;
519 h = get_bits(&s->gb, 16) + 1;
520 if (get_bits1(&s->gb)) // display size
521 skip_bits(&s->gb, 32);
523 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
524 s->s.h.refidx[0] = get_bits(&s->gb, 3);
525 s->s.h.signbias[0] = get_bits1(&s->gb) && !s->s.h.errorres;
526 s->s.h.refidx[1] = get_bits(&s->gb, 3);
527 s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres;
528 s->s.h.refidx[2] = get_bits(&s->gb, 3);
529 s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres;
530 if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
531 !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
532 !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
533 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
534 return AVERROR_INVALIDDATA;
/* frame size: copied from the first reference whose flag bit is set,
 * otherwise coded explicitly */
536 if (get_bits1(&s->gb)) {
537 w = s->s.refs[s->s.h.refidx[0]].f->width;
538 h = s->s.refs[s->s.h.refidx[0]].f->height;
539 } else if (get_bits1(&s->gb)) {
540 w = s->s.refs[s->s.h.refidx[1]].f->width;
541 h = s->s.refs[s->s.h.refidx[1]].f->height;
542 } else if (get_bits1(&s->gb)) {
543 w = s->s.refs[s->s.h.refidx[2]].f->width;
544 h = s->s.refs[s->s.h.refidx[2]].f->height;
546 w = get_bits(&s->gb, 16) + 1;
547 h = get_bits(&s->gb, 16) + 1;
549 // Note that in this code, "CUR_FRAME" is actually before we
550 // have formally allocated a frame, and thus actually represents
552 s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
553 s->s.frames[CUR_FRAME].tf.f->height == h;
554 if (get_bits1(&s->gb)) // display size
555 skip_bits(&s->gb, 32);
556 s->s.h.highprecisionmvs = get_bits1(&s->gb);
557 s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
/* compound prediction needs references with differing sign bias; the
 * reference whose bias differs from the other two becomes the fixed one */
559 s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
560 s->s.h.signbias[0] != s->s.h.signbias[2];
561 if (s->s.h.allowcompinter) {
562 if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
563 s->s.h.fixcompref = 2;
564 s->s.h.varcompref[0] = 0;
565 s->s.h.varcompref[1] = 1;
566 } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
567 s->s.h.fixcompref = 1;
568 s->s.h.varcompref[0] = 0;
569 s->s.h.varcompref[1] = 2;
571 s->s.h.fixcompref = 0;
572 s->s.h.varcompref[0] = 1;
573 s->s.h.varcompref[1] = 2;
/* validate each reference and derive its motion-vector scaling */
577 for (i = 0; i < 3; i++) {
578 AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
579 int refw = ref->width, refh = ref->height;
581 if (ref->format != fmt) {
582 av_log(ctx, AV_LOG_ERROR,
583 "Ref pixfmt (%s) did not match current frame (%s)\n",
584 av_get_pix_fmt_name(ref->format),
585 av_get_pix_fmt_name(fmt));
586 return AVERROR_INVALIDDATA;
587 } else if (refw == w && refh == h) {
588 s->mvscale[i][0] = s->mvscale[i][1] = 0;
/* a reference may be at most twice as large and at most 16 times
 * smaller than the frame in either direction */
590 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
591 av_log(ctx, AV_LOG_ERROR,
592 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
594 return AVERROR_INVALIDDATA;
/* reference/current size ratio in 14-bit fixed point */
596 s->mvscale[i][0] = (refw << 14) / w;
597 s->mvscale[i][1] = (refh << 14) / h;
598 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
599 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
604 s->s.h.refreshctx = s->s.h.errorres ? 0 : get_bits1(&s->gb);
605 s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
606 s->s.h.framectxid = c = get_bits(&s->gb, 2);
608 /* loopfilter header data */
609 if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
610 // reset loopfilter defaults
611 s->s.h.lf_delta.ref[0] = 1;
612 s->s.h.lf_delta.ref[1] = 0;
613 s->s.h.lf_delta.ref[2] = -1;
614 s->s.h.lf_delta.ref[3] = -1;
615 s->s.h.lf_delta.mode[0] = 0;
616 s->s.h.lf_delta.mode[1] = 0;
617 memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
619 s->s.h.filter.level = get_bits(&s->gb, 6);
620 sharp = get_bits(&s->gb, 3);
621 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
622 // the old cache values since they are still valid
623 if (s->s.h.filter.sharpness != sharp)
624 memset(s->filter_lut.lim_lut, 0, sizeof(s->filter_lut.lim_lut));
625 s->s.h.filter.sharpness = sharp;
626 if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
627 if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
628 for (i = 0; i < 4; i++)
629 if (get_bits1(&s->gb))
630 s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
631 for (i = 0; i < 2; i++)
632 if (get_bits1(&s->gb))
633 s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
637 /* quantization header data */
638 s->s.h.yac_qi = get_bits(&s->gb, 8);
639 s->s.h.ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
640 s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
641 s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
642 s->s.h.lossless = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
643 s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
645 ctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
647 /* segmentation header info */
648 if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
649 if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
650 for (i = 0; i < 7; i++)
651 s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
652 get_bits(&s->gb, 8) : 255;
653 if ((s->s.h.segmentation.temporal = get_bits1(&s->gb))) {
654 for (i = 0; i < 3; i++)
655 s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
656 get_bits(&s->gb, 8) : 255;
660 if (get_bits1(&s->gb)) {
661 s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
662 for (i = 0; i < 8; i++) {
663 if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
664 s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
665 if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
666 s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
667 if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
668 s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
669 s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
674 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
675 for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
676 int qyac, qydc, quvac, quvdc, lflvl, sh;
678 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
679 if (s->s.h.segmentation.absolute_vals)
680 qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
682 qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
684 qyac = s->s.h.yac_qi;
686 qydc = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
687 quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
688 quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
689 qyac = av_clip_uintp2(qyac, 8);
691 s->s.h.segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
692 s->s.h.segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
693 s->s.h.segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
694 s->s.h.segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];
/* deltas count double once the base filter level reaches 32 */
696 sh = s->s.h.filter.level >= 32;
697 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
698 if (s->s.h.segmentation.absolute_vals)
699 lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
701 lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
703 lflvl = s->s.h.filter.level;
705 if (s->s.h.lf_delta.enabled) {
706 s->s.h.segmentation.feat[i].lflvl[0][0] =
707 s->s.h.segmentation.feat[i].lflvl[0][1] =
/* multiply instead of shifting: lf_delta.ref[] is signed and may be
 * negative, and left-shifting a negative value is undefined behaviour */
708 av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
709 for (j = 1; j < 4; j++) {
710 s->s.h.segmentation.feat[i].lflvl[j][0] =
711 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
712 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
713 s->s.h.segmentation.feat[i].lflvl[j][1] =
714 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
715 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
718 memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
719 sizeof(s->s.h.segmentation.feat[i].lflvl));
724 if ((res = update_size(ctx, w, h, fmt)) < 0) {
725 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
/* tiling: start from the smallest log2 column count that keeps a tile at
 * or below 64 superblocks wide, then read increment bits up to max */
728 for (s->s.h.tiling.log2_tile_cols = 0;
729 s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
730 s->s.h.tiling.log2_tile_cols++) ;
731 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
732 max = FFMAX(0, max - 1);
733 while (max > s->s.h.tiling.log2_tile_cols) {
734 if (get_bits1(&s->gb))
735 s->s.h.tiling.log2_tile_cols++;
739 s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
740 s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
/* one range coder per tile column; resize only when the count changes */
741 if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
742 s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
743 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
744 sizeof(VP56RangeCoder) * s->s.h.tiling.tile_cols);
746 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
747 return AVERROR(ENOMEM);
/* reset all four saved probability contexts, or only context c */
751 if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
752 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
753 s->prob_ctx[3].p = vp9_default_probs;
754 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
755 sizeof(vp9_default_coef_probs));
756 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
757 sizeof(vp9_default_coef_probs));
758 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
759 sizeof(vp9_default_coef_probs));
760 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
761 sizeof(vp9_default_coef_probs));
762 } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
763 s->prob_ctx[c].p = vp9_default_probs;
764 memcpy(s->prob_ctx[c].coef, vp9_default_coef_probs,
765 sizeof(vp9_default_coef_probs));
768 // next 16 bits is size of the rest of the header (arith-coded)
769 s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
770 s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
/* the compressed header must fit in what is left of the packet */
772 data2 = align_get_bits(&s->gb);
773 if (size2 > size - (data2 - data)) {
774 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
775 return AVERROR_INVALIDDATA;
777 ff_vp56_init_range_decoder(&s->c, data2, size2);
778 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
779 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
780 return AVERROR_INVALIDDATA;
783 if (s->s.h.keyframe || s->s.h.intraonly) {
784 memset(s->counts.coef, 0, sizeof(s->counts.coef));
785 memset(s->counts.eob, 0, sizeof(s->counts.eob));
787 memset(&s->counts, 0, sizeof(s->counts));
789 // FIXME is it faster to not copy here, but do it down in the fw updates
790 // as explicit copies if the fw update is missing (and skip the copy upon
792 s->prob.p = s->prob_ctx[c].p;
795 if (s->s.h.lossless) {
796 s->s.h.txfmmode = TX_4X4;
798 s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
799 if (s->s.h.txfmmode == 3)
800 s->s.h.txfmmode += vp8_rac_get(&s->c);
802 if (s->s.h.txfmmode == TX_SWITCHABLE) {
803 for (i = 0; i < 2; i++)
804 if (vp56_rac_get_prob_branchy(&s->c, 252))
805 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
806 for (i = 0; i < 2; i++)
807 for (j = 0; j < 2; j++)
808 if (vp56_rac_get_prob_branchy(&s->c, 252))
809 s->prob.p.tx16p[i][j] =
810 update_prob(&s->c, s->prob.p.tx16p[i][j]);
811 for (i = 0; i < 2; i++)
812 for (j = 0; j < 3; j++)
813 if (vp56_rac_get_prob_branchy(&s->c, 252))
814 s->prob.p.tx32p[i][j] =
815 update_prob(&s->c, s->prob.p.tx32p[i][j]);
/* coefficient probabilities, one pass per transform size */
820 for (i = 0; i < 4; i++) {
821 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
822 if (vp8_rac_get(&s->c)) {
823 for (j = 0; j < 2; j++)
824 for (k = 0; k < 2; k++)
825 for (l = 0; l < 6; l++)
826 for (m = 0; m < 6; m++) {
827 uint8_t *p = s->prob.coef[i][j][k][l][m];
828 uint8_t *r = ref[j][k][l][m];
829 if (m >= 3 && l == 0) // dc only has 3 pt
831 for (n = 0; n < 3; n++) {
832 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
833 p[n] = update_prob(&s->c, r[n]);
841 for (j = 0; j < 2; j++)
842 for (k = 0; k < 2; k++)
843 for (l = 0; l < 6; l++)
844 for (m = 0; m < 6; m++) {
845 uint8_t *p = s->prob.coef[i][j][k][l][m];
846 uint8_t *r = ref[j][k][l][m];
847 if (m > 3 && l == 0) // dc only has 3 pt
853 if (s->s.h.txfmmode == i)
858 for (i = 0; i < 3; i++)
859 if (vp56_rac_get_prob_branchy(&s->c, 252))
860 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
/* everything below applies to inter frames only */
861 if (!s->s.h.keyframe && !s->s.h.intraonly) {
862 for (i = 0; i < 7; i++)
863 for (j = 0; j < 3; j++)
864 if (vp56_rac_get_prob_branchy(&s->c, 252))
865 s->prob.p.mv_mode[i][j] =
866 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
868 if (s->s.h.filtermode == FILTER_SWITCHABLE)
869 for (i = 0; i < 4; i++)
870 for (j = 0; j < 2; j++)
871 if (vp56_rac_get_prob_branchy(&s->c, 252))
872 s->prob.p.filter[i][j] =
873 update_prob(&s->c, s->prob.p.filter[i][j]);
875 for (i = 0; i < 4; i++)
876 if (vp56_rac_get_prob_branchy(&s->c, 252))
877 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
879 if (s->s.h.allowcompinter) {
880 s->s.h.comppredmode = vp8_rac_get(&s->c);
881 if (s->s.h.comppredmode)
882 s->s.h.comppredmode += vp8_rac_get(&s->c);
883 if (s->s.h.comppredmode == PRED_SWITCHABLE)
884 for (i = 0; i < 5; i++)
885 if (vp56_rac_get_prob_branchy(&s->c, 252))
887 update_prob(&s->c, s->prob.p.comp[i]);
889 s->s.h.comppredmode = PRED_SINGLEREF;
892 if (s->s.h.comppredmode != PRED_COMPREF) {
893 for (i = 0; i < 5; i++) {
894 if (vp56_rac_get_prob_branchy(&s->c, 252))
895 s->prob.p.single_ref[i][0] =
896 update_prob(&s->c, s->prob.p.single_ref[i][0]);
897 if (vp56_rac_get_prob_branchy(&s->c, 252))
898 s->prob.p.single_ref[i][1] =
899 update_prob(&s->c, s->prob.p.single_ref[i][1]);
903 if (s->s.h.comppredmode != PRED_SINGLEREF) {
904 for (i = 0; i < 5; i++)
905 if (vp56_rac_get_prob_branchy(&s->c, 252))
906 s->prob.p.comp_ref[i] =
907 update_prob(&s->c, s->prob.p.comp_ref[i]);
910 for (i = 0; i < 4; i++)
911 for (j = 0; j < 9; j++)
912 if (vp56_rac_get_prob_branchy(&s->c, 252))
913 s->prob.p.y_mode[i][j] =
914 update_prob(&s->c, s->prob.p.y_mode[i][j]);
916 for (i = 0; i < 4; i++)
917 for (j = 0; j < 4; j++)
918 for (k = 0; k < 3; k++)
919 if (vp56_rac_get_prob_branchy(&s->c, 252))
920 s->prob.p.partition[3 - i][j][k] =
921 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
923 // mv fields don't use the update_prob subexp model for some reason
/* instead each updated mv probability is a 7-bit value, doubled, plus 1 */
924 for (i = 0; i < 3; i++)
925 if (vp56_rac_get_prob_branchy(&s->c, 252))
926 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
928 for (i = 0; i < 2; i++) {
929 if (vp56_rac_get_prob_branchy(&s->c, 252))
930 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
932 for (j = 0; j < 10; j++)
933 if (vp56_rac_get_prob_branchy(&s->c, 252))
934 s->prob.p.mv_comp[i].classes[j] =
935 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
937 if (vp56_rac_get_prob_branchy(&s->c, 252))
938 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
940 for (j = 0; j < 10; j++)
941 if (vp56_rac_get_prob_branchy(&s->c, 252))
942 s->prob.p.mv_comp[i].bits[j] =
943 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
946 for (i = 0; i < 2; i++) {
947 for (j = 0; j < 2; j++)
948 for (k = 0; k < 3; k++)
949 if (vp56_rac_get_prob_branchy(&s->c, 252))
950 s->prob.p.mv_comp[i].class0_fp[j][k] =
951 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
953 for (j = 0; j < 3; j++)
954 if (vp56_rac_get_prob_branchy(&s->c, 252))
955 s->prob.p.mv_comp[i].fp[j] =
956 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
959 if (s->s.h.highprecisionmvs) {
960 for (i = 0; i < 2; i++) {
961 if (vp56_rac_get_prob_branchy(&s->c, 252))
962 s->prob.p.mv_comp[i].class0_hp =
963 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
965 if (vp56_rac_get_prob_branchy(&s->c, 252))
966 s->prob.p.mv_comp[i].hp =
967 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
/* total bytes taken by both header parts */
972 return (data2 - data) + size2;
/* Store src in dst with x clipped to [min_mv.x, max_mv.x] and y clipped to
 * [min_mv.y, max_mv.y], the bounds being taken from the decoder context. */
975 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
978 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
979 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
/**
 * Pick a motion vector predictor for one reference of the current block.
 *
 * Candidates are examined in the order of the code below: vectors already
 * decoded for earlier sub-blocks of this block, the above and left MV
 * context entries, the eight neighbour positions listed in mv_ref_blk_off
 * for the block size, and the co-located entry of the previous frame. The
 * neighbour and previous-frame sources are consulted twice: first for
 * entries that use the same reference frame, then for entries that use a
 * different one. The chosen vector is stored through pmv.
 *
 * @param s   decoder context (position, tile bounds, frame MV buffers)
 * @param pmv output predictor
 * @param ref reference frame index that candidates are compared against
 * @param z   second index into b->mv[][z] when earlier sub-block vectors
 *            are reused; fill_mv() passes 0 for the first and 1 for the
 *            second reference
 * @param idx fill_mv() passes (mode == NEARMV); the sub8x8 path of
 *            RETURN_MV asserts idx == 1
 * @param sb  sub-block index, or -1 as passed by fill_mv() for NEWMV and
 *            by decode_mode() for blocks without per-sub-block vectors
 */
982 static void find_ref_mvs(VP9Context *s,
983 VP56mv *pmv, int ref, int z, int idx, int sb)
// Neighbour positions probed for each block size, stored as
// { column offset, row offset } relative to s->col / s->row.
985 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
986 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
987 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
988 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
989 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
990 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
991 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
992 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
993 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
994 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
995 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
996 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
997 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
998 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
999 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1000 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
1001 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
1002 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1003 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1004 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1005 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1006 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1007 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1008 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1009 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1010 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1011 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1014 int row = s->row, col = s->col, row7 = s->row7;
1015 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
// Sentinel stored in mem / mem_sub8x8 while no candidate has been remembered.
1016 #define INVALID_MV 0x80008000U
1017 uint32_t mem = INVALID_MV, mem_sub8x8 = INVALID_MV;
// RETURN_DIRECT_MV: offer the vector of an earlier sub-block of this block
// as a candidate; it is read as one 32-bit word and compared against the
// remembered candidate in mem.
1020 #define RETURN_DIRECT_MV(mv) \
1022 uint32_t m = AV_RN32A(&mv); \
1026 } else if (mem == INVALID_MV) { \
1028 } else if (m != mem) { \
1035 if (sb == 2 || sb == 1) {
1036 RETURN_DIRECT_MV(b->mv[0][z]);
1037 } else if (sb == 3) {
1038 RETURN_DIRECT_MV(b->mv[2][z]);
1039 RETURN_DIRECT_MV(b->mv[1][z]);
1040 RETURN_DIRECT_MV(b->mv[0][z]);
// RETURN_MV: offer a candidate that is clamped with clamp_mv() before it is
// used. One variant additionally tracks the unclamped value in mem_sub8x8
// and asserts idx == 1.
1043 #define RETURN_MV(mv) \
1048 av_assert2(idx == 1); \
1049 av_assert2(mem != INVALID_MV); \
1050 if (mem_sub8x8 == INVALID_MV) { \
1051 clamp_mv(&tmp, &mv, s); \
1052 m = AV_RN32A(&tmp); \
1057 mem_sub8x8 = AV_RN32A(&mv); \
1058 } else if (mem_sub8x8 != AV_RN32A(&mv)) { \
1059 clamp_mv(&tmp, &mv, s); \
1060 m = AV_RN32A(&tmp); \
1064 /* BUG I'm pretty sure this isn't the intention */ \
1070 uint32_t m = AV_RN32A(&mv); \
1072 clamp_mv(pmv, &mv, s); \
1074 } else if (mem == INVALID_MV) { \
1076 } else if (m != mem) { \
1077 clamp_mv(pmv, &mv, s); \
1084 struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1085 if (mv->ref[0] == ref) {
1086 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1087 } else if (mv->ref[1] == ref) {
1088 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
// left neighbour: only used when it lies inside the current tile
1091 if (col > s->tile_col_start) {
1092 struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1093 if (mv->ref[0] == ref) {
1094 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1095 } else if (mv->ref[1] == ref) {
1096 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1104 // previously coded MVs in this neighbourhood, using same reference frame
1105 for (; i < 8; i++) {
1106 int c = p[i][0] + col, r = p[i][1] + row;
1108 if (c >= s->tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1109 struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1111 if (mv->ref[0] == ref) {
1112 RETURN_MV(mv->mv[0]);
1113 } else if (mv->ref[1] == ref) {
1114 RETURN_MV(mv->mv[1]);
1119 // MV at this position in previous frame, using same reference frame
1120 if (s->s.h.use_last_frame_mvs) {
1121 struct VP9mvrefPair *mv = &s->s.frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
// unless that frame was decoded in 2-pass mode, wait until its reported
// progress has reached row >> 3 before reading its MV buffer
1123 if (!s->s.frames[REF_FRAME_MVPAIR].uses_2pass)
1124 ff_thread_await_progress(&s->s.frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
1125 if (mv->ref[0] == ref) {
1126 RETURN_MV(mv->mv[0]);
1127 } else if (mv->ref[1] == ref) {
1128 RETURN_MV(mv->mv[1]);
// RETURN_SCALE_MV: candidate that uses a different reference frame. Callers
// pass a sign-bias mismatch as scale; the visible branch hands a negated
// copy of the vector to RETURN_MV (presumably when scale is non-zero).
1132 #define RETURN_SCALE_MV(mv, scale) \
1135 VP56mv mv_temp = { -mv.x, -mv.y }; \
1136 RETURN_MV(mv_temp); \
1142 // previously coded MVs in this neighbourhood, using different reference frame
1143 for (i = 0; i < 8; i++) {
1144 int c = p[i][0] + col, r = p[i][1] + row;
1146 if (c >= s->tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1147 struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1149 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1150 RETURN_SCALE_MV(mv->mv[0], s->s.h.signbias[mv->ref[0]] != s->s.h.signbias[ref]);
1152 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1153 // BUG - libvpx has this condition regardless of whether
1154 // we used the first ref MV and pre-scaling
1155 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1156 RETURN_SCALE_MV(mv->mv[1], s->s.h.signbias[mv->ref[1]] != s->s.h.signbias[ref]);
1161 // MV at this position in previous frame, using different reference frame
1162 if (s->s.h.use_last_frame_mvs) {
1163 struct VP9mvrefPair *mv = &s->s.frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1165 // no need to await_progress, because we already did that above
1166 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1167 RETURN_SCALE_MV(mv->mv[0], s->s.h.signbias[mv->ref[0]] != s->s.h.signbias[ref]);
1169 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1170 // BUG - libvpx has this condition regardless of whether
1171 // we used the first ref MV and pre-scaling
1172 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1173 RETURN_SCALE_MV(mv->mv[1], s->s.h.signbias[mv->ref[1]] != s->s.h.signbias[ref]);
// reached when no candidate caused an earlier return: clamp pmv in place
1178 clamp_mv(pmv, pmv, s);
1181 #undef RETURN_SCALE_MV
/**
 * Read one motion vector component difference from the range coder.
 *
 * A sign and a magnitude class are read first. The remaining magnitude
 * bits come either from the per-class integer bits plus the fp tree, or
 * from the class0 / class0_fp symbols. Both paths can additionally read a
 * high-precision bit (hp / class0_hp). Every symbol read is also tallied in
 * s->counts.mv_comp[idx].
 *
 * @param s   decoder context holding the range coder, probabilities and counts
 * @param idx index into s->prob.p.mv_comp[] and s->counts.mv_comp[];
 *            fill_mv() uses 0 for the y and 1 for the x component
 * @param hp  NOTE(review): presumably selects whether the high-precision
 *            bit is read or only counted; confirm against the full body
 * @return the signed difference: -(n + 1) when the sign bit is set,
 *         (n + 1) otherwise
 */
1184 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1186 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1187 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1188 s->prob.p.mv_comp[idx].classes);
1190 s->counts.mv_comp[idx].sign[sign]++;
1191 s->counts.mv_comp[idx].classes[c]++;
// integer magnitude bits: one bit is read for every m below the class value
1195 for (n = 0, m = 0; m < c; m++) {
1196 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1198 s->counts.mv_comp[idx].bits[m][bit]++;
1201 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1203 s->counts.mv_comp[idx].fp[bit]++;
1205 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1206 s->counts.mv_comp[idx].hp[bit]++;
1210 // bug in libvpx - we count for bw entropy purposes even if the
1212 s->counts.mv_comp[idx].hp[1]++;
// class0 symbols: n starts from the class0 bit and the class0_fp value,
// combined as (n << 3) | (bit << 1)
1216 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1217 s->counts.mv_comp[idx].class0[n]++;
1218 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1219 s->prob.p.mv_comp[idx].class0_fp[n]);
1220 s->counts.mv_comp[idx].class0_fp[n][bit]++;
1221 n = (n << 3) | (bit << 1);
1223 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1224 s->counts.mv_comp[idx].class0_hp[bit]++;
1228 // bug in libvpx - we count for bw entropy purposes even if the
1230 s->counts.mv_comp[idx].class0_hp[1]++;
1234 return sign ? -(n + 1) : (n + 1);
/**
 * Produce the motion vector(s) of one (sub-)block for the given inter mode.
 *
 * ZEROMV is special-cased. For the other modes the predictor comes from
 * find_ref_mvs(); for NEWMV a joint symbol and the component differences
 * it announces are then read and added to the predictor. The same sequence
 * appears twice, once for mv[0] with b->ref[0] and once for mv[1] with
 * b->ref[1].
 *
 * @param s    decoder context
 * @param mv   pair of vectors to fill, indexed by reference
 * @param mode inter prediction mode; ZEROMV, NEARMV and NEWMV are the
 *             values tested here
 * @param sb   sub-block index, or -1 (see the calls in decode_mode())
 */
1237 static void fill_mv(VP9Context *s,
1238 VP56mv *mv, int mode, int sb)
1242 if (mode == ZEROMV) {
1247 // FIXME cache this value and reuse for other subblocks
1248 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1249 mode == NEWMV ? -1 : sb);
1250 // FIXME maybe move this code into find_ref_mvs()
// hp is set only when high-precision MVs are enabled and both components
// of the predictor are below 64 in magnitude; the guarded block runs when
// hp ends up zero
1251 if ((mode == NEWMV || sb == -1) &&
1252 !(hp = s->s.h.highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1266 if (mode == NEWMV) {
1267 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1268 s->prob.p.mv_joint);
1270 s->counts.mv_joint[j]++;
// component 0 is the y difference, component 1 the x difference
1271 if (j >= MV_JOINT_V)
1272 mv[0].y += read_mv_component(s, 0, hp);
1274 mv[0].x += read_mv_component(s, 1, hp);
1278 // FIXME cache this value and reuse for other subblocks
1279 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1280 mode == NEWMV ? -1 : sb);
1281 if ((mode == NEWMV || sb == -1) &&
1282 !(hp = s->s.h.highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1296 if (mode == NEWMV) {
1297 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1298 s->prob.p.mv_joint);
1300 s->counts.mv_joint[j]++;
1301 if (j >= MV_JOINT_V)
1302 mv[1].y += read_mv_component(s, 0, hp);
1304 mv[1].x += read_mv_component(s, 1, hp);
/**
 * Fill a two-dimensional region that starts at ptr with the value v.
 *
 * decode_mode() uses it to write the segment id into the segmentation map,
 * passing the block width and height as w and h and the map row pitch as
 * stride.
 *
 * The body prepares v replicated into every byte of a 16-, 32- or 64-bit
 * word (v16 / v32 / v64) and stores with the aligned AV_WN* helpers.
 * NOTE(review): which word size serves which w, and how rows are stepped
 * by stride, should be confirmed against the full body.
 *
 * @param ptr    first byte of the region
 * @param w      region width
 * @param h      region height
 * @param stride distance between rows, presumably in bytes
 * @param v      value to replicate; assumed to fit in one byte
 */
1310 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1311 ptrdiff_t stride, int v)
1321 int v16 = v * 0x0101;
1329 uint32_t v32 = v * 0x01010101;
1338 uint64_t v64 = v * 0x0101010101010101ULL;
1344 uint32_t v32 = v * 0x01010101;
1347 AV_WN32A(ptr + 4, v32);
/**
 * Parse the mode information of the current block from the range coder.
 *
 * In the order of the code below this covers: segment id, skip flag,
 * intra/inter flag, transform size, intra prediction modes (keyframe or
 * intra-only frames, and intra blocks of inter frames), and for inter
 * blocks the compound flag, reference frames, inter modes, interpolation
 * filter and motion vectors. Afterwards the above/left contexts and the
 * per-position ref/MV buffer of the current frame are updated so that
 * following blocks can derive their contexts from this one.
 *
 * Symbols coded with adaptive probabilities (s->prob.p) are tallied in
 * s->counts as they are read.
 *
 * @param ctx codec context; its priv_data is the VP9Context
 */
1356 static void decode_mode(AVCodecContext *ctx)
// values written into the left / above partition context for each block size
1358 static const uint8_t left_ctx[N_BS_SIZES] = {
1359 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1361 static const uint8_t above_ctx[N_BS_SIZES] = {
1362 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
// largest transform size allowed for each block size
1364 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1365 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1366 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1368 VP9Context *s = ctx->priv_data;
1370 int row = s->row, col = s->col, row7 = s->row7;
1371 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
// bw4 / bh4: block width and height from bwh_tab; w4 / h4: the same values
// limited to what remains before s->cols / s->rows
1372 int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1373 int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
1374 int have_a = row > 0, have_l = col > s->tile_col_start;
1375 int vref, filter_id;
// ---- segment id ----
1377 if (!s->s.h.segmentation.enabled) {
1379 } else if (s->s.h.keyframe || s->s.h.intraonly) {
1380 b->seg_id = !s->s.h.segmentation.update_map ? 0 :
1381 vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->s.h.segmentation.prob);
1382 } else if (!s->s.h.segmentation.update_map ||
1383 (s->s.h.segmentation.temporal &&
1384 vp56_rac_get_prob_branchy(&s->c,
1385 s->s.h.segmentation.pred_prob[s->above_segpred_ctx[col] +
1386 s->left_segpred_ctx[row7]]))) {
1387 if (!s->s.h.errorres && s->s.frames[REF_FRAME_SEGMAP].segmentation_map) {
// predicted id: minimum of the reference segmentation map over the
// visible part of the block
1389 uint8_t *refsegmap = s->s.frames[REF_FRAME_SEGMAP].segmentation_map;
1391 if (!s->s.frames[REF_FRAME_SEGMAP].uses_2pass)
1392 ff_thread_await_progress(&s->s.frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
1393 for (y = 0; y < h4; y++) {
1394 int idx_base = (y + row) * 8 * s->sb_cols + col;
1395 for (x = 0; x < w4; x++)
1396 pred = FFMIN(pred, refsegmap[idx_base + x]);
1398 av_assert1(pred < 8);
1404 memset(&s->above_segpred_ctx[col], 1, w4);
1405 memset(&s->left_segpred_ctx[row7], 1, h4);
1407 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1408 s->s.h.segmentation.prob);
1410 memset(&s->above_segpred_ctx[col], 0, w4);
1411 memset(&s->left_segpred_ctx[row7], 0, h4);
// record the id in the segmentation map of the current frame whenever the
// map is (re)coded for this frame
1413 if (s->s.h.segmentation.enabled &&
1414 (s->s.h.segmentation.update_map || s->s.h.keyframe || s->s.h.intraonly)) {
1415 setctx_2d(&s->s.frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1416 bw4, bh4, 8 * s->sb_cols, b->seg_id);
// ---- skip flag: forced by the segment feature, otherwise read with a
// context built from the left and above skip flags ----
1419 b->skip = s->s.h.segmentation.enabled &&
1420 s->s.h.segmentation.feat[b->seg_id].skip_enabled;
1422 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1423 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1424 s->counts.skip[c][b->skip]++;
// ---- intra / inter decision ----
1427 if (s->s.h.keyframe || s->s.h.intraonly) {
1429 } else if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
1430 b->intra = !s->s.h.segmentation.feat[b->seg_id].ref_val;
1434 if (have_a && have_l) {
1435 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1438 c = have_a ? 2 * s->above_intra_ctx[col] :
1439 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1441 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1442 s->counts.intra[c][bit]++;
// ---- transform size: coded per block only in TX_SWITCHABLE mode,
// otherwise derived from the frame-level mode and max_tx ----
1446 if ((b->intra || !b->skip) && s->s.h.txfmmode == TX_SWITCHABLE) {
1450 c = (s->above_skip_ctx[col] ? max_tx :
1451 s->above_txfm_ctx[col]) +
1452 (s->left_skip_ctx[row7] ? max_tx :
1453 s->left_txfm_ctx[row7]) > max_tx;
1455 c = s->above_skip_ctx[col] ? 1 :
1456 (s->above_txfm_ctx[col] * 2 > max_tx);
1458 } else if (have_l) {
1459 c = s->left_skip_ctx[row7] ? 1 :
1460 (s->left_txfm_ctx[row7] * 2 > max_tx);
1466 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1468 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1470 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1472 s->counts.tx32p[c][b->tx]++;
1475 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1477 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1478 s->counts.tx16p[c][b->tx]++;
1481 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1482 s->counts.tx8p[c][b->tx]++;
1489 b->tx = FFMIN(max_tx, s->s.h.txfmmode);
// ---- intra modes on keyframes / intra-only frames: fixed default
// probabilities selected by the above (a) and left (l) mode contexts,
// which are updated in place as the modes are read ----
1492 if (s->s.h.keyframe || s->s.h.intraonly) {
1493 uint8_t *a = &s->above_mode_ctx[col * 2];
1494 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1497 if (b->bs > BS_8x8) {
1498 // FIXME the memory storage intermediates here aren't really
1499 // necessary, they're just there to make the code slightly
1501 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1502 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1503 if (b->bs != BS_8x4) {
1504 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1505 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1506 l[0] = a[1] = b->mode[1];
1508 l[0] = a[1] = b->mode[1] = b->mode[0];
1510 if (b->bs != BS_4x8) {
1511 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1512 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1513 if (b->bs != BS_8x4) {
1514 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1515 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1516 l[1] = a[1] = b->mode[3];
1518 l[1] = a[1] = b->mode[3] = b->mode[2];
1521 b->mode[2] = b->mode[0];
1522 l[1] = a[1] = b->mode[3] = b->mode[1];
1525 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1526 vp9_default_kf_ymode_probs[*a][*l]);
1527 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1528 // FIXME this can probably be optimized
1529 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1530 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1532 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1533 vp9_default_kf_uvmode_probs[b->mode[3]]);
// ---- intra block inside an inter frame: adaptive y_mode / uv_mode
// probabilities, with counts ----
1534 } else if (b->intra) {
1536 if (b->bs > BS_8x8) {
1537 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1538 s->prob.p.y_mode[0]);
1539 s->counts.y_mode[0][b->mode[0]]++;
1540 if (b->bs != BS_8x4) {
1541 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1542 s->prob.p.y_mode[0]);
1543 s->counts.y_mode[0][b->mode[1]]++;
1545 b->mode[1] = b->mode[0];
1547 if (b->bs != BS_4x8) {
1548 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1549 s->prob.p.y_mode[0]);
1550 s->counts.y_mode[0][b->mode[2]]++;
1551 if (b->bs != BS_8x4) {
1552 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1553 s->prob.p.y_mode[0]);
1554 s->counts.y_mode[0][b->mode[3]]++;
1556 b->mode[3] = b->mode[2];
1559 b->mode[2] = b->mode[0];
1560 b->mode[3] = b->mode[1];
1563 static const uint8_t size_group[10] = {
1564 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1566 int sz = size_group[b->bs];
1568 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1569 s->prob.p.y_mode[sz]);
1570 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1571 s->counts.y_mode[sz][b->mode[3]]++;
1573 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1574 s->prob.p.uv_mode[b->mode[3]]);
1575 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
// ---- inter block: this table maps the (above mode, left mode) pair to
// the context used for reading the inter mode ----
1577 static const uint8_t inter_mode_ctx_lut[14][14] = {
1578 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1579 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1580 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1581 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1582 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1583 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1584 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1585 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1586 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1587 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1588 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1589 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1590 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1591 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
// ---- reference frame selection: fixed by the segment feature when it is
// enabled, otherwise read from the bitstream ----
1594 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
1595 av_assert2(s->s.h.segmentation.feat[b->seg_id].ref_val != 0);
1597 b->ref[0] = s->s.h.segmentation.feat[b->seg_id].ref_val - 1;
1599 // read comp_pred flag
1600 if (s->s.h.comppredmode != PRED_SWITCHABLE) {
1601 b->comp = s->s.h.comppredmode == PRED_COMPREF;
1605 // FIXME add intra as ref=0xff (or -1) to make these easier?
1608 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1610 } else if (s->above_comp_ctx[col]) {
1611 c = 2 + (s->left_intra_ctx[row7] ||
1612 s->left_ref_ctx[row7] == s->s.h.fixcompref);
1613 } else if (s->left_comp_ctx[row7]) {
1614 c = 2 + (s->above_intra_ctx[col] ||
1615 s->above_ref_ctx[col] == s->s.h.fixcompref);
1617 c = (!s->above_intra_ctx[col] &&
1618 s->above_ref_ctx[col] == s->s.h.fixcompref) ^
1619 (!s->left_intra_ctx[row7] &&
1620 s->left_ref_ctx[row & 7] == s->s.h.fixcompref);
1623 c = s->above_comp_ctx[col] ? 3 :
1624 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->s.h.fixcompref);
1626 } else if (have_l) {
1627 c = s->left_comp_ctx[row7] ? 3 :
1628 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->s.h.fixcompref);
1632 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1633 s->counts.comp[c][b->comp]++;
1636 // read actual references
1637 // FIXME probably cache a few variables here to prevent repetitive
1638 // memory accesses below
1639 if (b->comp) /* two references */ {
1640 int fix_idx = s->s.h.signbias[s->s.h.fixcompref], var_idx = !fix_idx, c, bit;
1642 b->ref[fix_idx] = s->s.h.fixcompref;
1643 // FIXME can this codeblob be replaced by some sort of LUT?
1646 if (s->above_intra_ctx[col]) {
1647 if (s->left_intra_ctx[row7]) {
1650 c = 1 + 2 * (s->left_ref_ctx[row7] != s->s.h.varcompref[1]);
1652 } else if (s->left_intra_ctx[row7]) {
1653 c = 1 + 2 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
1655 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1657 if (refl == refa && refa == s->s.h.varcompref[1]) {
1659 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1660 if ((refa == s->s.h.fixcompref && refl == s->s.h.varcompref[0]) ||
1661 (refl == s->s.h.fixcompref && refa == s->s.h.varcompref[0])) {
1664 c = (refa == refl) ? 3 : 1;
1666 } else if (!s->left_comp_ctx[row7]) {
1667 if (refa == s->s.h.varcompref[1] && refl != s->s.h.varcompref[1]) {
1670 c = (refl == s->s.h.varcompref[1] &&
1671 refa != s->s.h.varcompref[1]) ? 2 : 4;
1673 } else if (!s->above_comp_ctx[col]) {
1674 if (refl == s->s.h.varcompref[1] && refa != s->s.h.varcompref[1]) {
1677 c = (refa == s->s.h.varcompref[1] &&
1678 refl != s->s.h.varcompref[1]) ? 2 : 4;
1681 c = (refl == refa) ? 4 : 2;
1685 if (s->above_intra_ctx[col]) {
1687 } else if (s->above_comp_ctx[col]) {
1688 c = 4 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
1690 c = 3 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
1693 } else if (have_l) {
1694 if (s->left_intra_ctx[row7]) {
1696 } else if (s->left_comp_ctx[row7]) {
1697 c = 4 * (s->left_ref_ctx[row7] != s->s.h.varcompref[1]);
1699 c = 3 * (s->left_ref_ctx[row7] != s->s.h.varcompref[1]);
1704 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1705 b->ref[var_idx] = s->s.h.varcompref[bit];
1706 s->counts.comp_ref[c][bit]++;
1707 } else /* single reference */ {
1710 if (have_a && !s->above_intra_ctx[col]) {
1711 if (have_l && !s->left_intra_ctx[row7]) {
1712 if (s->left_comp_ctx[row7]) {
1713 if (s->above_comp_ctx[col]) {
1714 c = 1 + (!s->s.h.fixcompref || !s->left_ref_ctx[row7] ||
1715 !s->above_ref_ctx[col]);
1717 c = (3 * !s->above_ref_ctx[col]) +
1718 (!s->s.h.fixcompref || !s->left_ref_ctx[row7]);
1720 } else if (s->above_comp_ctx[col]) {
1721 c = (3 * !s->left_ref_ctx[row7]) +
1722 (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
1724 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1726 } else if (s->above_intra_ctx[col]) {
1728 } else if (s->above_comp_ctx[col]) {
1729 c = 1 + (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
1731 c = 4 * (!s->above_ref_ctx[col]);
1733 } else if (have_l && !s->left_intra_ctx[row7]) {
1734 if (s->left_intra_ctx[row7]) {
1736 } else if (s->left_comp_ctx[row7]) {
1737 c = 1 + (!s->s.h.fixcompref || !s->left_ref_ctx[row7]);
1739 c = 4 * (!s->left_ref_ctx[row7]);
1744 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1745 s->counts.single_ref[c][0][bit]++;
1749 // FIXME can this codeblob be replaced by some sort of LUT?
1752 if (s->left_intra_ctx[row7]) {
1753 if (s->above_intra_ctx[col]) {
1755 } else if (s->above_comp_ctx[col]) {
1756 c = 1 + 2 * (s->s.h.fixcompref == 1 ||
1757 s->above_ref_ctx[col] == 1);
1758 } else if (!s->above_ref_ctx[col]) {
1761 c = 4 * (s->above_ref_ctx[col] == 1);
1763 } else if (s->above_intra_ctx[col]) {
1764 if (s->left_intra_ctx[row7]) {
1766 } else if (s->left_comp_ctx[row7]) {
1767 c = 1 + 2 * (s->s.h.fixcompref == 1 ||
1768 s->left_ref_ctx[row7] == 1);
1769 } else if (!s->left_ref_ctx[row7]) {
1772 c = 4 * (s->left_ref_ctx[row7] == 1);
1774 } else if (s->above_comp_ctx[col]) {
1775 if (s->left_comp_ctx[row7]) {
1776 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1777 c = 3 * (s->s.h.fixcompref == 1 ||
1778 s->left_ref_ctx[row7] == 1);
1782 } else if (!s->left_ref_ctx[row7]) {
1783 c = 1 + 2 * (s->s.h.fixcompref == 1 ||
1784 s->above_ref_ctx[col] == 1);
1786 c = 3 * (s->left_ref_ctx[row7] == 1) +
1787 (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
1789 } else if (s->left_comp_ctx[row7]) {
1790 if (!s->above_ref_ctx[col]) {
1791 c = 1 + 2 * (s->s.h.fixcompref == 1 ||
1792 s->left_ref_ctx[row7] == 1);
1794 c = 3 * (s->above_ref_ctx[col] == 1) +
1795 (s->s.h.fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1797 } else if (!s->above_ref_ctx[col]) {
1798 if (!s->left_ref_ctx[row7]) {
1801 c = 4 * (s->left_ref_ctx[row7] == 1);
1803 } else if (!s->left_ref_ctx[row7]) {
1804 c = 4 * (s->above_ref_ctx[col] == 1);
1806 c = 2 * (s->left_ref_ctx[row7] == 1) +
1807 2 * (s->above_ref_ctx[col] == 1);
1810 if (s->above_intra_ctx[col] ||
1811 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1813 } else if (s->above_comp_ctx[col]) {
1814 c = 3 * (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
1816 c = 4 * (s->above_ref_ctx[col] == 1);
1819 } else if (have_l) {
1820 if (s->left_intra_ctx[row7] ||
1821 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1823 } else if (s->left_comp_ctx[row7]) {
1824 c = 3 * (s->s.h.fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1826 c = 4 * (s->left_ref_ctx[row7] == 1);
1831 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1832 s->counts.single_ref[c][1][bit]++;
1833 b->ref[0] = 1 + bit;
// ---- inter mode for blocks that carry a single mode (b->bs <= BS_8x8);
// the segment skip feature forces ZEROMV ----
1838 if (b->bs <= BS_8x8) {
1839 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].skip_enabled) {
1840 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1842 static const uint8_t off[10] = {
1843 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1846 // FIXME this needs to use the LUT tables from find_ref_mvs
1847 // because not all are -1,0/0,-1
1848 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1849 [s->left_mode_ctx[row7 + off[b->bs]]];
1851 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1852 s->prob.p.mv_mode[c]);
1853 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1854 s->counts.mv_mode[c][b->mode[0] - 10]++;
// ---- interpolation filter: coded per block only in FILTER_SWITCHABLE
// mode, with a context taken from inter-coded above / left neighbours ----
1858 if (s->s.h.filtermode == FILTER_SWITCHABLE) {
1861 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1862 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1863 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1864 s->left_filter_ctx[row7] : 3;
1866 c = s->above_filter_ctx[col];
1868 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1869 c = s->left_filter_ctx[row7];
1874 filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1875 s->prob.p.filter[c]);
1876 s->counts.filter[c][filter_id]++;
1877 b->filter = vp9_filter_lut[filter_id];
1879 b->filter = s->s.h.filtermode;
// ---- motion vectors: blocks with b->bs > BS_8x8 read a mode and vectors
// per sub-block (copying where the block shape has fewer than four);
// all other blocks make one fill_mv() call and replicate the result ----
1882 if (b->bs > BS_8x8) {
1883 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1885 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1886 s->prob.p.mv_mode[c]);
1887 s->counts.mv_mode[c][b->mode[0] - 10]++;
1888 fill_mv(s, b->mv[0], b->mode[0], 0);
1890 if (b->bs != BS_8x4) {
1891 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1892 s->prob.p.mv_mode[c]);
1893 s->counts.mv_mode[c][b->mode[1] - 10]++;
1894 fill_mv(s, b->mv[1], b->mode[1], 1);
1896 b->mode[1] = b->mode[0];
1897 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1898 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1901 if (b->bs != BS_4x8) {
1902 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1903 s->prob.p.mv_mode[c]);
1904 s->counts.mv_mode[c][b->mode[2] - 10]++;
1905 fill_mv(s, b->mv[2], b->mode[2], 2);
1907 if (b->bs != BS_8x4) {
1908 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1909 s->prob.p.mv_mode[c]);
1910 s->counts.mv_mode[c][b->mode[3] - 10]++;
1911 fill_mv(s, b->mv[3], b->mode[3], 3);
1913 b->mode[3] = b->mode[2];
1914 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1915 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1918 b->mode[2] = b->mode[0];
1919 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1920 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1921 b->mode[3] = b->mode[1];
1922 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1923 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
1926 fill_mv(s, b->mv[0], b->mode[0], -1);
1927 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1928 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1929 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
1930 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1931 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1932 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
// reference that is stored into the above / left ref context below
1935 vref = b->ref[b->comp ? s->s.h.signbias[s->s.h.varcompref[0]] : 0];
// ---- context update. SPLAT_CTX replicates val into n consecutive context
// bytes; two definitions follow, one built on 64-bit stores and one that
// only uses stores of up to 32 bits ----
1939 #define SPLAT_CTX(var, val, n) \
1941 case 1: var = val; break; \
1942 case 2: AV_WN16A(&var, val * 0x0101); break; \
1943 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1944 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
1946 uint64_t v64 = val * 0x0101010101010101ULL; \
1947 AV_WN64A( &var, v64); \
1948 AV_WN64A(&((uint8_t *) &var)[8], v64); \
1953 #define SPLAT_CTX(var, val, n) \
1955 case 1: var = val; break; \
1956 case 2: AV_WN16A(&var, val * 0x0101); break; \
1957 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1959 uint32_t v32 = val * 0x01010101; \
1960 AV_WN32A( &var, v32); \
1961 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1965 uint32_t v32 = val * 0x01010101; \
1966 AV_WN32A( &var, v32); \
1967 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1968 AV_WN32A(&((uint8_t *) &var)[8], v32); \
1969 AV_WN32A(&((uint8_t *) &var)[12], v32); \
1975 switch (bwh_tab[1][b->bs][0]) {
1976 #define SET_CTXS(dir, off, n) \
1978 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
1979 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
1980 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
1981 if (!s->s.h.keyframe && !s->s.h.intraonly) { \
1982 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
1983 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
1984 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
1986 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
1987 if (s->s.h.filtermode == FILTER_SWITCHABLE) { \
1988 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
1993 case 1: SET_CTXS(above, col, 1); break;
1994 case 2: SET_CTXS(above, col, 2); break;
1995 case 4: SET_CTXS(above, col, 4); break;
1996 case 8: SET_CTXS(above, col, 8); break;
// same update for the left context, sized by the block height
1998 switch (bwh_tab[1][b->bs][1]) {
1999 case 1: SET_CTXS(left, row7, 1); break;
2000 case 2: SET_CTXS(left, row7, 2); break;
2001 case 4: SET_CTXS(left, row7, 4); break;
2002 case 8: SET_CTXS(left, row7, 8); break;
// ---- above / left MV contexts (read back by find_ref_mvs()): sub8x8
// blocks store their edge sub-block vectors, other blocks replicate
// b->mv[3] over 2 * w4 / 2 * h4 entries ----
2007 if (!s->s.h.keyframe && !s->s.h.intraonly) {
2008 if (b->bs > BS_8x8) {
2009 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2011 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2012 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2013 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2014 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2015 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2016 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2017 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2018 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2020 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2022 for (n = 0; n < w4 * 2; n++) {
2023 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2024 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2026 for (n = 0; n < h4 * 2; n++) {
2027 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2028 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
// ---- per-position ref / MV store of the current frame: every covered
// position records the block references and the vectors of b->mv[3] ----
2034 for (y = 0; y < h4; y++) {
2035 int x, o = (row + y) * s->sb_cols * 8 + col;
2036 struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[o];
2039 for (x = 0; x < w4; x++) {
2043 } else if (b->comp) {
2044 for (x = 0; x < w4; x++) {
2045 mv[x].ref[0] = b->ref[0];
2046 mv[x].ref[1] = b->ref[1];
2047 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2048 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2051 for (x = 0; x < w4; x++) {
2052 mv[x].ref[0] = b->ref[0];
2054 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2060 // FIXME merge cnt/eob arguments?
/**
 * Decode the coefficient tokens of one transform block.
 *
 * For each position the end-of-block, zero and one flags are read with the
 * probabilities selected by the current band and the nnz context; larger
 * magnitudes are read as categories with fixed extra-bit probabilities.
 * The value gets its sign from one more bit, is multiplied by qmul[!!i]
 * and stored into coef. The context for later positions is derived from
 * the cache entries of the two neighbours listed in nb.
 *
 * @param c               range coder
 * @param coef            output coefficient buffer
 * @param n_coeffs        number of coefficient positions in the block
 * @param is_tx32x32      NOTE(review): presumably selects the store that
 *                        halves the dequantised value; confirm
 * @param is8bitsperpixel when zero, extra high magnitude bits are read and
 *                        STORE_COEF uses a 32-bit store at c[i * 2]
 * @param bpp             bit depth; the wrappers pass 8 or s->bpp
 * @param cnt             token counters, indexed [band][nnz][0..2]
 * @param eob             end-of-block counters, indexed [band][nnz][flag]
 * @param p               token probabilities; entries 3..10 are filled
 *                        from vp9_model_pareto8 based on entry 2
 * @param nnz             initial context for the first position
 * @param scan            scan order table (presumably the source of rc)
 * @param nb              neighbour position pairs, indexed by scan index
 * @param band_counts     number of positions in each band
 * @param qmul            dequantisation factors: [0] for the first
 *                        position, [1] for all others
 * @return NOTE(review): the return statement is outside the reviewed lines;
 *         the wrappers forward the value as an int
 */
2061 static av_always_inline int
2062 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2063 int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
2064 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2065 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2066 const int16_t *band_counts, const int16_t *qmul)
2068 int i = 0, band = 0, band_left = band_counts[band];
2069 uint8_t *tp = p[0][nnz];
// per-position token magnitude class, consulted for the nnz context of
// later positions
2070 uint8_t cache[1024];
2075 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2076 eob[band][nnz][val]++;
2081 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2082 cnt[band][nnz][0]++;
2084 band_left = band_counts[++band];
2086 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2088 if (++i == n_coeffs)
2089 break; //invalid input; blocks should end with EOB
2094 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2095 cnt[band][nnz][1]++;
2099 // fill in p[3-10] (model fill) - only once per frame for each pos
2101 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2103 cnt[band][nnz][2]++;
2104 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2105 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2106 cache[rc] = val = 2;
2108 val = 3 + vp56_rac_get_prob(c, tp[5]);
2111 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2113 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2114 val = 5 + vp56_rac_get_prob(c, 159);
2116 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2117 val += vp56_rac_get_prob(c, 145);
2121 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2122 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2123 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2124 val += (vp56_rac_get_prob(c, 148) << 1);
2125 val += vp56_rac_get_prob(c, 140);
2127 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2128 val += (vp56_rac_get_prob(c, 155) << 2);
2129 val += (vp56_rac_get_prob(c, 140) << 1);
2130 val += vp56_rac_get_prob(c, 135);
2132 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2133 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2134 val += (vp56_rac_get_prob(c, 157) << 3);
2135 val += (vp56_rac_get_prob(c, 141) << 2);
2136 val += (vp56_rac_get_prob(c, 134) << 1);
2137 val += vp56_rac_get_prob(c, 130);
// largest category: high bit depth streams carry additional top bits
// (bits 14..17) ahead of the bits read for every depth
2140 if (!is8bitsperpixel) {
2142 val += vp56_rac_get_prob(c, 255) << 17;
2143 val += vp56_rac_get_prob(c, 255) << 16;
2145 val += (vp56_rac_get_prob(c, 255) << 15);
2146 val += (vp56_rac_get_prob(c, 255) << 14);
2148 val += (vp56_rac_get_prob(c, 254) << 13);
2149 val += (vp56_rac_get_prob(c, 254) << 12);
2150 val += (vp56_rac_get_prob(c, 254) << 11);
2151 val += (vp56_rac_get_prob(c, 252) << 10);
2152 val += (vp56_rac_get_prob(c, 249) << 9);
2153 val += (vp56_rac_get_prob(c, 243) << 8);
2154 val += (vp56_rac_get_prob(c, 230) << 7);
2155 val += (vp56_rac_get_prob(c, 196) << 6);
2156 val += (vp56_rac_get_prob(c, 177) << 5);
2157 val += (vp56_rac_get_prob(c, 153) << 4);
2158 val += (vp56_rac_get_prob(c, 140) << 3);
2159 val += (vp56_rac_get_prob(c, 133) << 2);
2160 val += (vp56_rac_get_prob(c, 130) << 1);
2161 val += vp56_rac_get_prob(c, 129);
// STORE_COEF: write one coefficient; the store width depends on
// is8bitsperpixel
2165 #define STORE_COEF(c, i, v) do { \
2166 if (is8bitsperpixel) { \
2169 AV_WN32A(&c[i * 2], v); \
2173 band_left = band_counts[++band];
2175 STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
2177 STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
// context for the next position: rounded-up mean of the two neighbour
// cache entries
2178 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2180 } while (++i < n_coeffs);
/*
 * Coefficient decoder wrapper for 8 bits-per-pixel content.
 * Forwards all arguments to decode_coeffs_b_generic(), reading from the
 * range coder at s->c, with the three extra arguments fixed to 0, 1 and 8,
 * and returns the callee's result unchanged.
 * NOTE(review): the callee's parameter list is outside this view; the
 * constants presumably mean "not a 32x32 transform", "8 bits per pixel"
 * and "bit depth 8" -- confirm against decode_coeffs_b_generic().
 */
2185 static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2186 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2187 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2188 const int16_t (*nb)[2], const int16_t *band_counts,
2189 const int16_t *qmul)
2191 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
2192 nnz, scan, nb, band_counts, qmul);
/*
 * "b32" coefficient decoder wrapper for 8 bits-per-pixel content.
 * Same as decode_coeffs_b_8bpp() except that the first extra argument passed
 * to decode_coeffs_b_generic() is 1 instead of 0.
 * NOTE(review): presumably that flag selects the 32x32-transform path of the
 * generic decoder -- confirm against its signature, which is not shown here.
 * Returns the callee's result unchanged.
 */
2195 static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2196 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2197 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2198 const int16_t (*nb)[2], const int16_t *band_counts,
2199 const int16_t *qmul)
2201 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
2202 nnz, scan, nb, band_counts, qmul);
/*
 * Coefficient decoder wrapper for content stored with more than 8 bits per
 * pixel. Forwards to decode_coeffs_b_generic() with the two flag arguments
 * fixed to 0 and 0 and the bit depth taken from s->bpp at run time.
 * NOTE(review): flag meanings (presumably is_tx32x32, is8bitsperpixel) are
 * defined by the callee, which is outside this view.
 * Returns the callee's result unchanged.
 */
2205 static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2206 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2207 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2208 const int16_t (*nb)[2], const int16_t *band_counts,
2209 const int16_t *qmul)
2211 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
2212 nnz, scan, nb, band_counts, qmul);
/*
 * "b32" coefficient decoder wrapper for content stored with more than 8 bits
 * per pixel. Same as decode_coeffs_b_16bpp() except that the first extra
 * argument passed to decode_coeffs_b_generic() is 1 instead of 0; the bit
 * depth again comes from s->bpp.
 * NOTE(review): presumably the first flag selects the 32x32-transform path --
 * confirm against the callee. Returns the callee's result unchanged.
 */
2215 static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2216 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2217 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2218 const int16_t (*nb)[2], const int16_t *band_counts,
2219 const int16_t *qmul)
2221 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
2222 nnz, scan, nb, band_counts, qmul);
/*
 * Decode the residual coefficients of the current block: first luma, then
 * both chroma planes.
 *
 * ctx             - codec context; the VP9Context is taken from priv_data.
 * is8bitsperpixel - non-zero selects the *_8bpp block decoders and a
 *                   1-byte-per-pixel coefficient layout, zero selects the
 *                   *_16bpp decoders and 2 bytes per pixel.
 *
 * Each decoded transform block updates the above/left non-zero contexts with
 * !!res and ORs !!res into total_coeff.
 * NOTE(review): the declaration of `b`, the switch on transform size and the
 * return statement are on lines not present in this listing; presumably the
 * function returns total_coeff -- confirm.
 */
2225 static av_always_inline int decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
2227 VP9Context *s = ctx->priv_data;
2229 int row = s->row, col = s->col;
/* Luma probability and counter tables, selected by transform size (b->tx)
 * and by whether the block is inter (!b->intra). */
2230 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2231 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2232 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
/* Block width/height from bwh_tab, doubled; end_x/end_y clip them to what
 * remains of the frame (s->cols / s->rows) from the current position. */
2233 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2234 int end_x = FFMIN(2 * (s->cols - col), w4);
2235 int end_y = FFMIN(2 * (s->rows - row), h4);
2236 int n, pl, x, y, res;
2237 int16_t (*qmul)[2] = s->s.h.segmentation.feat[b->seg_id].qmul;
/* Lossless mode offsets the transform index by 4 when picking scan tables. */
2238 int tx = 4 * s->s.h.lossless + b->tx;
2239 const int16_t * const *yscans = vp9_scans[tx];
2240 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2241 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2242 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
/* Above/left luma non-zero contexts for this block position. */
2243 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2244 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
/* Band lengths per transform size. The first five entries sum to 13 (first
 * row) or 21 (other rows); the last entry is the total coefficient count
 * (16, 64, 256, 1024) minus that sum. */
2245 static const int16_t band_counts[4][8] = {
2246 { 1, 2, 3, 4, 3, 16 - 13 },
2247 { 1, 2, 3, 4, 11, 64 - 21 },
2248 { 1, 2, 3, 4, 11, 256 - 21 },
2249 { 1, 2, 3, 4, 11, 1024 - 21 },
2251 const int16_t *y_band_counts = band_counts[b->tx];
2252 const int16_t *uv_band_counts = band_counts[b->uvtx];
2253 int bytesperpixel = is8bitsperpixel ? 1 : 2;
2254 int total_coeff = 0;
/*
 * Helper macros used below:
 *  - MERGE / MERGE_CTX: for every group of `step` context bytes, read the
 *    group with `rd` and store a single 0/1 flag in its first byte.
 *  - DECODE_Y_COEF_LOOP: walk the block in transform-sized steps, call the
 *    8bpp or 16bpp block decoder (suffix `v` picks the b32 variant), pass
 *    a[x] + l[y] as the context, then write !!res back to a[x] and l[y].
 *  - SPLAT / SPLAT_CTX: copy the first context byte of each group to the
 *    rest of that group; the argument of SPLAT named `cond` is given as
 *    end_x == w4 / end_y == h4 by SPLAT_CTX.
 */
2256 #define MERGE(la, end, step, rd) \
2257 for (n = 0; n < end; n += step) \
2258 la[n] = !!rd(&la[n])
2259 #define MERGE_CTX(step, rd) \
2261 MERGE(l, end_y, step, rd); \
2262 MERGE(a, end_x, step, rd); \
2265 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2266 for (n = 0, y = 0; y < end_y; y += step) { \
2267 for (x = 0; x < end_x; x += step, n += step * step) { \
2268 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2269 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2270 (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
2271 c, e, p, a[x] + l[y], yscans[txtp], \
2272 ynbs[txtp], y_band_counts, qmul[0]); \
2273 a[x] = l[y] = !!res; \
2274 total_coeff |= !!res; \
2276 AV_WN16A(&s->eob[n], res); \
2283 #define SPLAT(la, end, step, cond) \
2285 for (n = 1; n < end; n += step) \
2286 la[n] = la[n - 1]; \
2287 } else if (step == 4) { \
2289 for (n = 0; n < end; n += step) \
2290 AV_WN32A(&la[n], la[n] * 0x01010101); \
2292 for (n = 0; n < end; n += step) \
2293 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2295 } else /* step == 8 */ { \
2297 if (HAVE_FAST_64BIT) { \
2298 for (n = 0; n < end; n += step) \
2299 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2301 for (n = 0; n < end; n += step) { \
2302 uint32_t v32 = la[n] * 0x01010101; \
2303 AV_WN32A(&la[n], v32); \
2304 AV_WN32A(&la[n + 4], v32); \
2308 for (n = 0; n < end; n += step) \
2309 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2312 #define SPLAT_CTX(step) \
2314 SPLAT(a, end_x, step, end_x == w4); \
2315 SPLAT(l, end_y, step, end_y == h4); \
2321 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
/* For steps 2, 4 and 8 the contexts are merged per transform block before
 * decoding; the step-8 case uses the b32 decoder variants. */
2324 MERGE_CTX(2, AV_RN16A);
2325 DECODE_Y_COEF_LOOP(2, 0,);
2329 MERGE_CTX(4, AV_RN32A);
2330 DECODE_Y_COEF_LOOP(4, 0,);
2334 MERGE_CTX(8, AV_RN64A);
2335 DECODE_Y_COEF_LOOP(8, 0, 32);
/* Chroma counterpart of DECODE_Y_COEF_LOOP: always uses the uvscan/uvnb
 * tables, writes into s->uvblock[pl] / s->uveob[pl] and scales with
 * qmul[1] instead of qmul[0]. */
2340 #define DECODE_UV_COEF_LOOP(step, v) \
2341 for (n = 0, y = 0; y < end_y; y += step) { \
2342 for (x = 0; x < end_x; x += step, n += step * step) { \
2343 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2344 (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
2345 16 * step * step, c, e, p, a[x] + l[y], \
2346 uvscan, uvnb, uv_band_counts, qmul[1]); \
2347 a[x] = l[y] = !!res; \
2348 total_coeff |= !!res; \
2350 AV_WN16A(&s->uveob[pl][n], res); \
2352 s->uveob[pl][n] = res; \
2357 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2358 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2359 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
/* p, c and e now point at the chroma tables; decode each of the two chroma
 * planes with its own above/left context arrays. The context index is
 * doubled only when the plane is not subsampled in that direction. */
2364 for (pl = 0; pl < 2; pl++) {
2365 a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2366 l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2369 DECODE_UV_COEF_LOOP(1,);
2372 MERGE_CTX(2, AV_RN16A);
2373 DECODE_UV_COEF_LOOP(2,);
2377 MERGE_CTX(4, AV_RN32A);
2378 DECODE_UV_COEF_LOOP(4,);
2382 MERGE_CTX(8, AV_RN64A);
2383 DECODE_UV_COEF_LOOP(8, 32);
/* Non-inline entry point: decode_coeffs() with is8bitsperpixel = 1. */
2392 static int decode_coeffs_8bpp(AVCodecContext *ctx)
2394 return decode_coeffs(ctx, 1);
/* Non-inline entry point: decode_coeffs() with is8bitsperpixel = 0. */
2397 static int decode_coeffs_16bpp(AVCodecContext *ctx)
2399 return decode_coeffs(ctx, 0);
/*
 * Pick the intra prediction mode that can actually be used at this position
 * and prepare the edge pixels it needs.
 *
 * The requested mode is first replaced through mode_conv[] according to
 * whether left and top neighbours exist. If the resulting mode needs the top
 * edge, the top row (plus top-left and, for 4x4, top-right pixels) is taken
 * from the frame or from s->intra_pred_data[] and, where not enough pixels
 * are available, padded into the buffer at *a. If it needs the left edge,
 * the left column is gathered into l.
 *
 * a                        - in/out pointer to the top-edge buffer.
 * dst_edge / stride_edge   - used for pixels at the block's own edge
 *                            (y == 0 for top, x == 0 for left).
 * dst_inner / stride_inner - used for pixels inside the block.
 * l                        - left-edge buffer.
 * col, x, w, row, y        - block position, sub-block position and width.
 * tx                       - transform size; edges are 4 << tx pixels long.
 * p                        - plane index into s->intra_pred_data[].
 * ss_h, ss_v               - chroma subsampling flags for this plane.
 * bytesperpixel            - 1 or 2; selects byte or 16-bit pixel stores.
 *
 * NOTE(review): the declaration of `bpp`, several else branches and the
 * return statement are on lines missing from this listing; callers use the
 * return value as the mode to predict with, so presumably `mode` is
 * returned -- confirm.
 */
2402 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2403 uint8_t *dst_edge, ptrdiff_t stride_edge,
2404 uint8_t *dst_inner, ptrdiff_t stride_inner,
2405 uint8_t *l, int col, int x, int w,
2406 int row, int y, enum TxfmMode tx,
2407 int p, int ss_h, int ss_v, int bytesperpixel)
/* Neighbour availability: top exists below the first row or inside the
 * block; left exists right of the tile's first column or inside the block;
 * right exists unless this is the last sub-block column. */
2409 int have_top = row > 0 || y > 0;
2410 int have_left = col > s->tile_col_start || x > 0;
2411 int have_right = x < w - 1;
/* Replacement mode for each of the 10 coded modes, indexed by
 * [mode][have_left][have_top]. */
2413 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2414 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2415 { DC_127_PRED, VERT_PRED } },
2416 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2417 { HOR_PRED, HOR_PRED } },
2418 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2419 { LEFT_DC_PRED, DC_PRED } },
2420 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2421 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2422 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2423 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2424 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2425 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2426 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2427 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2428 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2429 { DC_127_PRED, VERT_LEFT_PRED } },
2430 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2431 { HOR_UP_PRED, HOR_UP_PRED } },
2432 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2433 { HOR_PRED, TM_VP8_PRED } },
/* Which neighbouring pixels each (converted) mode reads, and whether the
 * left column is stored in top-to-bottom order (invert_left). */
2435 static const struct {
2436 uint8_t needs_left:1;
2437 uint8_t needs_top:1;
2438 uint8_t needs_topleft:1;
2439 uint8_t needs_topright:1;
2440 uint8_t invert_left:1;
2441 } edges[N_INTRA_PRED_MODES] = {
2442 [VERT_PRED] = { .needs_top = 1 },
2443 [HOR_PRED] = { .needs_left = 1 },
2444 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2445 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2446 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2447 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2448 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2449 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2450 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2451 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2452 [LEFT_DC_PRED] = { .needs_left = 1 },
2453 [TOP_DC_PRED] = { .needs_top = 1 },
2454 [DC_128_PRED] = { 0 },
2455 [DC_127_PRED] = { 0 },
2456 [DC_129_PRED] = { 0 }
2459 av_assert2(mode >= 0 && mode < 10);
/* From here on `mode` is the substituted mode. */
2460 mode = mode_conv[mode][have_left][have_top];
/* Top edge: n_px_need pixels are wanted, n_px_have is how many lie inside
 * the frame to the right of this sub-block. */
2461 if (edges[mode].needs_top) {
2462 uint8_t *top, *topleft;
2463 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2464 int n_px_need_tr = 0;
2466 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2469 // if top of sb64-row, use s->intra_pred_data[] instead of
2470 // dst[-stride] for intra prediction (it contains pre- instead of
2471 // post-loopfilter data)
2473 top = !(row & 7) && !y ?
2474 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2475 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2477 topleft = !(row & 7) && !y ?
2478 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2479 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2480 &dst_inner[-stride_inner];
2484 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2485 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2486 n_px_need + n_px_need_tr <= n_px_have) {
2490 if (n_px_need <= n_px_have) {
2491 memcpy(*a, top, n_px_need * bytesperpixel);
/* memset_bpp(c, i1, v, i2, num): replicate pixel v[i2] into num pixels of c
 * starting at pixel index i1, using byte or 16-bit stores as appropriate. */
2493 #define memset_bpp(c, i1, v, i2, num) do { \
2494 if (bytesperpixel == 1) { \
2495 memset(&(c)[(i1)], (v)[(i2)], (num)); \
2497 int n, val = AV_RN16A(&(v)[(i2) * 2]); \
2498 for (n = 0; n < (num); n++) { \
2499 AV_WN16A(&(c)[((i1) + n) * 2], val); \
2503 memcpy(*a, top, n_px_have * bytesperpixel);
2504 memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
/* memset_val(c, val, num): fill num pixels of c with the constant val. */
2507 #define memset_val(c, val, num) do { \
2508 if (bytesperpixel == 1) { \
2509 memset((c), (val), (num)); \
2512 for (n = 0; n < (num); n++) { \
2513 AV_WN16A(&(c)[n * 2], (val)); \
2517 memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
/* Top-left pixel, stored at index -1 of the top buffer: copied from the
 * frame when both neighbours exist, otherwise set to mid-grey +1 or -1
 * depending on have_top. */
2519 if (edges[mode].needs_topleft) {
2520 if (have_left && have_top) {
2521 #define assign_bpp(c, i1, v, i2) do { \
2522 if (bytesperpixel == 1) { \
2523 (c)[(i1)] = (v)[(i2)]; \
2525 AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
2528 assign_bpp(*a, -1, topleft, -1);
2530 #define assign_val(c, i, v) do { \
2531 if (bytesperpixel == 1) { \
2534 AV_WN16A(&(c)[(i) * 2], (v)); \
2537 assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
/* Top-right (4x4 only): copy four more pixels when they exist, otherwise
 * repeat pixel 3 of the top row into pixels 4..7. */
2540 if (tx == TX_4X4 && edges[mode].needs_topright) {
2541 if (have_top && have_right &&
2542 n_px_need + n_px_need_tr <= n_px_have) {
2543 memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
2545 memset_bpp(*a, 4, *a, 3, 4);
/* Left edge: gather the pixel immediately left of each row (index -1) into
 * l. With invert_left the rows are stored in order; otherwise they are
 * stored reversed (row i goes to l[n_px_need - 1 - i]). Rows beyond the
 * frame are filled by repeating the last available pixel. */
2550 if (edges[mode].needs_left) {
2552 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2553 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2554 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2556 if (edges[mode].invert_left) {
2557 if (n_px_need <= n_px_have) {
2558 for (i = 0; i < n_px_need; i++)
2559 assign_bpp(l, i, &dst[i * stride], -1);
2561 for (i = 0; i < n_px_have; i++)
2562 assign_bpp(l, i, &dst[i * stride], -1);
2563 memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
2566 if (n_px_need <= n_px_have) {
2567 for (i = 0; i < n_px_need; i++)
2568 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2570 for (i = 0; i < n_px_have; i++)
2571 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2572 memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
2576 memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
/*
 * Reconstruct an intra-coded block: for every transform block of luma and
 * then of each chroma plane, resolve the prediction mode and edge pixels via
 * check_intra_mode(), run the intra predictor, and call the inverse
 * transform with the stored end-of-block value.
 *
 * y_off / uv_off - byte offsets of the block inside the current frame's
 *                  luma / chroma planes; used to form dst_r, the pointer
 *                  handed to check_intra_mode() as the "edge" source.
 * bytesperpixel  - 1 or 2.
 *
 * NOTE(review): the declaration of `b` and some statements (e.g. any guard
 * around itxfm_add and the chroma size adjustments before the second loop)
 * are on lines missing from this listing.
 */
2583 static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
2584 ptrdiff_t uv_off, int bytesperpixel)
2586 VP9Context *s = ctx->priv_data;
2588 int row = s->row, col = s->col;
/* step1d is the transform size along one axis, step the number of
 * coefficient-block slots one transform occupies (1 << (2 * tx)). */
2589 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2590 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2591 int end_x = FFMIN(2 * (s->cols - col), w4);
2592 int end_y = FFMIN(2 * (s->rows - row), h4);
2593 int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
2594 int uvstep1d = 1 << b->uvtx, p;
/* dst is where pixels are written (s->dst[]); dst_r points into the real
 * frame buffer at the same block position. */
2595 uint8_t *dst = s->dst[0], *dst_r = s->s.frames[CUR_FRAME].tf.f->data[0] + y_off;
/* Scratch edge buffers; the top buffer is used from offset 32 so that
 * negative indices (top-left pixel) stay inside a_buf. */
2596 LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
2597 LOCAL_ALIGNED_32(uint8_t, l, [64]);
/* Luma. */
2599 for (n = 0, y = 0; y < end_y; y += step1d) {
2600 uint8_t *ptr = dst, *ptr_r = dst_r;
2601 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
2602 ptr_r += 4 * step1d * bytesperpixel, n += step) {
2603 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2605 uint8_t *a = &a_buf[32];
2606 enum TxfmType txtp = vp9_intra_txfm_type[mode];
/* Skipped blocks have no residual; eob is stored as 16 bits for transforms
 * larger than 8x8 and as a single byte otherwise. */
2607 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2609 mode = check_intra_mode(s, mode, &a, ptr_r,
2610 s->s.frames[CUR_FRAME].tf.f->linesize[0],
2611 ptr, s->y_stride, l,
2612 col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
2613 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2615 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2616 s->block + 16 * n * bytesperpixel, eob);
2618 dst_r += 4 * step1d * s->s.frames[CUR_FRAME].tf.f->linesize[0];
2619 dst += 4 * step1d * s->y_stride;
/* Chroma: same procedure for both planes, using b->uvmode, the chroma
 * transform size and a DCT_DCT inverse transform. */
2626 step = 1 << (b->uvtx * 2);
2627 for (p = 0; p < 2; p++) {
2628 dst = s->dst[1 + p];
2629 dst_r = s->s.frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2630 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2631 uint8_t *ptr = dst, *ptr_r = dst_r;
2632 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
2633 ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
2634 int mode = b->uvmode;
2635 uint8_t *a = &a_buf[32];
2636 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2638 mode = check_intra_mode(s, mode, &a, ptr_r,
2639 s->s.frames[CUR_FRAME].tf.f->linesize[1],
2640 ptr, s->uv_stride, l, col, x, w4, row, y,
2641 b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
2642 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2644 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2645 s->uvblock[p] + 16 * n * bytesperpixel, eob);
2647 dst_r += 4 * uvstep1d * s->s.frames[CUR_FRAME].tf.f->linesize[1];
2648 dst += 4 * uvstep1d * s->uv_stride;
/* Non-inline entry point: intra_recon() with bytesperpixel = 1. */
2653 static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2655 intra_recon(ctx, y_off, uv_off, 1);
/* Non-inline entry point: intra_recon() with bytesperpixel = 2. */
2658 static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2660 intra_recon(ctx, y_off, uv_off, 2);
/*
 * Luma motion compensation from a reference frame of the same size.
 *
 * mc                  - table of MC functions, indexed by [!!mx][!!my].
 * dst, dst_stride     - destination block.
 * ref, ref_stride     - reference plane and its stride.
 * ref_frame           - reference ThreadFrame; decoding progress is awaited
 *                       on it before any pixel is read.
 * y, x                - block position in the reference plane.
 * mv                  - motion vector for this block.
 * bw, bh              - block width and height.
 * w, h                - bounds used for the edge-emulation test.
 *
 * NOTE(review): the lines that adjust y/x/mx/my from the motion vector and
 * that reset ref_stride after edge emulation are missing from this listing;
 * the offset arithmetic below suggests the emulation buffer uses a stride of
 * 160 bytes -- confirm.
 */
2663 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2664 uint8_t *dst, ptrdiff_t dst_stride,
2665 const uint8_t *ref, ptrdiff_t ref_stride,
2666 ThreadFrame *ref_frame,
2667 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2668 int bw, int bh, int w, int h, int bytesperpixel)
2670 int mx = mv->x, my = mv->y, th;
2674 ref += y * ref_stride + x * bytesperpixel;
2677 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2678 // we use +7 because the last 7 pixels of each sbrow can be changed in
2679 // the longest loopfilter of the next sbrow
2680 th = (y + bh + 4 * !!my + 7) >> 6;
2681 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
/* If the block plus the filter margin (3 before, 4 after, only in
 * directions with a non-zero mx/my) crosses the w/h bounds, build the
 * source in s->edge_emu_buffer instead. */
2682 if (x < !!mx * 3 || y < !!my * 3 ||
2683 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2684 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2685 ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
2687 bw + !!mx * 7, bh + !!my * 7,
2688 x - !!mx * 3, y - !!my * 3, w, h);
2689 ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
/* The MC function receives mx and my doubled. */
2692 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
/*
 * Chroma motion compensation (both planes) from a reference frame of the
 * same size.
 *
 * Same structure as mc_luma_unscaled(), applied to the U and V planes with
 * their own reference pointers and strides but a shared destination stride.
 * When edge emulation is needed each plane is emulated into
 * s->edge_emu_buffer and predicted in turn (stride 160); otherwise both
 * planes are predicted directly from the reference.
 *
 * NOTE(review): the lines that adjust y/x/mx/my from the motion vector are
 * missing from this listing.
 */
2695 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2696 uint8_t *dst_u, uint8_t *dst_v,
2697 ptrdiff_t dst_stride,
2698 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2699 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2700 ThreadFrame *ref_frame,
2701 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2702 int bw, int bh, int w, int h, int bytesperpixel)
/* The MV component is doubled in each direction that is NOT subsampled. */
2704 int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
2708 ref_u += y * src_stride_u + x * bytesperpixel;
2709 ref_v += y * src_stride_v + x * bytesperpixel;
2712 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2713 // we use +7 because the last 7 pixels of each sbrow can be changed in
2714 // the longest loopfilter of the next sbrow
2715 th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2716 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2717 if (x < !!mx * 3 || y < !!my * 3 ||
2718 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
/* U plane through the emulation buffer. */
2719 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2720 ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
2722 bw + !!mx * 7, bh + !!my * 7,
2723 x - !!mx * 3, y - !!my * 3, w, h);
2724 ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2725 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
/* V plane reuses the same emulation buffer. */
2727 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2728 ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
2730 bw + !!mx * 7, bh + !!my * 7,
2731 x - !!mx * 3, y - !!my * 3, w, h);
2732 ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2733 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
2735 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2736 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
/*
 * Unscaled instantiation of the shared MC template.
 * mc_luma_dir / mc_chroma_dir adapt the template's call shape to
 * mc_luma_unscaled() / mc_chroma_unscaled(): the px, py, pw, ph and i
 * arguments are accepted but not forwarded, and the `mc` argument is
 * replaced by s->dsp.mc.
 */
2740 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2741 px, py, pw, ph, bw, bh, w, h, i) \
2742 mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2743 mv, bw, bh, w, h, bytesperpixel)
2744 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2745 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2746 mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2747 row, col, mv, bw, bh, w, h, bytesperpixel)
/* The template is included twice, once per pixel size; FN() appends the
 * _8bpp / _16bpp suffix to the names it defines. */
2749 #define FN(x) x##_8bpp
2750 #define BYTES_PER_PIXEL 1
2751 #include "vp9_mc_template.c"
2753 #undef BYTES_PER_PIXEL
2754 #define FN(x) x##_16bpp
2755 #define BYTES_PER_PIXEL 2
2756 #include "vp9_mc_template.c"
2758 #undef mc_chroma_dir
2760 #undef BYTES_PER_PIXEL
/*
 * Luma motion compensation that tolerates a reference frame of a different
 * size than the current frame.
 *
 * If the reference has the same width and height as the current frame the
 * call is forwarded to mc_luma_unscaled(). Otherwise the motion vector is
 * clipped, the vector and block position are scaled per axis with scale[],
 * and the scaled MC function `smc` is invoked with step[0]/step[1].
 *
 * smc            - scaled MC function.
 * mc             - unscaled MC table, used only on the same-size path.
 * px, py, pw, ph - enter the motion-vector clipping bounds.
 * scale, step    - per-axis scale factors and source step sizes.
 *
 * NOTE(review): several statements (declarations of mv/mx/my/th, the
 * rewrite of x/y to reference coordinates, and the ref_stride reset after
 * edge emulation) are on lines missing from this listing; the emulation
 * offset arithmetic suggests a buffer stride of 288 bytes -- confirm.
 */
2763 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2764 vp9_mc_func (*mc)[2],
2765 uint8_t *dst, ptrdiff_t dst_stride,
2766 const uint8_t *ref, ptrdiff_t ref_stride,
2767 ThreadFrame *ref_frame,
2768 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2769 int px, int py, int pw, int ph,
2770 int bw, int bh, int w, int h, int bytesperpixel,
2771 const uint16_t *scale, const uint8_t *step)
2773 if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
2774 s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
2775 mc_luma_unscaled(s, mc, dst, dst_stride, ref, ref_stride, ref_frame,
2776 y, x, in_mv, bw, bh, w, h, bytesperpixel);
/* scale_mv: multiply by scale[dim] in 64-bit arithmetic, then shift right
 * by 14. */
2778 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2780 int refbw_m1, refbh_m1;
/* Clip the incoming vector against bounds derived from the block position
 * and the frame size in s->cols / s->rows. */
2784 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2785 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2786 // BUG libvpx seems to scale the two components separately. This introduces
2787 // rounding errors but we have to reproduce them to be exactly compatible
2788 // with the output from libvpx...
2789 mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
2790 my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
2794 ref += y * ref_stride + x * bytesperpixel;
/* Extent of the source area covered by the block, minus one, after
 * stepping (bw - 1) / (bh - 1) times from the fractional start. */
2797 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2798 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2799 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2800 // we use +7 because the last 7 pixels of each sbrow can be changed in
2801 // the longest loopfilter of the next sbrow
2802 th = (y + refbh_m1 + 4 + 7) >> 6;
2803 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2804 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2805 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2806 ref - 3 * ref_stride - 3 * bytesperpixel,
2808 refbw_m1 + 8, refbh_m1 + 8,
2809 x - 3, y - 3, w, h);
2810 ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2813 smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
/*
 * Chroma motion compensation (both planes) that tolerates a reference frame
 * of a different size than the current frame.
 *
 * If the reference has the same width and height as the current frame the
 * call is forwarded to mc_chroma_unscaled(). Otherwise the vector is clipped
 * and scaled per axis (two alternative formulas per axis are visible; the
 * conditions choosing between them are on lines missing from this listing,
 * presumably the subsampling flags -- confirm), and `smc` is run for U and
 * then V, through s->edge_emu_buffer (stride 288) when the source area
 * crosses the w/h bounds.
 *
 * Parameters mirror mc_luma_scaled(), with separate reference pointers and
 * strides for the U and V planes.
 */
2817 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2818 vp9_mc_func (*mc)[2],
2819 uint8_t *dst_u, uint8_t *dst_v,
2820 ptrdiff_t dst_stride,
2821 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2822 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2823 ThreadFrame *ref_frame,
2824 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2825 int px, int py, int pw, int ph,
2826 int bw, int bh, int w, int h, int bytesperpixel,
2827 const uint16_t *scale, const uint8_t *step)
2829 if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
2830 s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
2831 mc_chroma_unscaled(s, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
2832 ref_v, src_stride_v, ref_frame,
2833 y, x, in_mv, bw, bh, w, h, bytesperpixel);
2836 int refbw_m1, refbh_m1;
/* Horizontal component: first variant clips with shift 4 and s->cols * 4,
 * second with shift 3 and s->cols * 8. */
2841 // BUG https://code.google.com/p/webm/issues/detail?id=820
2842 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 4, (s->cols * 4 - x + px + 3) << 4);
2843 mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2845 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2846 mx = scale_mv(mv.x << 1, 0) + scale_mv(x * 16, 0);
/* Vertical component: same two variants using s->rows. */
2849 // BUG https://code.google.com/p/webm/issues/detail?id=820
2850 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 4, (s->rows * 4 - y + py + 3) << 4);
2851 my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2853 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2854 my = scale_mv(mv.y << 1, 1) + scale_mv(y * 16, 1);
2859 ref_u += y * src_stride_u + x * bytesperpixel;
2860 ref_v += y * src_stride_v + x * bytesperpixel;
2863 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2864 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2865 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2866 // we use +7 because the last 7 pixels of each sbrow can be changed in
2867 // the longest loopfilter of the next sbrow
2868 th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2869 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2870 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
/* U plane through the emulation buffer. */
2871 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2872 ref_u - 3 * src_stride_u - 3 * bytesperpixel,
2874 refbw_m1 + 8, refbh_m1 + 8,
2875 x - 3, y - 3, w, h);
2876 ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2877 smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
/* V plane reuses the same emulation buffer. */
2879 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2880 ref_v - 3 * src_stride_v - 3 * bytesperpixel,
2882 refbw_m1 + 8, refbh_m1 + 8,
2883 x - 3, y - 3, w, h);
2884 ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2885 smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
2887 smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2888 smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
/*
 * Scaled instantiation of the shared MC template.
 * Here mc_luma_dir / mc_chroma_dir forward every template argument to
 * mc_luma_scaled() / mc_chroma_scaled(), passing both the scaled function
 * table (s->dsp.s##mc, i.e. the `mc` token with an "s" prefix) and the
 * unscaled one (s->dsp.mc), plus the per-reference scale and step tables
 * looked up through b->ref[i].
 */
2893 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2894 px, py, pw, ph, bw, bh, w, h, i) \
2895 mc_luma_scaled(s, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2896 mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2897 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2898 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2899 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2900 mc_chroma_scaled(s, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2901 row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2902 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
/* The template is included twice, once per pixel size; FN() appends the
 * _scaled_8bpp / _scaled_16bpp suffix to the names it defines. */
2904 #define FN(x) x##_scaled_8bpp
2905 #define BYTES_PER_PIXEL 1
2906 #include "vp9_mc_template.c"
2908 #undef BYTES_PER_PIXEL
2909 #define FN(x) x##_scaled_16bpp
2910 #define BYTES_PER_PIXEL 2
2911 #include "vp9_mc_template.c"
2913 #undef mc_chroma_dir
2915 #undef BYTES_PER_PIXEL
/*
 * Reconstruct an inter-coded block: run motion-compensated prediction, then
 * add the inverse-transformed residual for luma and both chroma planes.
 *
 * The scaled predictor is chosen when s->mvscale[...][0] is non-zero for the
 * first reference, or for the second reference of a compound block;
 * otherwise the unscaled predictor is used. bytesperpixel (1 or 2) selects
 * the 8bpp or 16bpp variant of either.
 *
 * NOTE(review): the declaration of `b`, the per-row `ptr` declarations and
 * any guards around the residual stage (e.g. on b->skip or eob) are on
 * lines missing from this listing.
 */
2918 static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
2920 VP9Context *s = ctx->priv_data;
2922 int row = s->row, col = s->col;
2924 if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
2925 if (bytesperpixel == 1) {
2926 inter_pred_scaled_8bpp(ctx);
2928 inter_pred_scaled_16bpp(ctx);
2931 if (bytesperpixel == 1) {
2932 inter_pred_8bpp(ctx);
2934 inter_pred_16bpp(ctx);
2938 /* mostly copied intra_recon() */
2940 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2941 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2942 int end_x = FFMIN(2 * (s->cols - col), w4);
2943 int end_y = FFMIN(2 * (s->rows - row), h4);
2944 int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
2945 int uvstep1d = 1 << b->uvtx, p;
2946 uint8_t *dst = s->dst[0];
/* Luma residual: always a DCT_DCT inverse transform here. */
2949 for (n = 0, y = 0; y < end_y; y += step1d) {
2951 for (x = 0; x < end_x; x += step1d,
2952 ptr += 4 * step1d * bytesperpixel, n += step) {
/* eob is stored as 16 bits for transforms larger than 8x8. */
2953 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2956 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2957 s->block + 16 * n * bytesperpixel, eob);
2959 dst += 4 * s->y_stride * step1d;
/* Chroma residual for both planes. */
2965 step = 1 << (b->uvtx * 2);
2966 for (p = 0; p < 2; p++) {
2967 dst = s->dst[p + 1];
2968 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2970 for (x = 0; x < end_x; x += uvstep1d,
2971 ptr += 4 * uvstep1d * bytesperpixel, n += step) {
2972 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2975 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2976 s->uvblock[p] + 16 * n * bytesperpixel, eob);
2978 dst += 4 * uvstep1d * s->uv_stride;
/* Non-inline entry point: inter_recon() with bytesperpixel = 1. */
2984 static void inter_recon_8bpp(AVCodecContext *ctx)
2986 inter_recon(ctx, 1);
/* Non-inline entry point: inter_recon() with bytesperpixel = 2. */
2989 static void inter_recon_16bpp(AVCodecContext *ctx)
2991 inter_recon(ctx, 2);
/*
 * Accumulate loop-filter edge bits for one block into mask[].
 *
 * mask                 - one plane's slice of VP9Filter.mask; per that
 *                        declaration the first index is 0=col / 1=row, the
 *                        second the row within the superblock (0..7) and the
 *                        third the filter class (0=16, 1=8, 2=4, 3=inner4).
 *                        Each byte holds one bit per column.
 * ss_h, ss_v           - subsampling flags of the plane being masked.
 * row_and_7, col_and_7 - block position within the superblock.
 * w, h                 - block size in the same units as the mask rows/bits.
 * col_end, row_end     - used for the odd-edge special cases below.
 * tx                   - transform size of the block.
 * skip_inter           - non-zero restricts masking to the block's outer
 *                        edges (the first branch requires !skip_inter).
 *
 * Bits are only ever ORed in; nothing is cleared here.
 * NOTE(review): a number of lines (the body of the first `if`, several
 * else/closing lines and the step1d assignment) are missing from this
 * listing, so the branch structure below is only partially visible.
 */
2994 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
2995 int row_and_7, int col_and_7,
2996 int w, int h, int col_end, int row_end,
2997 enum TxfmMode tx, int skip_inter)
2999 static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
3000 static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
3002 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
3003 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
3004 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
3005 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
3007 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
3008 // edges. This means that for UV, we work on two subsampled blocks at
3009 // a time, and we only use the topleft block's mode information to set
3010 // things like block strength. Thus, for any block size smaller than
3011 // 16x16, ignore the odd portion of the block.
3012 if (tx == TX_4X4 && (ss_v | ss_h)) {
/* 4x4 transforms with residual: t is the bit of the block's first column,
 * m_col has w consecutive bits set starting at that bit. */
3027 if (tx == TX_4X4 && !skip_inter) {
3028 int t = 1 << col_and_7, m_col = (t << w) - t, y;
3029 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
3030 int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
3032 for (y = row_and_7; y < h + row_and_7; y++) {
3033 int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
3035 mask[0][y][1] |= m_row_8;
3036 mask[0][y][2] |= m_row_4;
3037 // for odd lines, if the odd col is not being filtered,
3038 // skip odd row also:
3045 // if a/c are even row/col and b/d are odd, and d is skipped,
3046 // e.g. right edge of size-66x66.webm, then skip b also (bug)
3047 if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
3048 mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
3050 mask[1][y][col_mask_id] |= m_col;
3053 mask[0][y][3] |= m_col;
3055 if (ss_h && (col_end & 1))
3056 mask[1][y][3] |= (t << (w - 1)) - t;
3058 mask[1][y][3] |= m_col;
3062 int y, t = 1 << col_and_7, m_col = (t << w) - t;
/* Larger transforms: masks[] keeps every 1st, 2nd, 4th or 8th column bit
 * depending on l2 = tx + ss_h - 1. */
3065 int mask_id = (tx == TX_8X8);
3066 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
3067 int l2 = tx + ss_h - 1, step1d;
3068 int m_row = m_col & masks[l2];
3070 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
3071 // 8wd loopfilter to prevent going off the visible edge.
3072 if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
3073 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
3074 int m_row_8 = m_row - m_row_16;
3076 for (y = row_and_7; y < h + row_and_7; y++) {
3077 mask[0][y][0] |= m_row_16;
3078 mask[0][y][1] |= m_row_8;
3081 for (y = row_and_7; y < h + row_and_7; y++)
3082 mask[0][y][mask_id] |= m_row;
3087 if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
3088 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
3089 mask[1][y][0] |= m_col;
3090 if (y - row_and_7 == h - 1)
3091 mask[1][y][1] |= m_col;
3093 for (y = row_and_7; y < h + row_and_7; y += step1d)
3094 mask[1][y][mask_id] |= m_col;
/* Remaining cases mark only the block's outer edges: the row edge at
 * row_and_7 and the single column bit t on every row of the block. */
3096 } else if (tx != TX_4X4) {
3099 mask_id = (tx == TX_8X8) || (h == ss_v);
3100 mask[1][row_and_7][mask_id] |= m_col;
3101 mask_id = (tx == TX_8X8) || (w == ss_h);
3102 for (y = row_and_7; y < h + row_and_7; y++)
3103 mask[0][y][mask_id] |= t;
3105 int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
3107 for (y = row_and_7; y < h + row_and_7; y++) {
3108 mask[0][y][2] |= t4;
3109 mask[0][y][1] |= t8;
3111 mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
/*
 * Decode and reconstruct one block located at (row, col).
 *
 * The visible steps are: derive the block size from bl and bp, set the
 * motion vector clamping range, read residual coefficients, reconstruct the
 * block (intra or inter, 8 or 16 bit variants), copy temporary output back
 * into the frame when a temporary buffer was used, record loop filter levels
 * and edge masks in lflvl, and advance the coefficient/eob pointers.
 *
 * yoff and uvoff are byte offsets of the block inside the luma and chroma
 * planes of the current frame (they are added to f->data[] below).
 *
 * NOTE(review): the conditions that select between several of the
 * alternatives below are not visible in this listing; every comment that
 * says presumably must be confirmed against the complete function.
 */
3116 static void decode_b(AVCodecContext *ctx, int row, int col,
3117 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
3118 enum BlockLevel bl, enum BlockPartition bp)
3120 VP9Context *s = ctx->priv_data;
// the block size index combines the block level and the partition type
3122 enum BlockSize bs = bl * 3 + bp;
3123 int bytesperpixel = s->bytesperpixel;
// block width/height taken from bwh_tab[1]; they are multiplied by 8 below
// wherever a size in luma pixels is needed
3124 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
3126 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
// motion vector clamping range, derived from the block position and from
// the frame size expressed in s->cols / s->rows
3132 s->min_mv.x = -(128 + col * 64);
3133 s->min_mv.y = -(128 + row * 64);
3134 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
3135 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
// the chroma transform is one size smaller than the luma transform when a
// subsampled dimension of the block is exactly one luma transform wide
3141 b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
3142 (s->ss_v && h4 * 2 == (1 << b->tx)));
// coefficient parsing has separate 8 bit and 16 bit implementations
3147 if (bytesperpixel == 1) {
3148 has_coeffs = decode_coeffs_8bpp(ctx);
3150 has_coeffs = decode_coeffs_16bpp(ctx);
// an inter block without coefficients is recorded as skipped in the
// above/left skip contexts
3152 if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
3154 memset(&s->above_skip_ctx[col], 1, w4);
3155 memset(&s->left_skip_ctx[s->row7], 1, h4);
// SPLAT_ZERO_CTX zeroes n context bytes with one store of matching width;
// SPLAT_ZERO_YUV applies it to the luma context and to both chroma contexts,
// using half the size for chroma when that direction is subsampled.
// NOTE(review): presumably this path is taken when no coefficients were
// coded for the block - confirm against the hidden condition.
3160 #define SPLAT_ZERO_CTX(v, n) \
3162 case 1: v = 0; break; \
3163 case 2: AV_ZERO16(&v); break; \
3164 case 4: AV_ZERO32(&v); break; \
3165 case 8: AV_ZERO64(&v); break; \
3166 case 16: AV_ZERO128(&v); break; \
3168 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3170 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3171 if (s->ss_##dir2) { \
3172 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3173 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3175 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3176 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3181 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
3182 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
3183 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
3184 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
3187 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
3188 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
3189 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
3190 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
// step the coefficient and eob write positions past this block; chroma
// amounts are reduced by the horizontal and vertical subsampling shifts
3196 s->block += w4 * h4 * 64 * bytesperpixel;
3197 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3198 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3199 s->eob += 4 * w4 * h4;
3200 s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3201 s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3207 // emulated overhangs if the stride of the target buffer can't hold. This
3208 // makes it possible to support emu-edge and so on even if we have large block
// emu[0] (luma) / emu[1] (chroma) are set when the block would extend past
// the plane linesize or past the last row of the frame
3210 emu[0] = (col + w4) * 8 * bytesperpixel > f->linesize[0] ||
3211 (row + h4) > s->rows;
3212 emu[1] = ((col + w4) * 8 >> s->ss_h) * bytesperpixel > f->linesize[1] ||
3213 (row + h4) > s->rows;
// luma destination: either the temporary buffer or the frame itself
// (presumably selected by emu[0] - confirm)
3215 s->dst[0] = s->tmp_y;
3218 s->dst[0] = f->data[0] + yoff;
3219 s->y_stride = f->linesize[0];
// chroma destination: same choice (presumably selected by emu[1] - confirm)
3222 s->dst[1] = s->tmp_uv[0];
3223 s->dst[2] = s->tmp_uv[1];
3226 s->dst[1] = f->data[1] + uvoff;
3227 s->dst[2] = f->data[2] + uvoff;
3228 s->uv_stride = f->linesize[1];
// reconstruction; the intra/inter and 8/16 bit variants are presumably
// selected by b->intra and bytesperpixel - confirm
3232 intra_recon_16bpp(ctx, yoff, uvoff);
3234 intra_recon_8bpp(ctx, yoff, uvoff);
3238 inter_recon_16bpp(ctx);
3240 inter_recon_8bpp(ctx);
// transfer the visible part of the luma block from s->tmp_y (stride 128)
// into the frame, in column strips handled by entries of the dsp.mc table
3244 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3246 for (n = 0; o < w; n++) {
3251 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o * bytesperpixel, f->linesize[0],
3252 s->tmp_y + o * bytesperpixel, 128, h, 0, 0);
// same transfer for both chroma planes; sizes are reduced by the
// subsampling shifts and the table index starts at s->ss_h
3258 int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
3259 int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
3261 for (n = s->ss_h; o < w; n++) {
3266 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o * bytesperpixel, f->linesize[1],
3267 s->tmp_uv[0] + o * bytesperpixel, 128, h, 0, 0);
3268 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o * bytesperpixel, f->linesize[2],
3269 s->tmp_uv[1] + o * bytesperpixel, 128, h, 0, 0);
3275 // pick filter level and find edges to apply filter to
// the level is looked up per segment, per reference (index 0 for intra) and
// per mode class (ZEROMV or not); nothing is recorded when it is 0
3276 if (s->s.h.filter.level &&
3277 (lvl = s->s.h.segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3278 [b->mode[3] != ZEROMV]) > 0) {
// clip the block extent to the part that lies inside the frame
3279 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3280 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
// store the level for every position covered by the block, then mark the
// edges to filter: always for luma, and with a separate mask set for chroma
// when chroma is subsampled in at least one direction
3282 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3283 mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3284 if (s->ss_h || s->ss_v)
3285 mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
3286 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3287 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3288 b->uvtx, skip_inter);
// fill the limit lookup tables for this level the first time it is seen;
// the limit is reduced according to the sharpness and is at least 1
3290 if (!s->filter_lut.lim_lut[lvl]) {
3291 int sharp = s->s.h.filter.sharpness;
3295 limit >>= (sharp + 3) >> 2;
3296 limit = FFMIN(limit, 9 - sharp);
3298 limit = FFMAX(limit, 1);
3300 s->filter_lut.lim_lut[lvl] = limit;
3301 s->filter_lut.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
// step the coefficient and eob positions past this block (same amounts as
// in the earlier advance above; which of the two runs depends on hidden
// conditions)
3307 s->block += w4 * h4 * 64 * bytesperpixel;
3308 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3309 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3310 s->eob += 4 * w4 * h4;
3311 s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3312 s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
/*
 * Parse the partitioning of the square region at (row, col) for block level
 * bl and decode its contents, recursing with bl + 1 for split partitions.
 *
 * Partition symbols are read from the range coder s->c. When the region
 * crosses the right or bottom frame edge only a single flag is read (or
 * nothing at all) and the remaining choices are implied. The resulting
 * partition is counted in s->counts.partition.
 *
 * yoff/uvoff are byte offsets of the region in the luma/chroma planes.
 */
3316 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3317 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3319 VP9Context *s = ctx->priv_data;
// probability context: bit 0 from the above neighbour, bit 1 from the left
// neighbour, each taken from the partition context bit for this level
3320 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3321 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
// keyframes and intra-only frames use the fixed default table, all other
// frames use the probabilities of the current frame context
3322 const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? vp9_default_kf_partition_probs[bl][c] :
3323 s->prob.p.partition[bl][c];
3324 enum BlockPartition bp;
// half the size of the region, in the same units as row/col; it is scaled
// by 8 below wherever a pixel offset is needed
3325 ptrdiff_t hbs = 4 >> bl;
3326 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
3327 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3328 int bytesperpixel = s->bytesperpixel;
// NOTE(review): the condition guarding this first case is not visible here;
// presumably it is the smallest block level, where no recursion is possible
3331 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3332 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// second half of the region is inside the frame horizontally ...
3333 } else if (col + hbs < s->cols) { // FIXME why not <=?
// ... and vertically: the full partition symbol is coded
3334 if (row + hbs < s->rows) { // FIXME why not <=?
3335 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3337 case PARTITION_NONE:
3338 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// two blocks stacked vertically: the second one starts hbs rows lower
3341 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3342 yoff += hbs * 8 * y_stride;
3343 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3344 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
// two blocks side by side: the second one starts hbs columns to the right
3347 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3348 yoff += hbs * 8 * bytesperpixel;
3349 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3350 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
// four quadrants, visited top-left, top-right, bottom-left, bottom-right
3352 case PARTITION_SPLIT:
3353 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3354 decode_sb(ctx, row, col + hbs, lflvl,
3355 yoff + 8 * hbs * bytesperpixel,
3356 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3357 yoff += hbs * 8 * y_stride;
3358 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3359 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3360 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3361 yoff + 8 * hbs * bytesperpixel,
3362 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// bottom half is outside the frame: one flag (probability p[1]) chooses
// between a split, of which only the two top quadrants are decoded, and a
// single block
3367 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3368 bp = PARTITION_SPLIT;
3369 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3370 decode_sb(ctx, row, col + hbs, lflvl,
3371 yoff + 8 * hbs * bytesperpixel,
3372 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3375 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// right half is outside the frame but the bottom half is inside: one flag
// (probability p[2]) chooses between a split, of which only the two left
// quadrants are decoded, and a single block
3377 } else if (row + hbs < s->rows) { // FIXME why not <=?
3378 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3379 bp = PARTITION_SPLIT;
3380 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3381 yoff += hbs * 8 * y_stride;
3382 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3383 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3386 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// remaining case: nothing is read, a split is implied and only the top-left
// quadrant is decoded
3389 bp = PARTITION_SPLIT;
3390 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
// statistics for the partition probabilities
3392 s->counts.partition[bl][c][bp]++;
/*
 * Counterpart of decode_sb() that reads nothing from the range coder: the
 * block level and partition of each block are taken from the stored VP9Block
 * (b->bl, b->bp) and the same region traversal is replayed.
 *
 * NOTE(review): presumably used for the second pass of two-pass decoding,
 * after the block data was parsed in a first pass - confirm at the call site.
 */
3395 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3396 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3398 VP9Context *s = ctx->priv_data;
// half the size of the region in row/col units (scaled by 8 for pixels)
3400 ptrdiff_t hbs = 4 >> bl;
3401 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
3402 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3403 int bytesperpixel = s->bytesperpixel;
// NOTE(review): the guard of this first case is hidden; the assert shows
// that the stored block is expected to be at level BL_8X8 here
3406 av_assert2(b->bl == BL_8X8);
3407 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
// the stored block sits at this level: decode it, plus its second half for
// horizontal/vertical partitions when that half starts inside the frame
3408 } else if (s->b->bl == bl) {
3409 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3410 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3411 yoff += hbs * 8 * y_stride;
3412 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3413 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3414 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3415 yoff += hbs * 8 * bytesperpixel;
3416 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3417 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// otherwise the region is split: recurse into every quadrant whose origin
// lies inside the frame
3420 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3421 if (col + hbs < s->cols) { // FIXME why not <=?
3422 if (row + hbs < s->rows) {
3423 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
3424 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3425 yoff += hbs * 8 * y_stride;
3426 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3427 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3428 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3429 yoff + 8 * hbs * bytesperpixel,
3430 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// only the top-right quadrant exists besides the top-left one
3432 yoff += hbs * 8 * bytesperpixel;
3433 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3434 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
// only the bottom-left quadrant exists besides the top-left one
3436 } else if (row + hbs < s->rows) {
3437 yoff += hbs * 8 * y_stride;
3438 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3439 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
/*
 * Run the loop filter across the edges between columns for one plane of one
 * superblock.
 *
 * lvl points at the per-position filter levels, mask holds one row of four
 * bitmasks per mask row (index 0..2 select the filter width, index 3 marks
 * the additional edge located 4 pixels into the block), dst/ls are the plane
 * pointer and its line size. ss_h/ss_v are the subsampling shifts of the
 * plane (0 for luma).
 *
 * NOTE(review): several nested conditions are not visible in this listing;
 * the comments describe only the calls that can be seen.
 */
3444 static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
3445 uint8_t *lvl, uint8_t (*mask)[4],
3446 uint8_t *dst, ptrdiff_t ls)
3448 int y, x, bytesperpixel = s->bytesperpixel;
3450 // filter edges between columns (e.g. block1 | block2)
// every iteration covers 16 lines of the plane and looks at two mask rows
// at once (hmask1 for the upper 8 lines, hmask2 for the lower 8 lines)
3451 for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
3452 uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
3453 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3454 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3455 unsigned hm = hm1 | hm2 | hm13 | hm23;
// x is a single-bit column selector; the loop stops as soon as no mask bit
// at or above x is set
3457 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
// L is the filter level of the upper position; E and I are its limits from
// the lookup tables and H is L >> 4
3460 int L = *l, H = L >> 4;
3461 int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
// widest filter flagged for the upper row: when the lower row has it too,
// a single 16 line call covers both (the levels are asserted to be equal),
// otherwise only 8 lines are filtered
3463 if (hmask1[0] & x) {
3464 if (hmask2[0] & x) {
3465 av_assert2(l[8 << ss_v] == L);
3466 s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
3468 s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
// narrower filters on both rows: the limits of the lower row are packed into
// bits 8 and up of E and I and one combined mix2 call handles both rows
3470 } else if (hm2 & x) {
3473 E |= s->filter_lut.mblim_lut[L] << 8;
3474 I |= s->filter_lut.lim_lut[L] << 8;
3475 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3477 [0](ptr, ls, E, I, H);
// upper row only
3479 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3480 [0](ptr, ls, E, I, H);
// lower row only: uses the level 8 positions further on and starts 8 lines
// further down
3482 } else if (hm2 & x) {
3483 int L = l[8 << ss_v], H = L >> 4;
3484 int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3486 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3487 [0](ptr + 8 * ls, ls, E, I, H);
// edges flagged in mask index 3: same pairing logic, but the filter is
// applied 4 pixels to the right of ptr
3495 int L = *l, H = L >> 4;
3496 int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3501 E |= s->filter_lut.mblim_lut[L] << 8;
3502 I |= s->filter_lut.lim_lut[L] << 8;
3503 s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3505 s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3507 } else if (hm23 & x) {
3508 int L = l[8 << ss_v], H = L >> 4;
3509 int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3511 s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
/*
 * Run the loop filter across the edges between rows for one plane of one
 * superblock.
 *
 * Parameters mirror filter_plane_cols(): lvl holds the per-position filter
 * levels, mask one row of four bitmasks per mask row (index 3 marks the
 * additional edge 4 lines into the block), dst/ls the plane pointer and line
 * size, ss_h/ss_v the subsampling shifts of the plane.
 *
 * NOTE(review): several nested conditions and the update of lvl between
 * rows are not visible in this listing.
 */
3519 static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
3520 uint8_t *lvl, uint8_t (*mask)[4],
3521 uint8_t *dst, ptrdiff_t ls)
3523 int y, x, bytesperpixel = s->bytesperpixel;
3526 // filter edges between rows (e.g. ------)
// one mask row per iteration; dst moves down by 8 lines, reduced by the
// vertical subsampling shift
3528 for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
3529 uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
3530 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
// every step handles a pair of horizontally adjacent positions: bit x and
// bit x << (1 + ss_h); ptr advances by 16 pixels per step
3532 for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
// level and limits of the first position of the pair
3535 int L = *l, H = L >> 4;
3536 int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
// widest filter on both positions of the pair: one 16 pixel call (levels are
// asserted to be equal), otherwise 8 pixels only
3539 if (vmask[0] & (x << (1 + ss_h))) {
3540 av_assert2(l[1 + ss_h] == L);
3541 s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
3543 s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
// narrower filters on both positions: limits of the second position are
// packed into bits 8 and up and a combined mix2 call is used
3545 } else if (vm & (x << (1 + ss_h))) {
3548 E |= s->filter_lut.mblim_lut[L] << 8;
3549 I |= s->filter_lut.lim_lut[L] << 8;
3550 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3551 [!!(vmask[1] & (x << (1 + ss_h)))]
3552 [1](ptr, ls, E, I, H);
// first position only
3554 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3555 [1](ptr, ls, E, I, H);
// second position only: its own level, 8 pixels to the right
3557 } else if (vm & (x << (1 + ss_h))) {
3558 int L = l[1 + ss_h], H = L >> 4;
3559 int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3561 s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
3562 [1](ptr + 8 * bytesperpixel, ls, E, I, H);
// edges flagged in mask index 3: same pairing logic, applied 4 lines below
// ptr
3567 int L = *l, H = L >> 4;
3568 int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3570 if (vm3 & (x << (1 + ss_h))) {
3573 E |= s->filter_lut.mblim_lut[L] << 8;
3574 I |= s->filter_lut.lim_lut[L] << 8;
3575 s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
3577 s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
3579 } else if (vm3 & (x << (1 + ss_h))) {
3580 int L = l[1 + ss_h], H = L >> 4;
3581 int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3583 s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
/*
 * Loop-filter one superblock of the current frame: the luma plane first,
 * then both chroma planes, each with the column pass followed by the row
 * pass. yoff/uvoff are the byte offsets of the superblock in the luma and
 * chroma planes; lflvl supplies the filter levels and edge masks collected
 * while decoding.
 */
3596 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3597 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3599 VP9Context *s = ctx->priv_data;
3600 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
3601 uint8_t *dst = f->data[0] + yoff;
3602 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
// chroma uses mask set 1 when it is subsampled in any direction, otherwise
// it shares mask set 0 with luma
3603 uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
3606 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3607 // if you think of them as acting on a 8x8 block max, we can interleave
3608 // each v/h within the single x loop, but that only works if we work on
3609 // 8 pixel blocks, and we won't always do that (we want at least 16px
3610 // to use SSE2 optimizations, perhaps 32 for AVX2)
// luma plane, no subsampling
3612 filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
3613 filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);
// both chroma planes, with the subsampling shifts of the stream
3615 for (p = 0; p < 2; p++) {
3616 dst = f->data[1 + p] + uvoff;
3617 filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
3618 filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
/*
 * Compute the extent of tile number idx when n superblocks are divided into
 * 1 << log2_n tiles. The boundaries are (idx * n) >> log2_n and
 * ((idx + 1) * n) >> log2_n, clamped to n, and are written to *start and
 * *end multiplied by 8 (shift left by 3).
 */
3622 static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
3624 int sb_start = ( idx * n) >> log2_n;
3625 int sb_end = ((idx + 1) * n) >> log2_n;
3626 *start = FFMIN(sb_start, n) << 3;
3627 *end = FFMIN(sb_end, n) << 3;
/*
 * Move the probability *p towards the probability observed in the counts
 * ct0 (first outcome) and ct1 (second outcome).
 *
 * The observed probability p2 is ct0 / (ct0 + ct1) scaled to 8 bits, rounded
 * and clipped to 1..255. The blend weight is update_factor scaled by
 * min(ct0 + ct1, max_count) / max_count, so few observations move *p less.
 *
 * NOTE(review): the initialisation of p1 and any guard against
 * ct0 + ct1 == 0 (which would divide by zero below) are not visible in this
 * listing; presumably p1 holds the old value of *p - confirm.
 */
3630 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3631 int max_count, int update_factor)
3633 unsigned ct = ct0 + ct1, p2, p1;
3639 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3640 p2 = av_clip(p2, 1, 255);
3641 ct = FFMIN(ct, max_count);
3642 update_factor = FASTDIV(update_factor * ct, max_count);
3644 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3645 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
/*
 * Update the stored probability context selected by s->s.h.framectxid from
 * the symbol counts gathered in s->counts, one adapt_prob() call per binary
 * decision of each symbol tree.
 *
 * Coefficient probabilities use a maximum count of 24 and an update factor
 * uf that depends on the frame type; all other probabilities use a maximum
 * count of 20 and an update factor of 128.
 */
3648 static void adapt_probs(VP9Context *s)
3651 prob_context *p = &s->prob_ctx[s->s.h.framectxid].p;
3652 int uf = (s->s.h.keyframe || s->s.h.intraonly || !s->last_keyframe) ? 112 : 128;
// coefficient probabilities: three decisions per context, fed from the
// end-of-block counts e[] and the coefficient counts c[]
3655 for (i = 0; i < 4; i++)
3656 for (j = 0; j < 2; j++)
3657 for (k = 0; k < 2; k++)
3658 for (l = 0; l < 6; l++)
3659 for (m = 0; m < 6; m++) {
3660 uint8_t *pp = s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m];
3661 unsigned *e = s->counts.eob[i][j][k][l][m];
3662 unsigned *c = s->counts.coef[i][j][k][l][m];
3664 if (l == 0 && m >= 3) // dc only has 3 pt
3667 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3668 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3669 adapt_prob(&pp[2], c[1], c[2], 24, uf);
// keyframes and intra-only frames: the skip and transform size
// probabilities are copied unchanged from the current frame probabilities
// (NOTE(review): presumably the function returns after this - the end of
// this branch is not visible here)
3672 if (s->s.h.keyframe || s->s.h.intraonly) {
3673 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3674 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3675 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3676 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
// skip flag
3681 for (i = 0; i < 3; i++)
3682 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
// intra flag
3685 for (i = 0; i < 4; i++)
3686 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
// compound prediction flag, only coded when the mode is switchable
3689 if (s->s.h.comppredmode == PRED_SWITCHABLE) {
3690 for (i = 0; i < 5; i++)
3691 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
// compound reference, unused when only single references are allowed
3695 if (s->s.h.comppredmode != PRED_SINGLEREF) {
3696 for (i = 0; i < 5; i++)
3697 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3698 s->counts.comp_ref[i][1], 20, 128);
// single reference (two decisions), unused when only compound is allowed
3701 if (s->s.h.comppredmode != PRED_COMPREF) {
3702 for (i = 0; i < 5; i++) {
3703 uint8_t *pp = p->single_ref[i];
3704 unsigned (*c)[2] = s->counts.single_ref[i];
3706 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3707 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3711 // block partitioning
// four-symbol tree: each decision separates one symbol from all later ones
3712 for (i = 0; i < 4; i++)
3713 for (j = 0; j < 4; j++) {
3714 uint8_t *pp = p->partition[i][j];
3715 unsigned *c = s->counts.partition[i][j];
3717 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3718 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3719 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// transform size, only coded when the transform mode is switchable
3723 if (s->s.h.txfmmode == TX_SWITCHABLE) {
3724 for (i = 0; i < 2; i++) {
3725 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3727 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3728 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3729 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3730 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3731 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3732 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3736 // interpolation filter
3737 if (s->s.h.filtermode == FILTER_SWITCHABLE) {
3738 for (i = 0; i < 4; i++) {
3739 uint8_t *pp = p->filter[i];
3740 unsigned *c = s->counts.filter[i];
3742 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3743 adapt_prob(&pp[1], c[1], c[2], 20, 128);
// inter mode; note that the tree order differs from the count index order
// (index 2 is decided first, then index 0, then 1 against 3)
3748 for (i = 0; i < 7; i++) {
3749 uint8_t *pp = p->mv_mode[i];
3750 unsigned *c = s->counts.mv_mode[i];
3752 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3753 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3754 adapt_prob(&pp[2], c[1], c[3], 20, 128);
// motion vector joint type
3759 uint8_t *pp = p->mv_joint;
3760 unsigned *c = s->counts.mv_joint;
3762 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3763 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3764 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// the two motion vector components
3768 for (i = 0; i < 2; i++) {
3770 unsigned *c, (*c2)[2], sum;
3772 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3773 s->counts.mv_comp[i].sign[1], 20, 128);
// magnitude class tree over 11 classes; sum starts as the count of all
// classes after class 0
// NOTE(review): presumably sum is reduced between the calls below by
// statements that are not visible in this listing - confirm
3775 pp = p->mv_comp[i].classes;
3776 c = s->counts.mv_comp[i].classes;
3777 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3778 adapt_prob(&pp[0], c[0], sum, 20, 128);
3780 adapt_prob(&pp[1], c[1], sum, 20, 128);
3782 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3783 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3785 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3786 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3788 adapt_prob(&pp[6], c[6], sum, 20, 128);
3789 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3790 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3791 adapt_prob(&pp[9], c[9], c[10], 20, 128);
// class0 flag and the ten integer bits
3793 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3794 s->counts.mv_comp[i].class0[1], 20, 128);
3795 pp = p->mv_comp[i].bits;
3796 c2 = s->counts.mv_comp[i].bits;
3797 for (j = 0; j < 10; j++)
3798 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
// fractional part, separately for class0 (two contexts) and other classes
3800 for (j = 0; j < 2; j++) {
3801 pp = p->mv_comp[i].class0_fp[j];
3802 c = s->counts.mv_comp[i].class0_fp[j];
3803 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3804 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3805 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3807 pp = p->mv_comp[i].fp;
3808 c = s->counts.mv_comp[i].fp;
3809 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3810 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3811 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// high precision bit, only when enabled in the frame header
3813 if (s->s.h.highprecisionmvs) {
3814 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3815 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3816 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3817 s->counts.mv_comp[i].hp[1], 20, 128);
// luma intra prediction mode tree; sum starts as the total of all counts
// except index 2 (presumably DC_PRED, which is decided first - confirm) and
// shrinks as modes are split off
3822 for (i = 0; i < 4; i++) {
3823 uint8_t *pp = p->y_mode[i];
3824 unsigned *c = s->counts.y_mode[i], sum, s2;
3826 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3827 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3828 sum -= c[TM_VP8_PRED];
3829 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3830 sum -= c[VERT_PRED];
3831 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
// s2 groups three modes that share a subtree
// NOTE(review): sum and s2 are presumably adjusted by statements hidden
// between the following calls - confirm
3832 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3834 adapt_prob(&pp[3], s2, sum, 20, 128);
3836 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3837 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3838 sum -= c[DIAG_DOWN_LEFT_PRED];
3839 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3840 sum -= c[VERT_LEFT_PRED];
3841 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3842 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// chroma intra prediction mode: same tree, one probability set per first
// index of uv_mode (ten sets)
3846 for (i = 0; i < 10; i++) {
3847 uint8_t *pp = p->uv_mode[i];
3848 unsigned *c = s->counts.uv_mode[i], sum, s2;
3850 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3851 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3852 sum -= c[TM_VP8_PRED];
3853 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3854 sum -= c[VERT_PRED];
3855 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3856 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3858 adapt_prob(&pp[3], s2, sum, 20, 128);
3860 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3861 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3862 sum -= c[DIAG_DOWN_LEFT_PRED];
3863 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3864 sum -= c[VERT_LEFT_PRED];
3865 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3866 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
/*
 * Release the context buffers anchored at intra_pred_data[0], b_base and
 * block_base. av_freep() also resets each pointer to NULL.
 */
3870 static void free_buffers(VP9Context *s)
3872 av_freep(&s->intra_pred_data[0]);
3873 av_freep(&s->b_base);
3874 av_freep(&s->block_base);
/*
 * Codec close callback: drops every frame reference held by the decoder
 * context and frees the AVFrame objects themselves.
 */
3877 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3879 VP9Context *s = ctx->priv_data;
// the three internal frames; only frames that hold a buffer are unreferenced
3882 for (i = 0; i < 3; i++) {
3883 if (s->s.frames[i].tf.f->buf[0])
3884 vp9_unref_frame(ctx, &s->s.frames[i]);
3885 av_frame_free(&s->s.frames[i].tf.f);
// the eight reference slots and their pending replacements
3887 for (i = 0; i < 8; i++) {
3888 if (s->s.refs[i].f->buf[0])
3889 ff_thread_release_buffer(ctx, &s->s.refs[i]);
3890 av_frame_free(&s->s.refs[i].f);
3891 if (s->next_refs[i].f->buf[0])
3892 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3893 av_frame_free(&s->next_refs[i].f);
/*
 * Decode callback: decodes one packet into the current frame.
 *
 * Visible flow: parse the frame header, handle the case where an existing
 * reference is output directly, rotate the internal helper frames, allocate
 * the new current frame, prepare the next reference set, decode all tiles
 * one superblock row at a time (with loop filtering and progress reporting
 * per row), then install the new references and return the frame unless it
 * is marked invisible.
 *
 * NOTE(review): error returns, several conditions and the statements that
 * advance the data pointer are not visible in this listing.
 */
3903 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3904 int *got_frame, AVPacket *pkt)
3906 const uint8_t *data = pkt->data;
3907 int size = pkt->size;
3908 VP9Context *s = ctx->priv_data;
3909 int res, tile_row, tile_col, i, ref, row, col;
// evaluated before the new header is parsed, so it reflects the header
// state left by the previous frame
3910 int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
3911 (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
3912 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
// res == 0 means there is nothing to decode: reference slot ref is output
// as the frame, after checking that the slot holds a buffer
3916 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3918 } else if (res == 0) {
3919 if (!s->s.refs[ref].f->buf[0]) {
3920 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3921 return AVERROR_INVALIDDATA;
3923 if ((res = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
3925 ((AVFrame *)frame)->pkt_pts = pkt->pts;
3926 ((AVFrame *)frame)->pkt_dts = pkt->dts;
// the reference set is carried over unchanged into next_refs
3927 for (i = 0; i < 8; i++) {
3928 if (s->next_refs[i].f->buf[0])
3929 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3930 if (s->s.refs[i].f->buf[0] &&
3931 (res = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
// replace the segmentation map reference by the previous current frame,
// unless the old map is to be retained
3940 if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
3941 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
3942 vp9_unref_frame(ctx, &s->s.frames[REF_FRAME_SEGMAP]);
3943 if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
3944 (res = vp9_ref_frame(ctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
// the motion vector reference always moves on to the previous current
// frame, or is left empty for key, intra-only and error resilient frames
3947 if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
3948 vp9_unref_frame(ctx, &s->s.frames[REF_FRAME_MVPAIR]);
3949 if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
3950 (res = vp9_ref_frame(ctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
// allocate the frame that is about to be decoded
3952 if (s->s.frames[CUR_FRAME].tf.f->buf[0])
3953 vp9_unref_frame(ctx, &s->s.frames[CUR_FRAME]);
3954 if ((res = vp9_alloc_frame(ctx, &s->s.frames[CUR_FRAME])) < 0)
3956 f = s->s.frames[CUR_FRAME].tf.f;
3957 f->key_frame = s->s.h.keyframe;
3958 f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3959 ls_y = f->linesize[0];
3960 ls_uv =f->linesize[1];
// drop the segmentation map reference when the frame size changed
// NOTE(review): the size comparison reads REF_FRAME_MVPAIR while the buffer
// check and the unref act on REF_FRAME_SEGMAP - confirm this is intended
3962 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
3963 (s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width ||
3964 s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
3965 vp9_unref_frame(ctx, &s->s.frames[REF_FRAME_SEGMAP]);
// build the next reference set: slots selected by refreshrefmask take the
// current frame, the others keep their present reference
3969 for (i = 0; i < 8; i++) {
3970 if (s->next_refs[i].f->buf[0])
3971 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3972 if (s->s.h.refreshrefmask & (1 << i)) {
3973 res = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
3974 } else if (s->s.refs[i].f->buf[0]) {
3975 res = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
3981 // main tile decode loop
// reset the contexts that describe the row above the first superblock row
3982 bytesperpixel = s->bytesperpixel;
3983 memset(s->above_partition_ctx, 0, s->cols);
3984 memset(s->above_skip_ctx, 0, s->cols);
3985 if (s->s.h.keyframe || s->s.h.intraonly) {
3986 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
3988 memset(s->above_mode_ctx, NEARESTMV, s->cols);
3990 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
3991 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
3992 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
3993 memset(s->above_segpred_ctx, 0, s->cols);
// two-pass decoding is used with frame threading when the frame refreshes
// its probability context and parallel mode is off
3994 s->pass = s->s.frames[CUR_FRAME].uses_2pass =
3995 ctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
3996 if ((res = update_block_buffers(ctx)) < 0) {
3997 av_log(ctx, AV_LOG_ERROR,
3998 "Failed to allocate block buffers\n");
// parallel mode: the context is refreshed from the header probabilities
// without adaptation, so it can be stored and setup finished right away
4001 if (s->s.h.refreshctx && s->s.h.parallelmode) {
4004 for (i = 0; i < 4; i++) {
4005 for (j = 0; j < 2; j++)
4006 for (k = 0; k < 2; k++)
4007 for (l = 0; l < 6; l++)
4008 for (m = 0; m < 6; m++)
4009 memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
4010 s->prob.coef[i][j][k][l][m], 3);
4011 if (s->s.h.txfmmode == i)
4014 s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
4015 ff_thread_finish_setup(ctx);
// no context refresh at all: nothing further to wait for
4016 } else if (!s->s.h.refreshctx) {
4017 ff_thread_finish_setup(ctx);
// rewind the coefficient and eob write positions to the buffer bases
4023 s->block = s->block_base;
4024 s->uvblock[0] = s->uvblock_base[0];
4025 s->uvblock[1] = s->uvblock_base[1];
4026 s->eob = s->eob_base;
4027 s->uveob[0] = s->uveob_base[0];
4028 s->uveob[1] = s->uveob_base[1];
4030 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
4031 set_tile_offset(&s->tile_row_start, &s->tile_row_end,
4032 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
// set up one range decoder per tile column; the size of a tile is read as a
// 32 bit big-endian value
// NOTE(review): presumably the very last tile has no size field and uses
// the remaining data - the branch bodies are not visible here
4034 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
4037 if (tile_col == s->s.h.tiling.tile_cols - 1 &&
4038 tile_row == s->s.h.tiling.tile_rows - 1) {
4041 tile_size = AV_RB32(data);
// a tile larger than the remaining data, or a set marker bit, is invalid;
// progress is reported as complete before failing so that waiters are
// released
4045 if (tile_size > size) {
4046 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
4047 return AVERROR_INVALIDDATA;
4049 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
4050 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
4051 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
4052 return AVERROR_INVALIDDATA;
// one superblock row per iteration (row advances by 8, offsets by 64 lines)
4059 for (row = s->tile_row_start; row < s->tile_row_end;
4060 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
4061 struct VP9Filter *lflvl_ptr = s->lflvl;
4062 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
4064 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
4065 set_tile_offset(&s->tile_col_start, &s->tile_col_end,
4066 tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
// reset the contexts that describe the column left of the tile
4069 memset(s->left_partition_ctx, 0, 8);
4070 memset(s->left_skip_ctx, 0, 8);
4071 if (s->s.h.keyframe || s->s.h.intraonly) {
4072 memset(s->left_mode_ctx, DC_PRED, 16);
4074 memset(s->left_mode_ctx, NEARESTMV, 8);
4076 memset(s->left_y_nnz_ctx, 0, 16);
4077 memset(s->left_uv_nnz_ctx, 0, 32);
4078 memset(s->left_segpred_ctx, 0, 8);
// continue with the range decoder state saved for this tile column
4080 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
// one superblock per iteration (col advances by 8, offsets by 64 pixels)
4083 for (col = s->tile_col_start;
4084 col < s->tile_col_end;
4085 col += 8, yoff2 += 64 * bytesperpixel,
4086 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4087 // FIXME integrate with lf code (i.e. zero after each
4088 // use, similar to invtxfm coefficients, or similar)
4090 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
// decode_sb_mem replays stored block data, decode_sb parses the bitstream
// (presumably selected by the current pass - confirm)
4094 decode_sb_mem(ctx, row, col, lflvl_ptr,
4095 yoff2, uvoff2, BL_64X64);
4097 decode_sb(ctx, row, col, lflvl_ptr,
4098 yoff2, uvoff2, BL_64X64);
// save the range decoder state for the next superblock row of this tile
4102 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
4110 // backup pre-loopfilter reconstruction data for intra
4111 // prediction of next row of sb64s
4112 if (row + 8 < s->rows) {
4113 memcpy(s->intra_pred_data[0],
4114 f->data[0] + yoff + 63 * ls_y,
4115 8 * s->cols * bytesperpixel);
4116 memcpy(s->intra_pred_data[1],
4117 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4118 8 * s->cols * bytesperpixel >> s->ss_h);
4119 memcpy(s->intra_pred_data[2],
4120 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4121 8 * s->cols * bytesperpixel >> s->ss_h);
4124 // loopfilter one row
4125 if (s->s.h.filter.level) {
4128 lflvl_ptr = s->lflvl;
4129 for (col = 0; col < s->cols;
4130 col += 8, yoff2 += 64 * bytesperpixel,
4131 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4132 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
4136 // FIXME maybe we can make this more finegrained by running the
4137 // loopfilter per-block instead of after each sbrow
4138 // In fact that would also make intra pred left preparation easier?
// report the finished superblock row (row >> 3) to waiting threads
4139 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
// once the bitstream is parsed the probability context is final for frames
// that refresh it, so dependent threads may start
// NOTE(review): presumably the probabilities are adapted in a hidden
// statement just before finishing setup - confirm
4143 if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
4145 ff_thread_finish_setup(ctx);
// the loop body runs a second time only when s->pass was 1 (two-pass mode)
4147 } while (s->pass++ == 1);
4148 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
// install the next reference set as the active one
4151 for (i = 0; i < 8; i++) {
4152 if (s->s.refs[i].f->buf[0])
4153 ff_thread_release_buffer(ctx, &s->s.refs[i]);
4154 if (s->next_refs[i].f->buf[0] &&
4155 (res = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
// invisible frames are decoded but not returned to the caller
4159 if (!s->s.h.invisible) {
4160 if ((res = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
/*
 * Flush callback: drops the three internal frames and the eight reference
 * slots. The AVFrame objects themselves are kept for reuse.
 */
4168 static void vp9_decode_flush(AVCodecContext *ctx)
4170 VP9Context *s = ctx->priv_data;
4173 for (i = 0; i < 3; i++)
4174 vp9_unref_frame(ctx, &s->s.frames[i]);
4175 for (i = 0; i < 8; i++)
4176 ff_thread_release_buffer(ctx, &s->s.refs[i]);
/*
 * Allocate the AVFrame objects used by the decoder context: three internal
 * frames plus eight reference slots and eight pending reference slots.
 *
 * On any allocation failure everything is released through
 * vp9_decode_free(), an error is logged and AVERROR(ENOMEM) is returned.
 */
4179 static int init_frames(AVCodecContext *ctx)
4181 VP9Context *s = ctx->priv_data;
4184 for (i = 0; i < 3; i++) {
4185 s->s.frames[i].tf.f = av_frame_alloc();
4186 if (!s->s.frames[i].tf.f) {
4187 vp9_decode_free(ctx);
4188 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4189 return AVERROR(ENOMEM);
// both frames of a slot are allocated before the combined check
4192 for (i = 0; i < 8; i++) {
4193 s->s.refs[i].f = av_frame_alloc();
4194 s->next_refs[i].f = av_frame_alloc();
4195 if (!s->s.refs[i].f || !s->next_refs[i].f) {
4196 vp9_decode_free(ctx);
4197 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4198 return AVERROR(ENOMEM);
/*
 * Codec init callback: enables progress allocation for frame threading,
 * sets the stored loop filter sharpness to -1 and allocates the frame
 * objects. Returns the result of init_frames().
 */
4205 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4207 VP9Context *s = ctx->priv_data;
4209 ctx->internal->allocate_progress = 1;
// NOTE(review): -1 is presumably a value no stream can signal, so that the
// first parsed header is seen as a sharpness change - confirm in the header
// parser
4211 s->s.h.filter.sharpness = -1;
4213 return init_frames(ctx);
/*
 * Init callback for per-thread context copies: only the frame objects are
 * allocated, via init_frames().
 */
4217 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4219 return init_frames(avctx);
/*
 * Frame threading callback: bring the decoder context of dst up to date
 * with the state of src, so that dst can decode the following frame.
 *
 * Copies frame references, the next reference set of src, selected header
 * fields, pixel format parameters and the probability contexts.
 */
4222 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4225 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4227 // detect size changes in other threads
// NOTE(review): the body of this check is not visible here; presumably the
// buffers of dst are released so that they get reallocated - confirm
4228 if (s->intra_pred_data[0] &&
4229 (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols ||
4230 s->rows != ssrc->rows || s->bpp != ssrc->bpp)) {
// mirror the three internal frames of the source context
4234 for (i = 0; i < 3; i++) {
4235 if (s->s.frames[i].tf.f->buf[0])
4236 vp9_unref_frame(dst, &s->s.frames[i]);
4237 if (ssrc->s.frames[i].tf.f->buf[0]) {
4238 if ((res = vp9_ref_frame(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
// the active references of dst come from the next_refs of src, i.e. the set
// that becomes valid after the frame src is decoding
4242 for (i = 0; i < 8; i++) {
4243 if (s->s.refs[i].f->buf[0])
4244 ff_thread_release_buffer(dst, &s->s.refs[i]);
4245 if (ssrc->next_refs[i].f->buf[0]) {
4246 if ((res = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
// header and format state carried from frame to frame
4251 s->s.h.invisible = ssrc->s.h.invisible;
4252 s->s.h.keyframe = ssrc->s.h.keyframe;
4253 s->s.h.intraonly = ssrc->s.h.intraonly;
4254 s->ss_v = ssrc->ss_v;
4255 s->ss_h = ssrc->ss_h;
4256 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
4257 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
4258 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
4259 s->bytesperpixel = ssrc->bytesperpixel;
4261 s->bpp_index = ssrc->bpp_index;
// probability contexts, loop filter deltas and segmentation features
4262 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4263 memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
4264 memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
4265 sizeof(s->s.h.segmentation.feat));
/* Names of the four VP9 profiles; FF_PROFILE_UNKNOWN ends the list. */
4271 static const AVProfile profiles[] = {
4272 { FF_PROFILE_VP9_0, "Profile 0" },
4273 { FF_PROFILE_VP9_1, "Profile 1" },
4274 { FF_PROFILE_VP9_2, "Profile 2" },
4275 { FF_PROFILE_VP9_3, "Profile 3" },
4276 { FF_PROFILE_UNKNOWN },
/*
 * Codec registration entry for the VP9 decoder: wires the callbacks defined
 * in this file into the AVCodec structure. The decoder advertises direct
 * rendering and frame threading; the two threading callbacks are only set
 * when thread support is compiled in.
 */
4279 AVCodec ff_vp9_decoder = {
4281 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4282 .type = AVMEDIA_TYPE_VIDEO,
4283 .id = AV_CODEC_ID_VP9,
4284 .priv_data_size = sizeof(VP9Context),
4285 .init = vp9_decode_init,
4286 .close = vp9_decode_free,
4287 .decode = vp9_decode_frame,
4288 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
4289 .flush = vp9_decode_flush,
4290 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4291 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
4292 .profiles = NULL_IF_CONFIG_SMALL(profiles),