2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
34 #include "libavutil/pixdesc.h"
36 #define VP9_SYNCCODE 0x498342
// NOTE(review): this chunk is an elided extract — the gaps in the embedded
// original line numbers show missing members (e.g. the ThreadFrame "tf" used
// by vp9_alloc_frame below), the closing of VP9Frame, and the opening of the
// VP9Filter struct that owns the mask[] field at the end of this fragment.
// Per-frame state; segmentation_map and mv both point into the single
// "extradata" buffer (see vp9_alloc_frame for the layout).
73 typedef struct VP9Frame {
// refcounted backing store owning segmentation_map and mv
75 AVBufferRef *extradata;
// one segment id per 8x8 block, = extradata->data (see vp9_alloc_frame)
76 uint8_t *segmentation_map;
// per-block mv/ref pairs, placed immediately after the segmap in extradata
77 struct VP9mvrefPair *mv;
// loopfilter edge bitmask — belongs to struct VP9Filter (opening line elided)
83 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
84 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
// Per-block decode state (mode decisions for one coding block).
// NOTE(review): closing of the struct is elided from this extract.
87 typedef struct VP9Block {
88 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
89 enum FilterMode filter;
// up to 4 sub-block mvs, each with up to 2 references
90 VP56mv mv[4 /* b_idx */][2 /* ref */];
92 enum TxfmMode tx, uvtx;
94 enum BlockPartition bp;
// Main decoder context. NOTE(review): this extract elides many members
// (visible from the jumps in the embedded original line numbers), including
// the GetBitContext/range coders, prob/prob_ctx, frames[], and the counts
// struct header — the "unsigned ..." arrays below are the adaptivity
// counters of that elided counts struct.
97 typedef struct VP9Context {
104 VP9Block *b_base, *b;
// current block position, row7/col7 presumably row&7 / col&7 — TODO confirm
106 int row, row7, col, col7;
108 ptrdiff_t y_stride, uv_stride;
// frame header flags (parsed in decode_frame_header)
111 uint8_t keyframe, last_keyframe;
112 uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
114 uint8_t use_last_frame_mvs;
119 uint8_t refreshrefmask;
120 uint8_t highprecisionmvs;
121 enum FilterMode filtermode;
122 uint8_t allowcompinter;
125 uint8_t parallelmode;
// the two variable compound-reference indices chosen from signbias[]
129 uint8_t varcompref[2];
// 8-slot reference picture pool, double-buffered across a frame decode
130 ThreadFrame refs[8], next_refs[8];
132 #define REF_FRAME_MVPAIR 1
133 #define REF_FRAME_SEGMAP 2
// loopfilter limit lookup, invalidated when sharpness changes
140 uint8_t mblim_lut[64];
// quantizer deltas from the frame header (signed, see get_sbits_inv)
148 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
150 #define MAX_SEGMENT 8
154 uint8_t absolute_vals;
156 uint8_t ignore_refmap;
161 uint8_t skip_enabled;
// tiling configuration from the frame header
170 unsigned log2_tile_cols, log2_tile_rows;
171 unsigned tile_cols, tile_rows;
172 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
// frame dimensions in 64x64 superblocks and in 8x8 blocks
174 unsigned sb_cols, sb_rows, rows, cols;
177 uint8_t coef[4][2][2][6][6][3];
181 uint8_t coef[4][2][2][6][6][11];
// symbol-occurrence counters for backward probability adaptation
// (members of an elided "counts" struct — see the memsets in
// decode_frame_header)
186 unsigned y_mode[4][10];
187 unsigned uv_mode[10][10];
188 unsigned filter[4][3];
189 unsigned mv_mode[7][4];
190 unsigned intra[4][2];
192 unsigned single_ref[5][2][2];
193 unsigned comp_ref[5][2];
194 unsigned tx32p[2][4];
195 unsigned tx16p[2][3];
198 unsigned mv_joint[4];
201 unsigned classes[11];
203 unsigned bits[10][2];
204 unsigned class0_fp[2][4];
206 unsigned class0_hp[2];
209 unsigned partition[4][4][4];
210 unsigned coef[4][2][2][6][6][3];
211 unsigned eob[4][2][2][6][6][2];
213 enum TxfmMode txfmmode;
214 enum CompPredMode comppredmode;
216 // contextual (left/above) cache
217 DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
218 DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
219 DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
220 DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
221 DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
222 DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
223 DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
224 DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
225 DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
226 DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
227 DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
228 DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
// the "above" counterparts are width-dependent and allocated in update_size
229 uint8_t *above_partition_ctx;
230 uint8_t *above_mode_ctx;
231 // FIXME maybe merge some of the below in a flags field?
232 uint8_t *above_y_nnz_ctx;
233 uint8_t *above_uv_nnz_ctx[2];
234 uint8_t *above_skip_ctx; // 1bit
235 uint8_t *above_txfm_ctx; // 2bit
236 uint8_t *above_segpred_ctx; // 1bit
237 uint8_t *above_intra_ctx; // 1bit
238 uint8_t *above_comp_ctx; // 1bit
239 uint8_t *above_ref_ctx; // 2bit
240 uint8_t *above_filter_ctx;
241 VP56mv (*above_mv_ctx)[2];
// whole-frame cache (also allocated in update_size, same slab)
244 uint8_t *intra_pred_data[3];
245 struct VP9Filter *lflvl;
246 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2];
248 // block reconstruction intermediates
249 int block_alloc_using_2pass;
250 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
251 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
252 struct { int x, y; } min_mv, max_mv;
253 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
254 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
// per-reference mv scaling (Q14) and step, set in decode_frame_header
255 uint16_t mvscale[3][2];
256 uint8_t mvstep[3][2];
// Per-block-size {width, height} lookup in two granularities: the first
// sub-table appears to be in 4x4 units (64x64 -> {16,16}), the second the
// same sizes halved with a floor of 1 — presumably 8x8 units; TODO confirm
// against the elided row braces (the inner "{"/"}" lines are missing from
// this extract).
259 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
261 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
262 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
264 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
265 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
269 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
271 VP9Context *s = ctx->priv_data;
274 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
276 sz = 64 * s->sb_cols * s->sb_rows;
277 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
278 ff_thread_release_buffer(ctx, &f->tf);
279 return AVERROR(ENOMEM);
282 f->segmentation_map = f->extradata->data;
283 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
288 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
290 ff_thread_release_buffer(ctx, &f->tf);
291 av_buffer_unref(&f->extradata);
292 f->segmentation_map = NULL;
295 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
299 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
301 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
302 vp9_unref_frame(ctx, dst);
303 return AVERROR(ENOMEM);
306 dst->segmentation_map = src->segmentation_map;
308 dst->uses_2pass = src->uses_2pass;
// (Re)allocate all width/height-dependent decoder state when the coded
// frame size or pixel format changes; no-op when nothing changed.
// NOTE(review): this extract elides several interior lines (embedded
// original numbers jump, e.g. 321->327 where the early "return 0" and the
// call that commits w/h/fmt to the AVCodecContext presumably live, and the
// allocation-failure check before line 339) — confirm against the full file.
313 static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
315 VP9Context *s = ctx->priv_data;
317 int bytesperpixel = s->bytesperpixel;
319 av_assert0(w > 0 && h > 0);
// fast path: geometry and format unchanged since last call
321 if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
// derive superblock (64px) and 8x8-block grid dimensions
327 s->sb_cols = (w + 63) >> 6;
328 s->sb_rows = (h + 63) >> 6;
329 s->cols = (w + 7) >> 3;
330 s->rows = (h + 7) >> 3;
// carve all per-column "above" contexts plus lflvl out of one slab,
// each slice sized per superblock column
332 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
333 av_freep(&s->intra_pred_data[0]);
334 // FIXME we slightly over-allocate here for subsampled chroma, but a little
335 // bit of padding shouldn't affect performance...
336 p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
337 sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
339 return AVERROR(ENOMEM);
340 assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
341 assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
342 assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
343 assign(s->above_y_nnz_ctx, uint8_t *, 16);
344 assign(s->above_mode_ctx, uint8_t *, 16);
345 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
346 assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
347 assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
348 assign(s->above_partition_ctx, uint8_t *, 8);
349 assign(s->above_skip_ctx, uint8_t *, 8);
350 assign(s->above_txfm_ctx, uint8_t *, 8);
351 assign(s->above_segpred_ctx, uint8_t *, 8);
352 assign(s->above_intra_ctx, uint8_t *, 8);
353 assign(s->above_comp_ctx, uint8_t *, 8);
354 assign(s->above_ref_ctx, uint8_t *, 8);
355 assign(s->above_filter_ctx, uint8_t *, 8);
356 assign(s->lflvl, struct VP9Filter *, 1);
359 // these will be re-allocated a little later
360 av_freep(&s->b_base);
361 av_freep(&s->block_base);
// re-init DSP function tables only when bit depth actually changed
363 if (s->bpp != s->last_bpp) {
364 ff_vp9dsp_init(&s->dsp, s->bpp);
365 ff_videodsp_init(&s->vdsp, s->bpp);
366 s->last_bpp = s->bpp;
372 static int update_block_buffers(AVCodecContext *ctx)
374 VP9Context *s = ctx->priv_data;
375 int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
377 if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
381 av_free(s->block_base);
382 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
383 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
384 if (s->frames[CUR_FRAME].uses_2pass) {
385 int sbs = s->sb_cols * s->sb_rows;
387 s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
388 s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
389 16 * 16 + 2 * chroma_eobs) * sbs);
390 if (!s->b_base || !s->block_base)
391 return AVERROR(ENOMEM);
392 s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
393 s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
394 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
395 s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
396 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
398 s->b_base = av_malloc(sizeof(VP9Block));
399 s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
400 16 * 16 + 2 * chroma_eobs);
401 if (!s->b_base || !s->block_base)
402 return AVERROR(ENOMEM);
403 s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
404 s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
405 s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
406 s->uveob_base[0] = s->eob_base + 16 * 16;
407 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
409 s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
414 // for some reason the sign bit is at the end, not the start, of a bit sequence
415 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
417 int v = get_bits(gb, n);
418 return get_bits1(gb) ? -v : v;
// fallback so this helper also compiles standalone; in the real build
// libavutil/attributes.h (pulled in via avassert.h) defines this first
#ifndef av_always_inline
#define av_always_inline inline
#endif
// Inverse of the "recenter" folding used by VP9's differential probability
// update: v is the folded (non-negative) code, m the pivot. Values beyond
// 2*m are passed through; otherwise odd codes map below m, even codes above.
// (Reconstructed from a garbled extract: missing braces restored, stray
// line-number prefixes stripped.)
static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
}
426 // differential forward probability updates
427 static int update_prob(VP56RangeCoder *c, int p)
429 static const int inv_map_table[255] = {
430 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
431 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
432 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
433 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
434 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
435 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
436 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
437 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
438 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
439 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
440 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
441 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
442 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
443 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
444 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
445 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
446 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
447 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
452 /* This code is trying to do a differential probability update. For a
453 * current probability A in the range [1, 255], the difference to a new
454 * probability of any value can be expressed differentially as 1-A,255-A
455 * where some part of this (absolute range) exists both in positive as
456 * well as the negative part, whereas another part only exists in one
457 * half. We're trying to code this shared part differentially, i.e.
458 * times two where the value of the lowest bit specifies the sign, and
459 * the single part is then coded on top of this. This absolute difference
460 * then again has a value of [0,254], but a bigger value in this range
461 * indicates that we're further away from the original value A, so we
462 * can code this as a VLC code, since higher values are increasingly
463 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
464 * updates vs. the 'fine, exact' updates further down the range, which
465 * adds one extra dimension to this differential update model. */
467 if (!vp8_rac_get(c)) {
468 d = vp8_rac_get_uint(c, 4) + 0;
469 } else if (!vp8_rac_get(c)) {
470 d = vp8_rac_get_uint(c, 4) + 16;
471 } else if (!vp8_rac_get(c)) {
472 d = vp8_rac_get_uint(c, 5) + 32;
474 d = vp8_rac_get_uint(c, 7);
476 d = (d << 1) - 65 + vp8_rac_get(c);
478 av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
481 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
482 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
485 static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
487 static const enum AVColorSpace colorspaces[8] = {
488 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
489 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
491 VP9Context *s = ctx->priv_data;
492 enum AVPixelFormat res;
493 int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
496 s->bpp = 8 + bits * 2;
497 s->bytesperpixel = (7 + s->bpp) >> 3;
498 ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
499 if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
500 static const enum AVPixelFormat pix_fmt_rgb[3] = {
501 AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
503 if (ctx->profile & 1) {
504 s->ss_h = s->ss_v = 0;
505 res = pix_fmt_rgb[bits];
506 ctx->color_range = AVCOL_RANGE_JPEG;
507 if (get_bits1(&s->gb)) {
508 av_log(ctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
509 return AVERROR_INVALIDDATA;
512 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
514 return AVERROR_INVALIDDATA;
517 static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
518 { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
519 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
520 { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
521 { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
522 { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
523 { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
525 ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
526 if (ctx->profile & 1) {
527 s->ss_h = get_bits1(&s->gb);
528 s->ss_v = get_bits1(&s->gb);
529 if ((res = pix_fmt_for_ss[bits][s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
530 av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
532 return AVERROR_INVALIDDATA;
533 } else if (get_bits1(&s->gb)) {
534 av_log(ctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
536 return AVERROR_INVALIDDATA;
539 s->ss_h = s->ss_v = 1;
540 res = pix_fmt_for_ss[bits][1][1];
// Parse the complete VP9 frame header: the uncompressed part (frame
// marker/profile/type, colorspace, size, references, loopfilter, quant,
// segmentation, tiling) with the GetBitContext, then the arith-coded
// "compressed header" (probability updates) with the range coder.
// Returns the total header size in bytes on success, negative AVERROR on
// error; *ref is set instead for show-existing-frame packets.
// NOTE(review): this extract elides many interior lines (visible as jumps
// in the embedded original numbering) — closing braces, else-arms and some
// statements are missing below; do not treat this text as compilable.
547 static int decode_frame_header(AVCodecContext *ctx,
548 const uint8_t *data, int size, int *ref)
550 VP9Context *s = ctx->priv_data;
551 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
552 enum AVPixelFormat fmt = ctx->pix_fmt;
554 const uint8_t *data2;
// general frame header
557 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
558 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
561 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
562 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
563 return AVERROR_INVALIDDATA;
// profile is 2 bits plus one reserved/extension bit when both are set
565 ctx->profile = get_bits1(&s->gb);
566 ctx->profile |= get_bits1(&s->gb) << 1;
567 if (ctx->profile == 3) ctx->profile += get_bits1(&s->gb);
568 if (ctx->profile > 3) {
569 av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
570 return AVERROR_INVALIDDATA;
// show-existing-frame: just report which ref slot to display
572 if (get_bits1(&s->gb)) {
573 *ref = get_bits(&s->gb, 3);
576 s->last_keyframe = s->keyframe;
577 s->keyframe = !get_bits1(&s->gb);
578 last_invisible = s->invisible;
579 s->invisible = !get_bits1(&s->gb);
580 s->errorres = get_bits1(&s->gb);
581 s->use_last_frame_mvs = !s->errorres && !last_invisible;
// keyframe branch: sync code, full color config, explicit size
583 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
584 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
585 return AVERROR_INVALIDDATA;
587 if ((fmt = read_colorspace_details(ctx)) < 0)
589 // for profile 1, here follows the subsampling bits
590 s->refreshrefmask = 0xff;
591 w = get_bits(&s->gb, 16) + 1;
592 h = get_bits(&s->gb, 16) + 1;
593 if (get_bits1(&s->gb)) // display size
594 skip_bits(&s->gb, 32);
// non-keyframe branch (intra-only or inter)
596 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
597 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
// intra-only sub-branch
599 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
600 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
601 return AVERROR_INVALIDDATA;
603 if (ctx->profile >= 1) {
604 if ((fmt = read_colorspace_details(ctx)) < 0)
// profile 0 intra-only frames are hardcoded 8-bit 4:2:0 BT.470BG
607 s->ss_h = s->ss_v = 1;
610 s->bytesperpixel = 1;
611 fmt = AV_PIX_FMT_YUV420P;
612 ctx->colorspace = AVCOL_SPC_BT470BG;
613 ctx->color_range = AVCOL_RANGE_JPEG;
615 s->refreshrefmask = get_bits(&s->gb, 8);
616 w = get_bits(&s->gb, 16) + 1;
617 h = get_bits(&s->gb, 16) + 1;
618 if (get_bits1(&s->gb)) // display size
619 skip_bits(&s->gb, 32);
// inter sub-branch: three reference slots with sign biases
621 s->refreshrefmask = get_bits(&s->gb, 8);
622 s->refidx[0] = get_bits(&s->gb, 3);
623 s->signbias[0] = get_bits1(&s->gb) && !s->errorres;
624 s->refidx[1] = get_bits(&s->gb, 3);
625 s->signbias[1] = get_bits1(&s->gb) && !s->errorres;
626 s->refidx[2] = get_bits(&s->gb, 3);
627 s->signbias[2] = get_bits1(&s->gb) && !s->errorres;
628 if (!s->refs[s->refidx[0]].f->data[0] ||
629 !s->refs[s->refidx[1]].f->data[0] ||
630 !s->refs[s->refidx[2]].f->data[0]) {
631 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
632 return AVERROR_INVALIDDATA;
// frame size: either inherited from one of the refs or coded explicitly
634 if (get_bits1(&s->gb)) {
635 w = s->refs[s->refidx[0]].f->width;
636 h = s->refs[s->refidx[0]].f->height;
637 } else if (get_bits1(&s->gb)) {
638 w = s->refs[s->refidx[1]].f->width;
639 h = s->refs[s->refidx[1]].f->height;
640 } else if (get_bits1(&s->gb)) {
641 w = s->refs[s->refidx[2]].f->width;
642 h = s->refs[s->refidx[2]].f->height;
644 w = get_bits(&s->gb, 16) + 1;
645 h = get_bits(&s->gb, 16) + 1;
647 // Note that in this code, "CUR_FRAME" is actually before we
648 // have formally allocated a frame, and thus actually represents
// temporal mv prediction only works if the previous frame had our size
650 s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
651 s->frames[CUR_FRAME].tf.f->height == h;
652 if (get_bits1(&s->gb)) // display size
653 skip_bits(&s->gb, 32);
654 s->highprecisionmvs = get_bits1(&s->gb);
655 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
// compound prediction is only possible with mixed sign biases;
// pick the fixed ref and the two variable refs accordingly
657 s->allowcompinter = (s->signbias[0] != s->signbias[1] ||
658 s->signbias[0] != s->signbias[2]);
659 if (s->allowcompinter) {
660 if (s->signbias[0] == s->signbias[1]) {
662 s->varcompref[0] = 0;
663 s->varcompref[1] = 1;
664 } else if (s->signbias[0] == s->signbias[2]) {
666 s->varcompref[0] = 0;
667 s->varcompref[1] = 2;
670 s->varcompref[0] = 1;
671 s->varcompref[1] = 2;
// validate each ref's format/size and derive mv scaling (Q14) per ref
675 for (i = 0; i < 3; i++) {
676 AVFrame *ref = s->refs[s->refidx[i]].f;
677 int refw = ref->width, refh = ref->height;
679 if (ref->format != fmt) {
680 av_log(ctx, AV_LOG_ERROR,
681 "Ref pixfmt (%s) did not match current frame (%s)",
682 av_get_pix_fmt_name(ref->format),
683 av_get_pix_fmt_name(fmt));
684 return AVERROR_INVALIDDATA;
685 } else if (refw == w && refh == h) {
686 s->mvscale[i][0] = s->mvscale[i][1] = 0;
// spec limits scaling to [1/2x, 16x] in each dimension
688 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
689 av_log(ctx, AV_LOG_ERROR,
690 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
692 return AVERROR_INVALIDDATA;
694 s->mvscale[i][0] = (refw << 14) / w;
695 s->mvscale[i][1] = (refh << 14) / h;
696 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
697 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
702 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
703 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
704 s->framectxid = c = get_bits(&s->gb, 2);
706 /* loopfilter header data */
707 if (s->keyframe || s->errorres || s->intraonly) {
708 // reset loopfilter defaults
709 s->lf_delta.ref[0] = 1;
710 s->lf_delta.ref[1] = 0;
711 s->lf_delta.ref[2] = -1;
712 s->lf_delta.ref[3] = -1;
713 s->lf_delta.mode[0] = 0;
714 s->lf_delta.mode[1] = 0;
715 memset(s->segmentation.feat, 0, sizeof(s->segmentation.feat));
717 s->filter.level = get_bits(&s->gb, 6);
718 sharp = get_bits(&s->gb, 3);
719 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
720 // the old cache values since they are still valid
721 if (s->filter.sharpness != sharp)
722 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
723 s->filter.sharpness = sharp;
724 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
725 if (get_bits1(&s->gb)) {
726 for (i = 0; i < 4; i++)
727 if (get_bits1(&s->gb))
728 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
729 for (i = 0; i < 2; i++)
730 if (get_bits1(&s->gb))
731 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
735 /* quantization header data */
736 s->yac_qi = get_bits(&s->gb, 8);
737 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
738 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
739 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
740 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
741 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
743 ctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
745 /* segmentation header info */
746 s->segmentation.ignore_refmap = 0;
747 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
748 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
749 for (i = 0; i < 7; i++)
750 s->prob.seg[i] = get_bits1(&s->gb) ?
751 get_bits(&s->gb, 8) : 255;
752 if ((s->segmentation.temporal = get_bits1(&s->gb))) {
753 for (i = 0; i < 3; i++)
754 s->prob.segpred[i] = get_bits1(&s->gb) ?
755 get_bits(&s->gb, 8) : 255;
// reusing the previous segmap across a size change is unreliable;
// decode on but ignore the stale reference map
758 if ((!s->segmentation.update_map || s->segmentation.temporal) &&
759 (w != s->frames[CUR_FRAME].tf.f->width ||
760 h != s->frames[CUR_FRAME].tf.f->height)) {
761 av_log(ctx, AV_LOG_WARNING,
762 "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
763 s->segmentation.temporal, s->segmentation.update_map);
764 s->segmentation.ignore_refmap = 1;
765 //return AVERROR_INVALIDDATA;
768 if (get_bits1(&s->gb)) {
769 s->segmentation.absolute_vals = get_bits1(&s->gb);
770 for (i = 0; i < 8; i++) {
771 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
772 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
773 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
774 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
775 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
776 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
777 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
782 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
783 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
784 int qyac, qydc, quvac, quvdc, lflvl, sh;
786 if (s->segmentation.enabled && s->segmentation.feat[i].q_enabled) {
787 if (s->segmentation.absolute_vals)
788 qyac = av_clip_uintp2(s->segmentation.feat[i].q_val, 8);
790 qyac = av_clip_uintp2(s->yac_qi + s->segmentation.feat[i].q_val, 8);
794 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
795 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
796 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
797 qyac = av_clip_uintp2(qyac, 8);
799 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
800 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
801 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
802 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];
// loopfilter level per segment, with optional per-ref/mode deltas;
// sh doubles the delta when the base level is >= 32
804 sh = s->filter.level >= 32;
805 if (s->segmentation.enabled && s->segmentation.feat[i].lf_enabled) {
806 if (s->segmentation.absolute_vals)
807 lflvl = av_clip_uintp2(s->segmentation.feat[i].lf_val, 6);
809 lflvl = av_clip_uintp2(s->filter.level + s->segmentation.feat[i].lf_val, 6);
811 lflvl = s->filter.level;
813 if (s->lf_delta.enabled) {
814 s->segmentation.feat[i].lflvl[0][0] =
815 s->segmentation.feat[i].lflvl[0][1] =
816 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
817 for (j = 1; j < 4; j++) {
818 s->segmentation.feat[i].lflvl[j][0] =
819 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
820 s->lf_delta.mode[0]) * (1 << sh)), 6);
821 s->segmentation.feat[i].lflvl[j][1] =
822 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
823 s->lf_delta.mode[1]) * (1 << sh)), 6);
826 memset(s->segmentation.feat[i].lflvl, lflvl,
827 sizeof(s->segmentation.feat[i].lflvl));
// commit the new geometry/format (allocates width-dependent buffers)
832 if ((res = update_size(ctx, w, h, fmt)) < 0) {
833 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
// tiling: min log2 cols so each tile is <= 64 superblocks wide, max so
// each tile is >= 4 superblocks; extra bits refine within that range
836 for (s->tiling.log2_tile_cols = 0;
837 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
838 s->tiling.log2_tile_cols++) ;
839 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
840 max = FFMAX(0, max - 1);
841 while (max > s->tiling.log2_tile_cols) {
842 if (get_bits1(&s->gb))
843 s->tiling.log2_tile_cols++;
847 s->tiling.log2_tile_rows = decode012(&s->gb);
848 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
849 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
850 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
// one range coder per tile column
851 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
852 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
854 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
855 return AVERROR(ENOMEM);
// probability context reset/selection per spec reset rules
859 if (s->keyframe || s->errorres || (s->intraonly && s->resetctx == 3)) {
860 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
861 s->prob_ctx[3].p = vp9_default_probs;
862 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
863 sizeof(vp9_default_coef_probs));
864 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
865 sizeof(vp9_default_coef_probs));
866 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
867 sizeof(vp9_default_coef_probs));
868 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
869 sizeof(vp9_default_coef_probs));
870 } else if (s->intraonly && s->resetctx == 2) {
871 s->prob_ctx[c].p = vp9_default_probs;
872 memcpy(s->prob_ctx[c].coef, vp9_default_coef_probs,
873 sizeof(vp9_default_coef_probs));
876 // next 16 bits is size of the rest of the header (arith-coded)
877 size2 = get_bits(&s->gb, 16);
878 data2 = align_get_bits(&s->gb);
879 if (size2 > size - (data2 - data)) {
880 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
881 return AVERROR_INVALIDDATA;
883 ff_vp56_init_range_decoder(&s->c, data2, size2);
884 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
885 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
886 return AVERROR_INVALIDDATA;
// reset adaptation counters (intra frames only adapt coef/eob)
889 if (s->keyframe || s->intraonly) {
890 memset(s->counts.coef, 0, sizeof(s->counts.coef));
891 memset(s->counts.eob, 0, sizeof(s->counts.eob));
893 memset(&s->counts, 0, sizeof(s->counts));
895 // FIXME is it faster to not copy here, but do it down in the fw updates
896 // as explicit copies if the fw update is missing (and skip the copy upon
898 s->prob.p = s->prob_ctx[c].p;
// txfm mode: forced TX_4X4 when lossless, otherwise coded (3 -> +1 bit)
902 s->txfmmode = TX_4X4;
904 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
905 if (s->txfmmode == 3)
906 s->txfmmode += vp8_rac_get(&s->c);
908 if (s->txfmmode == TX_SWITCHABLE) {
909 for (i = 0; i < 2; i++)
910 if (vp56_rac_get_prob_branchy(&s->c, 252))
911 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
912 for (i = 0; i < 2; i++)
913 for (j = 0; j < 2; j++)
914 if (vp56_rac_get_prob_branchy(&s->c, 252))
915 s->prob.p.tx16p[i][j] =
916 update_prob(&s->c, s->prob.p.tx16p[i][j]);
917 for (i = 0; i < 2; i++)
918 for (j = 0; j < 3; j++)
919 if (vp56_rac_get_prob_branchy(&s->c, 252))
920 s->prob.p.tx32p[i][j] =
921 update_prob(&s->c, s->prob.p.tx32p[i][j]);
// coefficient probability updates, per txfm size up to txfmmode
926 for (i = 0; i < 4; i++) {
927 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
928 if (vp8_rac_get(&s->c)) {
929 for (j = 0; j < 2; j++)
930 for (k = 0; k < 2; k++)
931 for (l = 0; l < 6; l++)
932 for (m = 0; m < 6; m++) {
933 uint8_t *p = s->prob.coef[i][j][k][l][m];
934 uint8_t *r = ref[j][k][l][m];
935 if (m >= 3 && l == 0) // dc only has 3 pt
937 for (n = 0; n < 3; n++) {
938 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
939 p[n] = update_prob(&s->c, r[n]);
// no update coded: copy the context's probabilities as-is
947 for (j = 0; j < 2; j++)
948 for (k = 0; k < 2; k++)
949 for (l = 0; l < 6; l++)
950 for (m = 0; m < 6; m++) {
951 uint8_t *p = s->prob.coef[i][j][k][l][m];
952 uint8_t *r = ref[j][k][l][m];
953 if (m > 3 && l == 0) // dc only has 3 pt
959 if (s->txfmmode == i)
// mode/ref/filter probability updates (inter frames only below)
964 for (i = 0; i < 3; i++)
965 if (vp56_rac_get_prob_branchy(&s->c, 252))
966 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
967 if (!s->keyframe && !s->intraonly) {
968 for (i = 0; i < 7; i++)
969 for (j = 0; j < 3; j++)
970 if (vp56_rac_get_prob_branchy(&s->c, 252))
971 s->prob.p.mv_mode[i][j] =
972 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
974 if (s->filtermode == FILTER_SWITCHABLE)
975 for (i = 0; i < 4; i++)
976 for (j = 0; j < 2; j++)
977 if (vp56_rac_get_prob_branchy(&s->c, 252))
978 s->prob.p.filter[i][j] =
979 update_prob(&s->c, s->prob.p.filter[i][j]);
981 for (i = 0; i < 4; i++)
982 if (vp56_rac_get_prob_branchy(&s->c, 252))
983 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
985 if (s->allowcompinter) {
986 s->comppredmode = vp8_rac_get(&s->c);
988 s->comppredmode += vp8_rac_get(&s->c);
989 if (s->comppredmode == PRED_SWITCHABLE)
990 for (i = 0; i < 5; i++)
991 if (vp56_rac_get_prob_branchy(&s->c, 252))
993 update_prob(&s->c, s->prob.p.comp[i]);
995 s->comppredmode = PRED_SINGLEREF;
998 if (s->comppredmode != PRED_COMPREF) {
999 for (i = 0; i < 5; i++) {
1000 if (vp56_rac_get_prob_branchy(&s->c, 252))
1001 s->prob.p.single_ref[i][0] =
1002 update_prob(&s->c, s->prob.p.single_ref[i][0]);
1003 if (vp56_rac_get_prob_branchy(&s->c, 252))
1004 s->prob.p.single_ref[i][1] =
1005 update_prob(&s->c, s->prob.p.single_ref[i][1]);
1009 if (s->comppredmode != PRED_SINGLEREF) {
1010 for (i = 0; i < 5; i++)
1011 if (vp56_rac_get_prob_branchy(&s->c, 252))
1012 s->prob.p.comp_ref[i] =
1013 update_prob(&s->c, s->prob.p.comp_ref[i]);
1016 for (i = 0; i < 4; i++)
1017 for (j = 0; j < 9; j++)
1018 if (vp56_rac_get_prob_branchy(&s->c, 252))
1019 s->prob.p.y_mode[i][j] =
1020 update_prob(&s->c, s->prob.p.y_mode[i][j]);
1022 for (i = 0; i < 4; i++)
1023 for (j = 0; j < 4; j++)
1024 for (k = 0; k < 3; k++)
1025 if (vp56_rac_get_prob_branchy(&s->c, 252))
1026 s->prob.p.partition[3 - i][j][k] =
1027 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
1029 // mv fields don't use the update_prob subexp model for some reason
1030 for (i = 0; i < 3; i++)
1031 if (vp56_rac_get_prob_branchy(&s->c, 252))
1032 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1034 for (i = 0; i < 2; i++) {
1035 if (vp56_rac_get_prob_branchy(&s->c, 252))
1036 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1038 for (j = 0; j < 10; j++)
1039 if (vp56_rac_get_prob_branchy(&s->c, 252))
1040 s->prob.p.mv_comp[i].classes[j] =
1041 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1043 if (vp56_rac_get_prob_branchy(&s->c, 252))
1044 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1046 for (j = 0; j < 10; j++)
1047 if (vp56_rac_get_prob_branchy(&s->c, 252))
1048 s->prob.p.mv_comp[i].bits[j] =
1049 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1052 for (i = 0; i < 2; i++) {
1053 for (j = 0; j < 2; j++)
1054 for (k = 0; k < 3; k++)
1055 if (vp56_rac_get_prob_branchy(&s->c, 252))
1056 s->prob.p.mv_comp[i].class0_fp[j][k] =
1057 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1059 for (j = 0; j < 3; j++)
1060 if (vp56_rac_get_prob_branchy(&s->c, 252))
1061 s->prob.p.mv_comp[i].fp[j] =
1062 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1065 if (s->highprecisionmvs) {
1066 for (i = 0; i < 2; i++) {
1067 if (vp56_rac_get_prob_branchy(&s->c, 252))
1068 s->prob.p.mv_comp[i].class0_hp =
1069 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1071 if (vp56_rac_get_prob_branchy(&s->c, 252))
1072 s->prob.p.mv_comp[i].hp =
1073 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// total header size: uncompressed part + compressed part
1078 return (data2 - data) + size2;
/* Clamp a motion vector component-wise into the currently valid range
 * (s->min_mv / s->max_mv).
 * NOTE(review): the tail of the parameter list and the braces are elided
 * in this excerpt; presumably the final parameter is "VP9Context *s" —
 * confirm against the full file. */
1081 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
1084     dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
1085     dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
/* Derive a predicted motion vector (*pmv) for reference index 'ref' of the
 * current block, by scanning spatial neighbours, the co-located position in
 * the previous frame, and finally neighbours/co-located MVs that used a
 * *different* reference frame (sign-flipped when the reference sign biases
 * differ).  'z' selects which of the block's two references is being
 * predicted, 'idx' selects nearest (0) vs near (1) candidate, and 'sb' is
 * the sub-8x8 sub-block index (or a sentinel for whole-block prediction —
 * callers pass -1 for NEWMV; see fill_mv()).
 * NOTE(review): many statements (macro bodies, early returns, closing
 * braces) are elided in this excerpt; comments below describe only what is
 * visible. */
1088 static void find_ref_mvs(VP9Context *s,
1089 VP56mv *pmv, int ref, int z, int idx, int sb)
     /* Per-block-size table of up to 8 neighbour offsets {col,row} (in 8x8
      * units, relative to the current block) to probe for candidate MVs. */
1091 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
1092 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
1093 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
1094 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
1095 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
1096 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
1097 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
1098 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
1099 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1100 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
1101 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1102 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
1103 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
1104 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
1105 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1106 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
1107 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
1108 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1109 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1110 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1111 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1112 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1113 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1114 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1115 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1116 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1117 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1120 int row = s->row, col = s->col, row7 = s->row7;
1121 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
     /* Sentinel: no candidate stored yet.  'mem' remembers the first
      * accepted candidate so a second, different one can be returned when
      * idx == 1 (the "near" MV). */
1122 #define INVALID_MV 0x80008000U
1123 uint32_t mem = INVALID_MV, mem_sub8x8 = INVALID_MV;
     /* Return 'mv' as-is (no clamping) when it qualifies as the idx-th
      * distinct candidate.  Body partially elided here. */
1126 #define RETURN_DIRECT_MV(mv) \
1128 uint32_t m = AV_RN32A(&mv); \
1132 } else if (mem == INVALID_MV) { \
1134 } else if (m != mem) { \
     /* For sub-8x8 sub-blocks, earlier sub-block MVs of this same block are
      * the highest-priority candidates. */
1141 if (sb == 2 || sb == 1) {
1142 RETURN_DIRECT_MV(b->mv[0][z]);
1143 } else if (sb == 3) {
1144 RETURN_DIRECT_MV(b->mv[2][z]);
1145 RETURN_DIRECT_MV(b->mv[1][z]);
1146 RETURN_DIRECT_MV(b->mv[0][z]);
     /* Return 'mv' (clamped via clamp_mv) when it qualifies as the idx-th
      * distinct candidate; mem_sub8x8 additionally de-duplicates against the
      * raw (unclamped) value for the sub-8x8 path.  Body partially elided. */
1149 #define RETURN_MV(mv) \
1154 av_assert2(idx == 1); \
1155 av_assert2(mem != INVALID_MV); \
1156 if (mem_sub8x8 == INVALID_MV) { \
1157 clamp_mv(&tmp, &mv, s); \
1158 m = AV_RN32A(&tmp); \
1163 mem_sub8x8 = AV_RN32A(&mv); \
1164 } else if (mem_sub8x8 != AV_RN32A(&mv)) { \
1165 clamp_mv(&tmp, &mv, s); \
1166 m = AV_RN32A(&tmp); \
1170 /* BUG I'm pretty sure this isn't the intention */ \
1176 uint32_t m = AV_RN32A(&mv); \
1178 clamp_mv(pmv, &mv, s); \
1180 } else if (mem == INVALID_MV) { \
1182 } else if (m != mem) { \
1183 clamp_mv(pmv, &mv, s); \
     /* Directly adjacent above/left neighbours, matching reference frame
      * (guard condition for the above-row elided in this excerpt). */
1190 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1191 if (mv->ref[0] == ref) {
1192 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1193 } else if (mv->ref[1] == ref) {
1194 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
1197 if (col > s->tiling.tile_col_start) {
1198 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1199 if (mv->ref[0] == ref) {
1200 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1201 } else if (mv->ref[1] == ref) {
1202 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1210 // previously coded MVs in this neighbourhood, using same reference frame
1211 for (; i < 8; i++) {
1212 int c = p[i][0] + col, r = p[i][1] + row;
     /* Stay within the tile horizontally and the frame vertically. */
1214 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1215 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1217 if (mv->ref[0] == ref) {
1218 RETURN_MV(mv->mv[0]);
1219 } else if (mv->ref[1] == ref) {
1220 RETURN_MV(mv->mv[1]);
1225 // MV at this position in previous frame, using same reference frame
1226 if (s->use_last_frame_mvs) {
1227 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
     /* In frame-threaded decode, wait until the reference frame's MV data
      * for this row is available before reading it. */
1229 if (!s->frames[REF_FRAME_MVPAIR].uses_2pass)
1230 ff_thread_await_progress(&s->frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
1231 if (mv->ref[0] == ref) {
1232 RETURN_MV(mv->mv[0]);
1233 } else if (mv->ref[1] == ref) {
1234 RETURN_MV(mv->mv[1]);
     /* Like RETURN_MV, but negates the MV first when 'scale' is set, i.e.
      * when the candidate's reference has the opposite sign bias. */
1238 #define RETURN_SCALE_MV(mv, scale) \
1241 VP56mv mv_temp = { -mv.x, -mv.y }; \
1242 RETURN_MV(mv_temp); \
1248 // previously coded MVs in this neighbourhood, using different reference frame
1249 for (i = 0; i < 8; i++) {
1250 int c = p[i][0] + col, r = p[i][1] + row;
1252 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1253 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1255 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1256 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1258 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1259 // BUG - libvpx has this condition regardless of whether
1260 // we used the first ref MV and pre-scaling
1261 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1262 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1267 // MV at this position in previous frame, using different reference frame
1268 if (s->use_last_frame_mvs) {
1269 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1271 // no need to await_progress, because we already did that above
1272 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1273 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1275 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1276 // BUG - libvpx has this condition regardless of whether
1277 // we used the first ref MV and pre-scaling
1278 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1279 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
     /* Fallback: no candidate found; clamp whatever *pmv already holds. */
1284 clamp_mv(pmv, pmv, s);
1287 #undef RETURN_SCALE_MV
/* Decode one motion-vector component (idx 0 = vertical/row, idx 1 =
 * horizontal/col, matching the call sites in fill_mv()) from the range
 * coder and update the per-component entropy counters.  'hp' enables the
 * extra high-precision bit.  Returns the signed component value
 * (magnitude n + 1, negated when the sign bit was set).
 * NOTE(review): several branches/braces are elided in this excerpt; the
 * large-class path (bits + fp + hp) and the class0 path are interleaved
 * below without their enclosing if/else. */
1290 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1292 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1293 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1294 s->prob.p.mv_comp[idx].classes);
1296 s->counts.mv_comp[idx].sign[sign]++;
1297 s->counts.mv_comp[idx].classes[c]++;
     /* Large classes: read 'c' magnitude bits, MSB-first accumulation
      * (accumulation statement elided here). */
1301 for (n = 0, m = 0; m < c; m++) {
1302 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1304 s->counts.mv_comp[idx].bits[m][bit]++;
     /* Fractional-pel part. */
1307 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1309 s->counts.mv_comp[idx].fp[bit]++;
     /* High-precision bit (only read when hp is enabled; guard elided). */
1311 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1312 s->counts.mv_comp[idx].hp[bit]++;
1316 // bug in libvpx - we count for bw entropy purposes even if the
1318 s->counts.mv_comp[idx].hp[1]++;
     /* Class-0 path: single integer bit, then fp and hp bits. */
1322 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1323 s->counts.mv_comp[idx].class0[n]++;
1324 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1325 s->prob.p.mv_comp[idx].class0_fp[n]);
1326 s->counts.mv_comp[idx].class0_fp[n][bit]++;
1327 n = (n << 3) | (bit << 1);
1329 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1330 s->counts.mv_comp[idx].class0_hp[bit]++;
1334 // bug in libvpx - we count for bw entropy purposes even if the
1336 s->counts.mv_comp[idx].class0_hp[1]++;
1340 return sign ? -(n + 1) : (n + 1);
/* Fill mv[0] (and mv[1] when the block is compound-predicted) for the
 * current (sub-)block: ZEROMV zeroes them; otherwise predict via
 * find_ref_mvs() and, for NEWMV, add decoded MV residuals on top.
 * 'sb' is the sub-block index (see find_ref_mvs); 'hp' is recomputed per
 * reference from s->highprecisionmvs and the predictor magnitude.
 * NOTE(review): the ZEROMV body and some closing braces are elided in this
 * excerpt. */
1343 static void fill_mv(VP9Context *s,
1344 VP56mv *mv, int mode, int sb)
1348 if (mode == ZEROMV) {
1353 // FIXME cache this value and reuse for other subblocks
1354 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1355 mode == NEWMV ? -1 : sb);
1356 // FIXME maybe move this code into find_ref_mvs()
     /* hp only stays enabled while the predictor is small (< 64 in both
      * components); otherwise the low MV bit is handled differently
      * (that handling is elided here). */
1357 if ((mode == NEWMV || sb == -1) &&
1358 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1372 if (mode == NEWMV) {
1373 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1374 s->prob.p.mv_joint);
1376 s->counts.mv_joint[j]++;
     /* The joint code tells which components carry a residual:
      * component 0 = y, component 1 = x (see read_mv_component). */
1377 if (j >= MV_JOINT_V)
1378 mv[0].y += read_mv_component(s, 0, hp);
1380 mv[0].x += read_mv_component(s, 1, hp);
     /* Second reference of a compound block: same procedure for mv[1]
      * (the enclosing b->comp guard is elided in this excerpt). */
1384 // FIXME cache this value and reuse for other subblocks
1385 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1386 mode == NEWMV ? -1 : sb);
1387 if ((mode == NEWMV || sb == -1) &&
1388 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1402 if (mode == NEWMV) {
1403 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1404 s->prob.p.mv_joint);
1406 s->counts.mv_joint[j]++;
1407 if (j >= MV_JOINT_V)
1408 mv[1].y += read_mv_component(s, 0, hp);
1410 mv[1].x += read_mv_component(s, 1, hp);
/* Fill a w x h byte region (row pitch 'stride') with the value 'v',
 * using the widest aligned store that matches w.  Used to splat per-block
 * context values (e.g. segment ids) into 2-D context maps.
 * NOTE(review): the switch/loop skeleton around these stores is elided in
 * this excerpt; only the per-width store setup lines are visible. */
1416 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1417 ptrdiff_t stride, int v)
     /* w == 2: replicate v into 16 bits. */
1427 int v16 = v * 0x0101;
     /* w == 4: replicate v into 32 bits. */
1435 uint32_t v32 = v * 0x01010101;
     /* w == 8: replicate v into 64 bits. */
1444 uint64_t v64 = v * 0x0101010101010101ULL;
     /* Fallback path without 64-bit stores: two 32-bit stores per row. */
1450 uint32_t v32 = v * 0x01010101;
1453 AV_WN32A(ptr + 4, v32);
/* Parse all mode information for the current block: segment id, skip flag,
 * intra/inter decision, transform size, intra prediction modes (or, for
 * inter blocks, reference frames, interpolation filter, inter modes and
 * motion vectors), then propagate everything into the above/left context
 * arrays and the per-frame MV/reference buffers used by later blocks and
 * by the next frame.
 * NOTE(review): this excerpt elides many lines (else-branches, closing
 * braces, some declarations); section comments below describe only the
 * visible code. */
1462 static void decode_mode(AVCodecContext *ctx)
     /* Left/above partition context values per block size. */
1464 static const uint8_t left_ctx[N_BS_SIZES] = {
1465 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1467 static const uint8_t above_ctx[N_BS_SIZES] = {
1468 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
     /* Largest transform size permitted for each block size. */
1470 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1471 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1472 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1474 VP9Context *s = ctx->priv_data;
1476 int row = s->row, col = s->col, row7 = s->row7;
1477 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
     /* w4/h4: block extent in 8x8 units, clipped to the frame edge. */
1478 int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1479 int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
1480 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1481 int vref, filter_id;
     /* --- segment id ------------------------------------------------- */
1483 if (!s->segmentation.enabled) {
1485 } else if (s->keyframe || s->intraonly) {
1486 b->seg_id = !s->segmentation.update_map ? 0 :
1487 vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
1488 } else if (!s->segmentation.update_map ||
1489 (s->segmentation.temporal &&
1490 vp56_rac_get_prob_branchy(&s->c,
1491 s->prob.segpred[s->above_segpred_ctx[col] +
1492 s->left_segpred_ctx[row7]]))) {
     /* Temporal prediction: take the minimum segment id over the
      * co-located area of the reference segmentation map. */
1493 if (!s->errorres && !s->segmentation.ignore_refmap) {
1495 uint8_t *refsegmap = s->frames[REF_FRAME_SEGMAP].segmentation_map;
1497 if (!s->frames[REF_FRAME_SEGMAP].uses_2pass)
1498 ff_thread_await_progress(&s->frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
1499 for (y = 0; y < h4; y++) {
1500 int idx_base = (y + row) * 8 * s->sb_cols + col;
1501 for (x = 0; x < w4; x++)
1502 pred = FFMIN(pred, refsegmap[idx_base + x]);
1504 av_assert1(pred < 8);
1510 memset(&s->above_segpred_ctx[col], 1, w4);
1511 memset(&s->left_segpred_ctx[row7], 1, h4);
1513 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1516 memset(&s->above_segpred_ctx[col], 0, w4);
1517 memset(&s->left_segpred_ctx[row7], 0, h4);
     /* Record the segment id into the current frame's segmentation map. */
1519 if (s->segmentation.enabled &&
1520 (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1521 setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1522 bw4, bh4, 8 * s->sb_cols, b->seg_id);
     /* --- skip flag -------------------------------------------------- */
1525 b->skip = s->segmentation.enabled &&
1526 s->segmentation.feat[b->seg_id].skip_enabled;
1528 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1529 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1530 s->counts.skip[c][b->skip]++;
     /* --- intra/inter decision --------------------------------------- */
1533 if (s->keyframe || s->intraonly) {
1535 } else if (s->segmentation.enabled && s->segmentation.feat[b->seg_id].ref_enabled) {
1536 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1540 if (have_a && have_l) {
1541 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1544 c = have_a ? 2 * s->above_intra_ctx[col] :
1545 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1547 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1548 s->counts.intra[c][bit]++;
     /* --- transform size --------------------------------------------- */
1552 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1556 c = (s->above_skip_ctx[col] ? max_tx :
1557 s->above_txfm_ctx[col]) +
1558 (s->left_skip_ctx[row7] ? max_tx :
1559 s->left_txfm_ctx[row7]) > max_tx;
1561 c = s->above_skip_ctx[col] ? 1 :
1562 (s->above_txfm_ctx[col] * 2 > max_tx);
1564 } else if (have_l) {
1565 c = s->left_skip_ctx[row7] ? 1 :
1566 (s->left_txfm_ctx[row7] * 2 > max_tx);
     /* Unary-coded tx size, capped by max_tx (switch skeleton elided). */
1572 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1574 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1576 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1578 s->counts.tx32p[c][b->tx]++;
1581 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1583 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1584 s->counts.tx16p[c][b->tx]++;
1587 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1588 s->counts.tx8p[c][b->tx]++;
1595 b->tx = FFMIN(max_tx, s->txfmmode);
     /* --- intra modes (keyframe/intra-only: default kf probabilities,
      *     contexted on above/left modes) ----------------------------- */
1598 if (s->keyframe || s->intraonly) {
1599 uint8_t *a = &s->above_mode_ctx[col * 2];
1600 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1603 if (b->bs > BS_8x8) {
1604 // FIXME the memory storage intermediates here aren't really
1605 // necessary, they're just there to make the code slightly
1607 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1608 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1609 if (b->bs != BS_8x4) {
1610 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1611 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1612 l[0] = a[1] = b->mode[1];
1614 l[0] = a[1] = b->mode[1] = b->mode[0];
1616 if (b->bs != BS_4x8) {
1617 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1618 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1619 if (b->bs != BS_8x4) {
1620 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1621 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1622 l[1] = a[1] = b->mode[3];
1624 l[1] = a[1] = b->mode[3] = b->mode[2];
1627 b->mode[2] = b->mode[0];
1628 l[1] = a[1] = b->mode[3] = b->mode[1];
1631 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1632 vp9_default_kf_ymode_probs[*a][*l]);
1633 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1634 // FIXME this can probably be optimized
1635 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1636 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1638 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1639 vp9_default_kf_uvmode_probs[b->mode[3]]);
     /* --- intra modes in inter frames: adaptive y_mode/uv_mode probs -- */
1640 } else if (b->intra) {
1642 if (b->bs > BS_8x8) {
1643 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1644 s->prob.p.y_mode[0]);
1645 s->counts.y_mode[0][b->mode[0]]++;
1646 if (b->bs != BS_8x4) {
1647 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1648 s->prob.p.y_mode[0]);
1649 s->counts.y_mode[0][b->mode[1]]++;
1651 b->mode[1] = b->mode[0];
1653 if (b->bs != BS_4x8) {
1654 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1655 s->prob.p.y_mode[0]);
1656 s->counts.y_mode[0][b->mode[2]]++;
1657 if (b->bs != BS_8x4) {
1658 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1659 s->prob.p.y_mode[0]);
1660 s->counts.y_mode[0][b->mode[3]]++;
1662 b->mode[3] = b->mode[2];
1665 b->mode[2] = b->mode[0];
1666 b->mode[3] = b->mode[1];
1669 static const uint8_t size_group[10] = {
1670 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1672 int sz = size_group[b->bs];
1674 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1675 s->prob.p.y_mode[sz]);
1676 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1677 s->counts.y_mode[sz][b->mode[3]]++;
1679 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1680 s->prob.p.uv_mode[b->mode[3]]);
1681 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
     /* --- inter path ------------------------------------------------- */
     /* Context LUT indexed by above/left mode-context values for the
      * inter-mode probability selection. */
1683 static const uint8_t inter_mode_ctx_lut[14][14] = {
1684 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1685 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1686 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1687 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1688 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1689 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1690 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1691 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1692 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1693 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1694 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1695 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1696 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1697 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
     /* Reference selection: either forced by the segment feature, or coded
      * (compound flag, then compound or single reference indices). */
1700 if (s->segmentation.enabled && s->segmentation.feat[b->seg_id].ref_enabled) {
1701 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1703 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1705 // read comp_pred flag
1706 if (s->comppredmode != PRED_SWITCHABLE) {
1707 b->comp = s->comppredmode == PRED_COMPREF;
1711 // FIXME add intra as ref=0xff (or -1) to make these easier?
1714 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1716 } else if (s->above_comp_ctx[col]) {
1717 c = 2 + (s->left_intra_ctx[row7] ||
1718 s->left_ref_ctx[row7] == s->fixcompref);
1719 } else if (s->left_comp_ctx[row7]) {
1720 c = 2 + (s->above_intra_ctx[col] ||
1721 s->above_ref_ctx[col] == s->fixcompref);
1723 c = (!s->above_intra_ctx[col] &&
1724 s->above_ref_ctx[col] == s->fixcompref) ^
1725 (!s->left_intra_ctx[row7] &&
     /* NOTE(review): "row & 7" equals row7 (see row7 = s->row7 above and
      * its use everywhere else) — same value, inconsistent spelling. */
1726 s->left_ref_ctx[row & 7] == s->fixcompref);
1729 c = s->above_comp_ctx[col] ? 3 :
1730 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1732 } else if (have_l) {
1733 c = s->left_comp_ctx[row7] ? 3 :
1734 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1738 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1739 s->counts.comp[c][b->comp]++;
1742 // read actual references
1743 // FIXME probably cache a few variables here to prevent repetitive
1744 // memory accesses below
1745 if (b->comp) /* two references */ {
1746 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1748 b->ref[fix_idx] = s->fixcompref;
1749 // FIXME can this codeblob be replaced by some sort of LUT?
1752 if (s->above_intra_ctx[col]) {
1753 if (s->left_intra_ctx[row7]) {
1756 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1758 } else if (s->left_intra_ctx[row7]) {
1759 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1761 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1763 if (refl == refa && refa == s->varcompref[1]) {
1765 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1766 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1767 (refl == s->fixcompref && refa == s->varcompref[0])) {
1770 c = (refa == refl) ? 3 : 1;
1772 } else if (!s->left_comp_ctx[row7]) {
1773 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1776 c = (refl == s->varcompref[1] &&
1777 refa != s->varcompref[1]) ? 2 : 4;
1779 } else if (!s->above_comp_ctx[col]) {
1780 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1783 c = (refa == s->varcompref[1] &&
1784 refl != s->varcompref[1]) ? 2 : 4;
1787 c = (refl == refa) ? 4 : 2;
1791 if (s->above_intra_ctx[col]) {
1793 } else if (s->above_comp_ctx[col]) {
1794 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1796 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1799 } else if (have_l) {
1800 if (s->left_intra_ctx[row7]) {
1802 } else if (s->left_comp_ctx[row7]) {
1803 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1805 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1810 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1811 b->ref[var_idx] = s->varcompref[bit];
1812 s->counts.comp_ref[c][bit]++;
1813 } else /* single reference */ {
     /* First single_ref bit: last-frame vs golden/altref. */
1816 if (have_a && !s->above_intra_ctx[col]) {
1817 if (have_l && !s->left_intra_ctx[row7]) {
1818 if (s->left_comp_ctx[row7]) {
1819 if (s->above_comp_ctx[col]) {
1820 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1821 !s->above_ref_ctx[col]);
1823 c = (3 * !s->above_ref_ctx[col]) +
1824 (!s->fixcompref || !s->left_ref_ctx[row7]);
1826 } else if (s->above_comp_ctx[col]) {
1827 c = (3 * !s->left_ref_ctx[row7]) +
1828 (!s->fixcompref || !s->above_ref_ctx[col]);
1830 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1832 } else if (s->above_intra_ctx[col]) {
1834 } else if (s->above_comp_ctx[col]) {
1835 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1837 c = 4 * (!s->above_ref_ctx[col]);
1839 } else if (have_l && !s->left_intra_ctx[row7]) {
     /* NOTE(review): this branch can never hit — the enclosing condition
      * already requires !left_intra_ctx.  Mirrors upstream behaviour. */
1840 if (s->left_intra_ctx[row7]) {
1842 } else if (s->left_comp_ctx[row7]) {
1843 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1845 c = 4 * (!s->left_ref_ctx[row7]);
1850 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1851 s->counts.single_ref[c][0][bit]++;
     /* Second single_ref bit: golden vs altref. */
1855 // FIXME can this codeblob be replaced by some sort of LUT?
1858 if (s->left_intra_ctx[row7]) {
1859 if (s->above_intra_ctx[col]) {
1861 } else if (s->above_comp_ctx[col]) {
1862 c = 1 + 2 * (s->fixcompref == 1 ||
1863 s->above_ref_ctx[col] == 1);
1864 } else if (!s->above_ref_ctx[col]) {
1867 c = 4 * (s->above_ref_ctx[col] == 1);
1869 } else if (s->above_intra_ctx[col]) {
1870 if (s->left_intra_ctx[row7]) {
1872 } else if (s->left_comp_ctx[row7]) {
1873 c = 1 + 2 * (s->fixcompref == 1 ||
1874 s->left_ref_ctx[row7] == 1);
1875 } else if (!s->left_ref_ctx[row7]) {
1878 c = 4 * (s->left_ref_ctx[row7] == 1);
1880 } else if (s->above_comp_ctx[col]) {
1881 if (s->left_comp_ctx[row7]) {
1882 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1883 c = 3 * (s->fixcompref == 1 ||
1884 s->left_ref_ctx[row7] == 1);
1888 } else if (!s->left_ref_ctx[row7]) {
1889 c = 1 + 2 * (s->fixcompref == 1 ||
1890 s->above_ref_ctx[col] == 1);
1892 c = 3 * (s->left_ref_ctx[row7] == 1) +
1893 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1895 } else if (s->left_comp_ctx[row7]) {
1896 if (!s->above_ref_ctx[col]) {
1897 c = 1 + 2 * (s->fixcompref == 1 ||
1898 s->left_ref_ctx[row7] == 1);
1900 c = 3 * (s->above_ref_ctx[col] == 1) +
1901 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1903 } else if (!s->above_ref_ctx[col]) {
1904 if (!s->left_ref_ctx[row7]) {
1907 c = 4 * (s->left_ref_ctx[row7] == 1);
1909 } else if (!s->left_ref_ctx[row7]) {
1910 c = 4 * (s->above_ref_ctx[col] == 1);
1912 c = 2 * (s->left_ref_ctx[row7] == 1) +
1913 2 * (s->above_ref_ctx[col] == 1);
1916 if (s->above_intra_ctx[col] ||
1917 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1919 } else if (s->above_comp_ctx[col]) {
1920 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1922 c = 4 * (s->above_ref_ctx[col] == 1);
1925 } else if (have_l) {
1926 if (s->left_intra_ctx[row7] ||
1927 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1929 } else if (s->left_comp_ctx[row7]) {
1930 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1932 c = 4 * (s->left_ref_ctx[row7] == 1);
1937 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1938 s->counts.single_ref[c][1][bit]++;
1939 b->ref[0] = 1 + bit;
     /* --- inter modes + interpolation filter ------------------------- */
1944 if (b->bs <= BS_8x8) {
1945 if (s->segmentation.enabled && s->segmentation.feat[b->seg_id].skip_enabled) {
1946 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
     /* Offset into above/left mode context per block size. */
1948 static const uint8_t off[10] = {
1949 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1952 // FIXME this needs to use the LUT tables from find_ref_mvs
1953 // because not all are -1,0/0,-1
1954 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1955 [s->left_mode_ctx[row7 + off[b->bs]]];
1957 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1958 s->prob.p.mv_mode[c]);
1959 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
     /* Inter modes start at 10 in the mode enum, hence the -10 bias
      * when indexing the counts array. */
1960 s->counts.mv_mode[c][b->mode[0] - 10]++;
1964 if (s->filtermode == FILTER_SWITCHABLE) {
1967 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1968 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1969 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1970 s->left_filter_ctx[row7] : 3;
1972 c = s->above_filter_ctx[col];
1974 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1975 c = s->left_filter_ctx[row7];
1980 filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1981 s->prob.p.filter[c]);
1982 s->counts.filter[c][filter_id]++;
1983 b->filter = vp9_filter_lut[filter_id];
1985 b->filter = s->filtermode;
     /* Sub-8x8 blocks: one mode + MV pair per sub-block. */
1988 if (b->bs > BS_8x8) {
1989 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1991 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1992 s->prob.p.mv_mode[c]);
1993 s->counts.mv_mode[c][b->mode[0] - 10]++;
1994 fill_mv(s, b->mv[0], b->mode[0], 0);
1996 if (b->bs != BS_8x4) {
1997 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1998 s->prob.p.mv_mode[c]);
1999 s->counts.mv_mode[c][b->mode[1] - 10]++;
2000 fill_mv(s, b->mv[1], b->mode[1], 1);
2002 b->mode[1] = b->mode[0];
2003 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
2004 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2007 if (b->bs != BS_4x8) {
2008 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
2009 s->prob.p.mv_mode[c]);
2010 s->counts.mv_mode[c][b->mode[2] - 10]++;
2011 fill_mv(s, b->mv[2], b->mode[2], 2);
2013 if (b->bs != BS_8x4) {
2014 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
2015 s->prob.p.mv_mode[c]);
2016 s->counts.mv_mode[c][b->mode[3] - 10]++;
2017 fill_mv(s, b->mv[3], b->mode[3], 3);
2019 b->mode[3] = b->mode[2];
2020 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
2021 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
2024 b->mode[2] = b->mode[0];
2025 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2026 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2027 b->mode[3] = b->mode[1];
2028 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
2029 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
     /* Whole-block MV: decode once, replicate to all four slots. */
2032 fill_mv(s, b->mv[0], b->mode[0], -1);
2033 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
2034 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2035 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
2036 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2037 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2038 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
     /* Reference-frame value to propagate into the ref context arrays. */
2041 vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
     /* --- context propagation ----------------------------------------
      * SPLAT_CTX writes 'val' replicated over 'n' context bytes; the
      * 64-bit variant is used when fast unaligned 64-bit stores are
      * available (the #if/#else selecting between them is elided). */
2045 #define SPLAT_CTX(var, val, n) \
2047 case 1: var = val; break; \
2048 case 2: AV_WN16A(&var, val * 0x0101); break; \
2049 case 4: AV_WN32A(&var, val * 0x01010101); break; \
2050 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
2052 uint64_t v64 = val * 0x0101010101010101ULL; \
2053 AV_WN64A( &var, v64); \
2054 AV_WN64A(&((uint8_t *) &var)[8], v64); \
2059 #define SPLAT_CTX(var, val, n) \
2061 case 1: var = val; break; \
2062 case 2: AV_WN16A(&var, val * 0x0101); break; \
2063 case 4: AV_WN32A(&var, val * 0x01010101); break; \
2065 uint32_t v32 = val * 0x01010101; \
2066 AV_WN32A( &var, v32); \
2067 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2071 uint32_t v32 = val * 0x01010101; \
2072 AV_WN32A( &var, v32); \
2073 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2074 AV_WN32A(&((uint8_t *) &var)[8], v32); \
2075 AV_WN32A(&((uint8_t *) &var)[12], v32); \
     /* Splat all per-block decisions into the above (width) and left
      * (height) context arrays. */
2081 switch (bwh_tab[1][b->bs][0]) {
2082 #define SET_CTXS(dir, off, n) \
2084 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
2085 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
2086 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2087 if (!s->keyframe && !s->intraonly) { \
2088 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
2089 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
2090 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
2092 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2093 if (s->filtermode == FILTER_SWITCHABLE) { \
2094 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2099 case 1: SET_CTXS(above, col, 1); break;
2100 case 2: SET_CTXS(above, col, 2); break;
2101 case 4: SET_CTXS(above, col, 4); break;
2102 case 8: SET_CTXS(above, col, 8); break;
2104 switch (bwh_tab[1][b->bs][1]) {
2105 case 1: SET_CTXS(left, row7, 1); break;
2106 case 2: SET_CTXS(left, row7, 2); break;
2107 case 4: SET_CTXS(left, row7, 4); break;
2108 case 8: SET_CTXS(left, row7, 8); break;
     /* Update the above/left MV context arrays used by find_ref_mvs(). */
2113 if (!s->keyframe && !s->intraonly) {
2114 if (b->bs > BS_8x8) {
2115 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2117 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2118 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2119 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2120 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2121 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2122 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2123 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2124 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2126 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2128 for (n = 0; n < w4 * 2; n++) {
2129 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2130 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2132 for (n = 0; n < h4 * 2; n++) {
2133 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2134 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
     /* Store references + MVs into the per-frame buffer (consumed by
      * later blocks and by the next frame's temporal MV prediction;
      * the intra branch body is elided). */
2140 for (y = 0; y < h4; y++) {
2141 int x, o = (row + y) * s->sb_cols * 8 + col;
2142 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
2145 for (x = 0; x < w4; x++) {
2149 } else if (b->comp) {
2150 for (x = 0; x < w4; x++) {
2151 mv[x].ref[0] = b->ref[0];
2152 mv[x].ref[1] = b->ref[1];
2153 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2154 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2157 for (x = 0; x < w4; x++) {
2158 mv[x].ref[0] = b->ref[0];
2160 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2166 // FIXME merge cnt/eob arguments?
// FIXME merge cnt/eob arguments?
/* Decode the quantized coefficients of one transform block from the range
 * coder into 'coef', following the VP9 token scheme: per-position EOB and
 * zero flags, a "one" flag, then escape categories of increasing size.
 * 'nnz' is the initial non-zero context; 'scan'/'nb' give the coefficient
 * order and neighbour pairs for context derivation; 'band_counts' the band
 * boundaries; 'qmul' the DC/AC dequant factors.  cnt/eob accumulate
 * per-band entropy statistics; 'p' holds the token probabilities
 * (entries [3..10] lazily filled from vp9_model_pareto8).
 * is_tx32x32 halves dequantized values; !is8bitsperpixel extends the
 * escape range and stores 32-bit coefficients.
 * NOTE(review): numerous lines (loop header, cache updates, returns) are
 * elided in this excerpt. */
2167 static av_always_inline int
2168 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2169 int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
2170 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2171 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2172 const int16_t *band_counts, const int16_t *qmul)
2174 int i = 0, band = 0, band_left = band_counts[band];
2175 uint8_t *tp = p[0][nnz];
     /* Per-position magnitude cache used to derive nnz context from the
      * two neighbour positions given by nb[]. */
2176 uint8_t cache[1024];
2181 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2182 eob[band][nnz][val]++;
2187 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2188 cnt[band][nnz][0]++;
2190 band_left = band_counts[++band];
     /* nnz context = rounded average of the two neighbour magnitudes. */
2192 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2194 if (++i == n_coeffs)
2195 break; //invalid input; blocks should end with EOB
2200 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2201 cnt[band][nnz][1]++;
2205 // fill in p[3-10] (model fill) - only once per frame for each pos
2207 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2209 cnt[band][nnz][2]++;
2210 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2211 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2212 cache[rc] = val = 2;
2214 val = 3 + vp56_rac_get_prob(c, tp[5]);
2217 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2219 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2220 val = 5 + vp56_rac_get_prob(c, 159);
2222 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2223 val += vp56_rac_get_prob(c, 145);
     /* cat 3-6 escapes: fixed probabilities per extra magnitude bit. */
2227 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2228 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2229 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2230 val += (vp56_rac_get_prob(c, 148) << 1);
2231 val += vp56_rac_get_prob(c, 140);
2233 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2234 val += (vp56_rac_get_prob(c, 155) << 2);
2235 val += (vp56_rac_get_prob(c, 140) << 1);
2236 val += vp56_rac_get_prob(c, 135);
2238 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2239 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2240 val += (vp56_rac_get_prob(c, 157) << 3);
2241 val += (vp56_rac_get_prob(c, 141) << 2);
2242 val += (vp56_rac_get_prob(c, 134) << 1);
2243 val += vp56_rac_get_prob(c, 130);
     /* cat6: high-bit-depth streams carry extra magnitude bits. */
2246 if (!is8bitsperpixel) {
2248 val += vp56_rac_get_prob(c, 255) << 17;
2249 val += vp56_rac_get_prob(c, 255) << 16;
2251 val += (vp56_rac_get_prob(c, 255) << 15);
2252 val += (vp56_rac_get_prob(c, 255) << 14);
2254 val += (vp56_rac_get_prob(c, 254) << 13);
2255 val += (vp56_rac_get_prob(c, 254) << 12);
2256 val += (vp56_rac_get_prob(c, 254) << 11);
2257 val += (vp56_rac_get_prob(c, 252) << 10);
2258 val += (vp56_rac_get_prob(c, 249) << 9);
2259 val += (vp56_rac_get_prob(c, 243) << 8);
2260 val += (vp56_rac_get_prob(c, 230) << 7);
2261 val += (vp56_rac_get_prob(c, 196) << 6);
2262 val += (vp56_rac_get_prob(c, 177) << 5);
2263 val += (vp56_rac_get_prob(c, 153) << 4);
2264 val += (vp56_rac_get_prob(c, 140) << 3);
2265 val += (vp56_rac_get_prob(c, 133) << 2);
2266 val += (vp56_rac_get_prob(c, 130) << 1);
2267 val += vp56_rac_get_prob(c, 129);
     /* 8bpp streams store int16 coefficients; high-bpp streams store
      * 32-bit values as pairs of int16 slots (the 8bpp arm of the macro
      * is elided here). */
2271 #define STORE_COEF(c, i, v) do { \
2272 if (is8bitsperpixel) { \
2275 AV_WN32A(&c[i * 2], v); \
2279 band_left = band_counts[++band];
     /* tx32x32 blocks dequantize at half scale; qmul[0] is for the DC
      * coefficient (i == 0), qmul[1] for all others. */
2281 STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
2283 STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
2284 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2286 } while (++i < n_coeffs);
2291 static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2292 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2293 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2294 const int16_t (*nb)[2], const int16_t *band_counts,
2295 const int16_t *qmul)
2297 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
2298 nnz, scan, nb, band_counts, qmul);
2301 static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2302 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2303 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2304 const int16_t (*nb)[2], const int16_t *band_counts,
2305 const int16_t *qmul)
2307 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
2308 nnz, scan, nb, band_counts, qmul);
2311 static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2312 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2313 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2314 const int16_t (*nb)[2], const int16_t *band_counts,
2315 const int16_t *qmul)
2317 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
2318 nnz, scan, nb, band_counts, qmul);
2321 static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2322 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2323 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2324 const int16_t (*nb)[2], const int16_t *band_counts,
2325 const int16_t *qmul)
2327 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
2328 nnz, scan, nb, band_counts, qmul);
/* Decode all residual coefficients for the current block: luma first,
 * then both chroma planes.  Templated on is8bitsperpixel so the compiler
 * can specialize the 8 bpp and high-bit-depth paths.  Returns nonzero
 * (via total_coeff) if any sub-block produced a nonzero coefficient. */
2331 static av_always_inline int decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
2333 VP9Context *s = ctx->priv_data;
2335 int row = s->row, col = s->col;
/* probability / adaptation-count / eob tables, initially the luma set */
2336 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2337 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2338 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2339 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
/* clip the coefficient loops to the visible portion of the frame */
2340 int end_x = FFMIN(2 * (s->cols - col), w4);
2341 int end_y = FFMIN(2 * (s->rows - row), h4);
2342 int n, pl, x, y, res;
2343 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
2344 int tx = 4 * s->lossless + b->tx;
2345 const int16_t * const *yscans = vp9_scans[tx];
2346 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2347 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2348 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2349 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2350 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
/* per-band coefficient counts, one row per transform size (4x4..32x32) */
2351 static const int16_t band_counts[4][8] = {
2352 { 1, 2, 3, 4, 3, 16 - 13 },
2353 { 1, 2, 3, 4, 11, 64 - 21 },
2354 { 1, 2, 3, 4, 11, 256 - 21 },
2355 { 1, 2, 3, 4, 11, 1024 - 21 },
2357 const int16_t *y_band_counts = band_counts[b->tx];
2358 const int16_t *uv_band_counts = band_counts[b->uvtx];
2359 int bytesperpixel = is8bitsperpixel ? 1 : 2;
2360 int total_coeff = 0;
/* MERGE collapses per-4x4 nnz context entries into one flag per larger
 * transform block; SPLAT (below) re-expands them afterwards */
2362 #define MERGE(la, end, step, rd) \
2363 for (n = 0; n < end; n += step) \
2364 la[n] = !!rd(&la[n])
2365 #define MERGE_CTX(step, rd) \
2367 MERGE(l, end_y, step, rd); \
2368 MERGE(a, end_x, step, rd); \
2371 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2372 for (n = 0, y = 0; y < end_y; y += step) { \
2373 for (x = 0; x < end_x; x += step, n += step * step) { \
2374 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2375 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2376 (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
2377 c, e, p, a[x] + l[y], yscans[txtp], \
2378 ynbs[txtp], y_band_counts, qmul[0]); \
2379 a[x] = l[y] = !!res; \
2380 total_coeff |= !!res; \
2382 AV_WN16A(&s->eob[n], res); \
2389 #define SPLAT(la, end, step, cond) \
2391 for (n = 1; n < end; n += step) \
2392 la[n] = la[n - 1]; \
2393 } else if (step == 4) { \
2395 for (n = 0; n < end; n += step) \
2396 AV_WN32A(&la[n], la[n] * 0x01010101); \
2398 for (n = 0; n < end; n += step) \
2399 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2401 } else /* step == 8 */ { \
2403 if (HAVE_FAST_64BIT) { \
2404 for (n = 0; n < end; n += step) \
2405 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2407 for (n = 0; n < end; n += step) { \
2408 uint32_t v32 = la[n] * 0x01010101; \
2409 AV_WN32A(&la[n], v32); \
2410 AV_WN32A(&la[n + 4], v32); \
2414 for (n = 0; n < end; n += step) \
2415 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2418 #define SPLAT_CTX(step) \
2420 SPLAT(a, end_x, step, end_x == w4); \
2421 SPLAT(l, end_y, step, end_y == h4); \
2427 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2430 MERGE_CTX(2, AV_RN16A);
2431 DECODE_Y_COEF_LOOP(2, 0,);
2435 MERGE_CTX(4, AV_RN32A);
2436 DECODE_Y_COEF_LOOP(4, 0,);
2440 MERGE_CTX(8, AV_RN64A);
2441 DECODE_Y_COEF_LOOP(8, 0, 32);
2446 #define DECODE_UV_COEF_LOOP(step, v) \
2447 for (n = 0, y = 0; y < end_y; y += step) { \
2448 for (x = 0; x < end_x; x += step, n += step * step) { \
2449 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2450 (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
2451 16 * step * step, c, e, p, a[x] + l[y], \
2452 uvscan, uvnb, uv_band_counts, qmul[1]); \
2453 a[x] = l[y] = !!res; \
2454 total_coeff |= !!res; \
2456 AV_WN16A(&s->uveob[pl][n], res); \
2458 s->uveob[pl][n] = res; \
2463 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2464 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2465 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
/* decode both chroma planes with the uv tables and uv transform size */
2470 for (pl = 0; pl < 2; pl++) {
2471 a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2472 l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2475 DECODE_UV_COEF_LOOP(1,);
2478 MERGE_CTX(2, AV_RN16A);
2479 DECODE_UV_COEF_LOOP(2,);
2483 MERGE_CTX(4, AV_RN32A);
2484 DECODE_UV_COEF_LOOP(4,);
2488 MERGE_CTX(8, AV_RN64A);
2489 DECODE_UV_COEF_LOOP(8, 32);
2498 static int decode_coeffs_8bpp(AVCodecContext *ctx)
2500 return decode_coeffs(ctx, 1);
2503 static int decode_coeffs_16bpp(AVCodecContext *ctx)
2505 return decode_coeffs(ctx, 0);
/* Prepare the top (*a) and left (l) edge-pixel buffers for one intra
 * prediction unit, substituting the appropriate DC fallback mode when
 * neighbouring pixels are unavailable (frame/tile border).  Returns the
 * (possibly remapped) prediction mode to actually use. */
2508 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2509 uint8_t *dst_edge, ptrdiff_t stride_edge,
2510 uint8_t *dst_inner, ptrdiff_t stride_inner,
2511 uint8_t *l, int col, int x, int w,
2512 int row, int y, enum TxfmMode tx,
2513 int p, int ss_h, int ss_v, int bytesperpixel)
2515 int have_top = row > 0 || y > 0;
2516 int have_left = col > s->tiling.tile_col_start || x > 0;
2517 int have_right = x < w - 1;
/* remap mode -> fallback mode depending on neighbour availability */
2519 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2520 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2521 { DC_127_PRED, VERT_PRED } },
2522 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2523 { HOR_PRED, HOR_PRED } },
2524 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2525 { LEFT_DC_PRED, DC_PRED } },
2526 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2527 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2528 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2529 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2530 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2531 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2532 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2533 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2534 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2535 { DC_127_PRED, VERT_LEFT_PRED } },
2536 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2537 { HOR_UP_PRED, HOR_UP_PRED } },
2538 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2539 { HOR_PRED, TM_VP8_PRED } },
/* which neighbouring edges each (remapped) mode actually reads */
2541 static const struct {
2542 uint8_t needs_left:1;
2543 uint8_t needs_top:1;
2544 uint8_t needs_topleft:1;
2545 uint8_t needs_topright:1;
2546 uint8_t invert_left:1;
2547 } edges[N_INTRA_PRED_MODES] = {
2548 [VERT_PRED] = { .needs_top = 1 },
2549 [HOR_PRED] = { .needs_left = 1 },
2550 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2551 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2552 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2553 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2554 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2555 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2556 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2557 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2558 [LEFT_DC_PRED] = { .needs_left = 1 },
2559 [TOP_DC_PRED] = { .needs_top = 1 },
2560 [DC_128_PRED] = { 0 },
2561 [DC_127_PRED] = { 0 },
2562 [DC_129_PRED] = { 0 }
2565 av_assert2(mode >= 0 && mode < 10);
2566 mode = mode_conv[mode][have_left][have_top];
2567 if (edges[mode].needs_top) {
2568 uint8_t *top, *topleft;
2569 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2570 int n_px_need_tr = 0;
2572 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2575 // if top of sb64-row, use s->intra_pred_data[] instead of
2576 // dst[-stride] for intra prediction (it contains pre- instead of
2577 // post-loopfilter data)
2579 top = !(row & 7) && !y ?
2580 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2581 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2583 topleft = !(row & 7) && !y ?
2584 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2585 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2586 &dst_inner[-stride_inner];
2590 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2591 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2592 n_px_need + n_px_need_tr <= n_px_have) {
2596 if (n_px_need <= n_px_have) {
2597 memcpy(*a, top, n_px_need * bytesperpixel);
/* memset_bpp: fill `num` pixels of (c) starting at i1 with pixel (v)[i2],
 * handling both 1- and 2-byte pixels */
2599 #define memset_bpp(c, i1, v, i2, num) do { \
2600 if (bytesperpixel == 1) { \
2601 memset(&(c)[(i1)], (v)[(i2)], (num)); \
2603 int n, val = AV_RN16A(&(v)[(i2) * 2]); \
2604 for (n = 0; n < (num); n++) { \
2605 AV_WN16A(&(c)[((i1) + n) * 2], val); \
2609 memcpy(*a, top, n_px_have * bytesperpixel);
2610 memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
2613 #define memset_val(c, val, num) do { \
2614 if (bytesperpixel == 1) { \
2615 memset((c), (val), (num)); \
2618 for (n = 0; n < (num); n++) { \
2619 AV_WN16A(&(c)[n * 2], (val)); \
2623 memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
2625 if (edges[mode].needs_topleft) {
2626 if (have_left && have_top) {
2627 #define assign_bpp(c, i1, v, i2) do { \
2628 if (bytesperpixel == 1) { \
2629 (c)[(i1)] = (v)[(i2)]; \
2631 AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
2634 assign_bpp(*a, -1, topleft, -1);
2636 #define assign_val(c, i, v) do { \
2637 if (bytesperpixel == 1) { \
2640 AV_WN16A(&(c)[(i) * 2], (v)); \
2643 assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
2646 if (tx == TX_4X4 && edges[mode].needs_topright) {
2647 if (have_top && have_right &&
2648 n_px_need + n_px_need_tr <= n_px_have) {
2649 memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
2651 memset_bpp(*a, 4, *a, 3, 4);
/* fill the left-edge buffer; HOR_UP stores it inverted (invert_left) */
2656 if (edges[mode].needs_left) {
2658 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2659 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2660 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2662 if (edges[mode].invert_left) {
2663 if (n_px_need <= n_px_have) {
2664 for (i = 0; i < n_px_need; i++)
2665 assign_bpp(l, i, &dst[i * stride], -1);
2667 for (i = 0; i < n_px_have; i++)
2668 assign_bpp(l, i, &dst[i * stride], -1);
2669 memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
2672 if (n_px_need <= n_px_have) {
2673 for (i = 0; i < n_px_need; i++)
2674 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2676 for (i = 0; i < n_px_have; i++)
2677 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2678 memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
2682 memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
/* Intra reconstruction for the current block: per transform-sized
 * sub-block, build edge buffers (check_intra_mode), run the intra
 * predictor, then add the inverse-transformed residual.  Luma first,
 * then both chroma planes.  Templated on bytesperpixel (1 or 2). */
2689 static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
2690 ptrdiff_t uv_off, int bytesperpixel)
2692 VP9Context *s = ctx->priv_data;
2694 int row = s->row, col = s->col;
2695 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2696 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2697 int end_x = FFMIN(2 * (s->cols - col), w4);
2698 int end_y = FFMIN(2 * (s->rows - row), h4);
2699 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2700 int uvstep1d = 1 << b->uvtx, p;
2701 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
/* scratch buffers for the top (a_buf) and left (l) prediction edges */
2702 LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
2703 LOCAL_ALIGNED_32(uint8_t, l, [64]);
2705 for (n = 0, y = 0; y < end_y; y += step1d) {
2706 uint8_t *ptr = dst, *ptr_r = dst_r;
2707 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
2708 ptr_r += 4 * step1d * bytesperpixel, n += step) {
2709 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2711 uint8_t *a = &a_buf[32];
2712 enum TxfmType txtp = vp9_intra_txfm_type[mode];
2713 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2715 mode = check_intra_mode(s, mode, &a, ptr_r,
2716 s->frames[CUR_FRAME].tf.f->linesize[0],
2717 ptr, s->y_stride, l,
2718 col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
2719 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2721 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2722 s->block + 16 * n * bytesperpixel, eob);
2724 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2725 dst += 4 * step1d * s->y_stride;
/* chroma planes: same structure, using uvmode/uvtx and DCT_DCT only */
2732 step = 1 << (b->uvtx * 2);
2733 for (p = 0; p < 2; p++) {
2734 dst = s->dst[1 + p];
2735 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2736 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2737 uint8_t *ptr = dst, *ptr_r = dst_r;
2738 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
2739 ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
2740 int mode = b->uvmode;
2741 uint8_t *a = &a_buf[32];
2742 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2744 mode = check_intra_mode(s, mode, &a, ptr_r,
2745 s->frames[CUR_FRAME].tf.f->linesize[1],
2746 ptr, s->uv_stride, l, col, x, w4, row, y,
2747 b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
2748 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2750 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2751 s->uvblock[p] + 16 * n * bytesperpixel, eob);
2753 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2754 dst += 4 * uvstep1d * s->uv_stride;
2759 static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2761 intra_recon(ctx, y_off, uv_off, 1);
2764 static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2766 intra_recon(ctx, y_off, uv_off, 2);
/* Luma motion compensation from a reference frame of a different size:
 * clips the MV, maps it through the per-reference scale factors, waits
 * for the reference row to be decoded (frame-threading), and falls back
 * to emulated_edge_mc when the filter would read outside the frame. */
2769 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2770 uint8_t *dst, ptrdiff_t dst_stride,
2771 const uint8_t *ref, ptrdiff_t ref_stride,
2772 ThreadFrame *ref_frame,
2773 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2774 int px, int py, int pw, int ph,
2775 int bw, int bh, int w, int h, int bytesperpixel,
2776 const uint16_t *scale, const uint8_t *step)
/* fixed-point (14-bit) scaling of a MV component to reference coordinates */
2778 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2780 int refbw_m1, refbh_m1;
2784 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2785 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2786 // BUG libvpx seems to scale the two components separately. This introduces
2787 // rounding errors but we have to reproduce them to be exactly compatible
2788 // with the output from libvpx...
2789 mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
2790 my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
2794 ref += y * ref_stride + x * bytesperpixel;
/* extent of the reference area actually read by the scaled filter */
2797 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2798 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2799 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2800 // we use +7 because the last 7 pixels of each sbrow can be changed in
2801 // the longest loopfilter of the next sbrow
2802 th = (y + refbh_m1 + 4 + 7) >> 6;
2803 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2804 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2805 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2806 ref - 3 * ref_stride - 3 * bytesperpixel,
2808 refbw_m1 + 8, refbh_m1 + 8,
2809 x - 3, y - 3, w, h);
2810 ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2813 smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
/* Chroma motion compensation from a scaled reference: like
 * mc_luma_scaled() but operates on both U and V at once, with MV
 * clipping/scaling that depends on chroma subsampling (and reproduces a
 * known libvpx bug for subsampled axes, see the BUG links below). */
2816 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2817 uint8_t *dst_u, uint8_t *dst_v,
2818 ptrdiff_t dst_stride,
2819 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2820 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2821 ThreadFrame *ref_frame,
2822 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2823 int px, int py, int pw, int ph,
2824 int bw, int bh, int w, int h, int bytesperpixel,
2825 const uint16_t *scale, const uint8_t *step)
2828 int refbw_m1, refbh_m1;
2833 // BUG https://code.google.com/p/webm/issues/detail?id=820
2834 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 4, (s->cols * 4 - x + px + 3) << 4);
2835 mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2837 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2838 mx = scale_mv(mv.x << 1, 0) + scale_mv(x * 16, 0);
2841 // BUG https://code.google.com/p/webm/issues/detail?id=820
2842 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 4, (s->rows * 4 - y + py + 3) << 4);
2843 my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2845 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2846 my = scale_mv(mv.y << 1, 1) + scale_mv(y * 16, 1);
2851 ref_u += y * src_stride_u + x * bytesperpixel;
2852 ref_v += y * src_stride_v + x * bytesperpixel;
/* extent of the reference area actually read by the scaled filter */
2855 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2856 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2857 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2858 // we use +7 because the last 7 pixels of each sbrow can be changed in
2859 // the longest loopfilter of the next sbrow
2860 th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2861 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2862 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2863 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2864 ref_u - 3 * src_stride_u - 3 * bytesperpixel,
2866 refbw_m1 + 8, refbh_m1 + 8,
2867 x - 3, y - 3, w, h);
2868 ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2869 smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
2871 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2872 ref_v - 3 * src_stride_v - 3 * bytesperpixel,
2874 refbw_m1 + 8, refbh_m1 + 8,
2875 x - 3, y - 3, w, h);
2876 ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2877 smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
2879 smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2880 smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
/* Scaled-reference MC dispatch: mc_luma_dir/mc_chroma_dir route to the
 * *_scaled helpers (note the s##mc -> s->dsp.s<filter> name paste), then
 * vp9_mc_template.c is instantiated once per bit depth via FN(). */
2884 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2885 px, py, pw, ph, bw, bh, w, h, i) \
2886 mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
2887 mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2888 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2889 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2890 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2891 mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2892 row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2893 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
/* 8 bpp instantiation -> inter_pred_scaled_8bpp() etc. */
2895 #define FN(x) x##_scaled_8bpp
2896 #define BYTES_PER_PIXEL 1
2897 #include "vp9_mc_template.c"
2899 #undef BYTES_PER_PIXEL
/* 16 bpp instantiation -> inter_pred_scaled_16bpp() etc. */
2900 #define FN(x) x##_scaled_16bpp
2901 #define BYTES_PER_PIXEL 2
2902 #include "vp9_mc_template.c"
2904 #undef mc_chroma_dir
2906 #undef BYTES_PER_PIXEL
/* Luma motion compensation from a same-size reference: waits for the
 * needed reference rows (frame-threading), uses emulated_edge_mc when
 * the subpel filter would read outside the frame, then calls the
 * appropriate (mx?, my?) MC function. */
2909 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2910 uint8_t *dst, ptrdiff_t dst_stride,
2911 const uint8_t *ref, ptrdiff_t ref_stride,
2912 ThreadFrame *ref_frame,
2913 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2914 int bw, int bh, int w, int h, int bytesperpixel)
2916 int mx = mv->x, my = mv->y, th;
2920 ref += y * ref_stride + x * bytesperpixel;
2923 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2924 // we use +7 because the last 7 pixels of each sbrow can be changed in
2925 // the longest loopfilter of the next sbrow
2926 th = (y + bh + 4 * !!my + 7) >> 6;
2927 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
/* only pad in a direction that actually has a subpel component */
2928 if (x < !!mx * 3 || y < !!my * 3 ||
2929 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2930 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2931 ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
2933 bw + !!mx * 7, bh + !!my * 7,
2934 x - !!mx * 3, y - !!my * 3, w, h);
2935 ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2938 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
/* Chroma motion compensation from a same-size reference: like
 * mc_luma_unscaled() but handles U and V together; the MV is shifted
 * up by the subsampling factors to chroma-pel units. */
2941 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2942 uint8_t *dst_u, uint8_t *dst_v,
2943 ptrdiff_t dst_stride,
2944 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2945 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2946 ThreadFrame *ref_frame,
2947 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2948 int bw, int bh, int w, int h, int bytesperpixel)
2950 int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
2954 ref_u += y * src_stride_u + x * bytesperpixel;
2955 ref_v += y * src_stride_v + x * bytesperpixel;
2958 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2959 // we use +7 because the last 7 pixels of each sbrow can be changed in
2960 // the longest loopfilter of the next sbrow
2961 th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2962 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
/* only pad in a direction that actually has a subpel component */
2963 if (x < !!mx * 3 || y < !!my * 3 ||
2964 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2965 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2966 ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
2968 bw + !!mx * 7, bh + !!my * 7,
2969 x - !!mx * 3, y - !!my * 3, w, h);
2970 ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2971 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
2973 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2974 ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
2976 bw + !!mx * 7, bh + !!my * 7,
2977 x - !!mx * 3, y - !!my * 3, w, h);
2978 ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2979 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
2981 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2982 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
2986 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2987 px, py, pw, ph, bw, bh, w, h, i) \
2988 mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2989 mv, bw, bh, w, h, bytesperpixel)
2990 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2991 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2992 mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2993 row, col, mv, bw, bh, w, h, bytesperpixel)
2995 #define FN(x) x##_8bpp
2996 #define BYTES_PER_PIXEL 1
2997 #include "vp9_mc_template.c"
2999 #undef BYTES_PER_PIXEL
3000 #define FN(x) x##_16bpp
3001 #define BYTES_PER_PIXEL 2
3002 #include "vp9_mc_template.c"
3003 #undef mc_luma_dir_dir
3004 #undef mc_chroma_dir_dir
3006 #undef BYTES_PER_PIXEL
/* Inter reconstruction for the current block: run motion-compensated
 * prediction (scaled or unscaled variant depending on whether the
 * reference frame's mvscale is in use), then add the inverse-transformed
 * residuals for luma and both chroma planes. */
3009 static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
3011 VP9Context *s = ctx->priv_data;
3013 int row = s->row, col = s->col;
/* pick the prediction path: scaled if any used reference needs scaling */
3015 if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
3016 if (bytesperpixel == 1) {
3017 inter_pred_scaled_8bpp(ctx);
3019 inter_pred_scaled_16bpp(ctx);
3022 if (bytesperpixel == 1) {
3023 inter_pred_8bpp(ctx);
3025 inter_pred_16bpp(ctx);
3029 /* mostly copied intra_recon() */
3031 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
3032 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
3033 int end_x = FFMIN(2 * (s->cols - col), w4);
3034 int end_y = FFMIN(2 * (s->rows - row), h4);
3035 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
3036 int uvstep1d = 1 << b->uvtx, p;
3037 uint8_t *dst = s->dst[0];
/* luma residual add: inter blocks always use DCT_DCT */
3040 for (n = 0, y = 0; y < end_y; y += step1d) {
3042 for (x = 0; x < end_x; x += step1d,
3043 ptr += 4 * step1d * bytesperpixel, n += step) {
3044 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
3047 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
3048 s->block + 16 * n * bytesperpixel, eob);
3050 dst += 4 * s->y_stride * step1d;
/* chroma residual add for both planes */
3056 step = 1 << (b->uvtx * 2);
3057 for (p = 0; p < 2; p++) {
3058 dst = s->dst[p + 1];
3059 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
3061 for (x = 0; x < end_x; x += uvstep1d,
3062 ptr += 4 * uvstep1d * bytesperpixel, n += step) {
3063 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
3066 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
3067 s->uvblock[p] + 16 * n * bytesperpixel, eob);
3069 dst += 4 * uvstep1d * s->uv_stride;
3075 static void inter_recon_8bpp(AVCodecContext *ctx)
3077 inter_recon(ctx, 1);
3080 static void inter_recon_16bpp(AVCodecContext *ctx)
3082 inter_recon(ctx, 2);
/* Build the per-superblock loopfilter edge masks for one coded block:
 * sets bits in mask[0=col edges][row][filter width] and
 * mask[1=row edges][row][filter width] describing which 8/4-px edges
 * must be filtered, taking chroma subsampling (ss_h/ss_v), transform
 * size and skip state into account. */
3085 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
3086 int row_and_7, int col_and_7,
3087 int w, int h, int col_end, int row_end,
3088 enum TxfmMode tx, int skip_inter)
3090 static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
3091 static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
3093 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
3094 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
3095 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
3096 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
3098 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
3099 // edges. This means that for UV, we work on two subsampled blocks at
3100 // a time, and we only use the topleft block's mode information to set
3101 // things like block strength. Thus, for any block size smaller than
3102 // 16x16, ignore the odd portion of the block.
3103 if (tx == TX_4X4 && (ss_v | ss_h)) {
/* 4x4 non-skipped: every internal edge is filtered */
3118 if (tx == TX_4X4 && !skip_inter) {
3119 int t = 1 << col_and_7, m_col = (t << w) - t, y;
3120 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
3121 int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
3123 for (y = row_and_7; y < h + row_and_7; y++) {
3124 int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
3126 mask[0][y][1] |= m_row_8;
3127 mask[0][y][2] |= m_row_4;
3128 // for odd lines, if the odd col is not being filtered,
3129 // skip odd row also:
3136 // if a/c are even row/col and b/d are odd, and d is skipped,
3137 // e.g. right edge of size-66x66.webm, then skip b also (bug)
3138 if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
3139 mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
3141 mask[1][y][col_mask_id] |= m_col;
3144 mask[0][y][3] |= m_col;
3146 if (ss_h && (col_end & 1))
3147 mask[1][y][3] |= (t << (w - 1)) - t;
3149 mask[1][y][3] |= m_col;
/* larger transforms / skipped blocks: only block-boundary edges */
3153 int y, t = 1 << col_and_7, m_col = (t << w) - t;
3156 int mask_id = (tx == TX_8X8);
3157 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
3158 int l2 = tx + ss_h - 1, step1d;
3159 int m_row = m_col & masks[l2];
3161 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
3162 // 8wd loopfilter to prevent going off the visible edge.
3163 if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
3164 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
3165 int m_row_8 = m_row - m_row_16;
3167 for (y = row_and_7; y < h + row_and_7; y++) {
3168 mask[0][y][0] |= m_row_16;
3169 mask[0][y][1] |= m_row_8;
3172 for (y = row_and_7; y < h + row_and_7; y++)
3173 mask[0][y][mask_id] |= m_row;
3178 if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
3179 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
3180 mask[1][y][0] |= m_col;
3181 if (y - row_and_7 == h - 1)
3182 mask[1][y][1] |= m_col;
3184 for (y = row_and_7; y < h + row_and_7; y += step1d)
3185 mask[1][y][mask_id] |= m_col;
3187 } else if (tx != TX_4X4) {
3190 mask_id = (tx == TX_8X8) || (h == ss_v);
3191 mask[1][row_and_7][mask_id] |= m_col;
3192 mask_id = (tx == TX_8X8) || (w == ss_h);
3193 for (y = row_and_7; y < h + row_and_7; y++)
3194 mask[0][y][mask_id] |= t;
3196 int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
3198 for (y = row_and_7; y < h + row_and_7; y++) {
3199 mask[0][y][2] |= t4;
3200 mask[0][y][1] |= t8;
3202 mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
3207 static void decode_b(AVCodecContext *ctx, int row, int col,
3208 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
3209 enum BlockLevel bl, enum BlockPartition bp)
3211 VP9Context *s = ctx->priv_data;
3213 enum BlockSize bs = bl * 3 + bp;
3214 int bytesperpixel = s->bytesperpixel;
3215 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
3217 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3223 s->min_mv.x = -(128 + col * 64);
3224 s->min_mv.y = -(128 + row * 64);
3225 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
3226 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
3232 b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
3233 (s->ss_v && h4 * 2 == (1 << b->tx)));
3238 if (bytesperpixel == 1) {
3239 has_coeffs = decode_coeffs_8bpp(ctx);
3241 has_coeffs = decode_coeffs_16bpp(ctx);
3243 if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
3245 memset(&s->above_skip_ctx[col], 1, w4);
3246 memset(&s->left_skip_ctx[s->row7], 1, h4);
3251 #define SPLAT_ZERO_CTX(v, n) \
3253 case 1: v = 0; break; \
3254 case 2: AV_ZERO16(&v); break; \
3255 case 4: AV_ZERO32(&v); break; \
3256 case 8: AV_ZERO64(&v); break; \
3257 case 16: AV_ZERO128(&v); break; \
3259 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3261 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3262 if (s->ss_##dir2) { \
3263 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3264 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3266 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3267 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3272 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
3273 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
3274 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
3275 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
3278 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
3279 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
3280 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
3281 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
3286 s->block += w4 * h4 * 64 * bytesperpixel;
3287 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3288 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3289 s->eob += 4 * w4 * h4;
3290 s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3291 s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3297 // emulated overhangs if the stride of the target buffer can't hold. This
3298 // makes it possible to support emu-edge and so on even if we have large block
3300 emu[0] = (col + w4) * 8 > f->linesize[0] ||
3301 (row + h4) > s->rows;
3302 emu[1] = (col + w4) * 4 > f->linesize[1] ||
3303 (row + h4) > s->rows;
3305 s->dst[0] = s->tmp_y;
3308 s->dst[0] = f->data[0] + yoff;
3309 s->y_stride = f->linesize[0];
3312 s->dst[1] = s->tmp_uv[0];
3313 s->dst[2] = s->tmp_uv[1];
3316 s->dst[1] = f->data[1] + uvoff;
3317 s->dst[2] = f->data[2] + uvoff;
3318 s->uv_stride = f->linesize[1];
3322 intra_recon_16bpp(ctx, yoff, uvoff);
3324 intra_recon_8bpp(ctx, yoff, uvoff);
3328 inter_recon_16bpp(ctx);
3330 inter_recon_8bpp(ctx);
3334 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3336 for (n = 0; o < w; n++) {
3341 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3342 s->tmp_y + o, 128, h, 0, 0);
3343 o += bw * bytesperpixel;
3348 int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
3349 int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
3351 for (n = s->ss_h; o < w; n++) {
3356 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3357 s->tmp_uv[0] + o, 128, h, 0, 0);
3358 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3359 s->tmp_uv[1] + o, 128, h, 0, 0);
3360 o += bw * bytesperpixel;
3365 // pick filter level and find edges to apply filter to
3366 if (s->filter.level &&
3367 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3368 [b->mode[3] != ZEROMV]) > 0) {
3369 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3370 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3372 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3373 mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3374 if (s->ss_h || s->ss_v)
3375 mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
3376 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3377 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3378 b->uvtx, skip_inter);
3380 if (!s->filter.lim_lut[lvl]) {
3381 int sharp = s->filter.sharpness;
3385 limit >>= (sharp + 3) >> 2;
3386 limit = FFMIN(limit, 9 - sharp);
3388 limit = FFMAX(limit, 1);
3390 s->filter.lim_lut[lvl] = limit;
3391 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
3397 s->block += w4 * h4 * 64 * bytesperpixel;
3398 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3399 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3400 s->eob += 4 * w4 * h4;
3401 s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3402 s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
/**
 * Recursively parse and decode one superblock partition subtree.
 *
 * Reads the partition decision for the block at (row, col) / level bl from
 * the range coder — keyframes and intra-only frames use the static keyframe
 * partition probabilities, other frames the per-frame adapted ones — then
 * either decodes the block via decode_b() or recurses into the sub-blocks.
 * Blocks overlapping the right/bottom picture border cannot use all
 * partition types, so only a single branch probability (p[1] resp. p[2])
 * is read for them. The chosen partition is counted in s->counts for
 * backward probability adaptation.
 *
 * NOTE(review): this copy of the source is missing interior lines (opening
 * brace, the switch(bp) header, break statements, some else-branches);
 * compare against a pristine vp9.c before building.
 */
static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
    VP9Context *s = ctx->priv_data;
    /* partition context: bit 0 from the block above, bit 1 from the left */
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    const uint8_t *p = s->keyframe || s->intraonly ? vp9_default_kf_partition_probs[bl][c] :
                                                     s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl; /* half block size at this level, in 8x8 units */
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

        bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
        decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) { // FIXME why not <=?
        if (row + hbs < s->rows) { // FIXME why not <=?
            /* whole block inside the picture: full partition tree */
            bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
            case PARTITION_NONE:
                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                /* second (bottom) half of a horizontal split */
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
                decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                /* second (right) half of a vertical split */
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
            case PARTITION_SPLIT:
                /* recurse into the four quadrants at the next block level */
                decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(ctx, row, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(ctx, row + hbs, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
        /* bottom half of the block is outside the picture: read one branch */
        } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
            bp = PARTITION_SPLIT;
            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
            decode_sb(ctx, row, col + hbs, lflvl,
                      yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
            /* presumably the elided else-branch sets bp = PARTITION_H — verify */
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (row + hbs < s->rows) { // FIXME why not <=?
        /* right half of the block is outside the picture: read one branch */
        if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
            bp = PARTITION_SPLIT;
            decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
            /* presumably the elided else-branch sets bp = PARTITION_V — verify */
            decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
        /* both halves outside: only a split into the top-left quadrant remains */
        bp = PARTITION_SPLIT;
        decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
    /* count the chosen partition for backward adaptation (see adapt_probs) */
    s->counts.partition[bl][c][bp]++;
/**
 * Second-pass variant of decode_sb() used in 2-pass frame-threaded decoding.
 *
 * Instead of reading partition decisions from the range coder, it replays
 * the block structure stored during the first pass (b->bl / b->bp from the
 * per-block array s->b) and re-runs reconstruction for each block.
 *
 * NOTE(review): interior lines (opening brace, the VP9Block *b declaration,
 * the `if (bl == BL_8X8)` header and several closing braces) are missing
 * from this copy of the source.
 */
static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
                          ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
    VP9Context *s = ctx->priv_data;
    ptrdiff_t hbs = 4 >> bl; /* half block size at this level, in 8x8 units */
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

        /* smallest level: must be a leaf block */
        av_assert2(b->bl == BL_8X8);
        decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
    } else if (s->b->bl == bl) {
        /* leaf at this level: replay stored partition of the stored block */
        decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
        if (b->bp == PARTITION_H && row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
        } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
            yoff  += hbs * 8 * bytesperpixel;
            uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
            decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
        /* split: recurse into quadrants, skipping those outside the picture */
        decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
        if (col + hbs < s->cols) { // FIXME why not <=?
            if (row + hbs < s->rows) {
                decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
                              yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                /* bottom row outside: only the right neighbour remains */
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
        } else if (row + hbs < s->rows) {
            /* right column outside: only the bottom neighbour remains */
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
/**
 * Apply the loop filter to vertical edges (edges between horizontally
 * adjacent blocks) of one plane within a 64x64 superblock.
 *
 * @param lvl   8x8 grid of per-block filter levels for this superblock
 * @param mask  bitmasks per row: [0..2] select filter width 16/8/4,
 *              [3] marks inner 4-tap edges (see VP9Filter in the header)
 * @param dst   top-left pixel of the superblock in this plane
 * @param ls    line stride of dst
 *
 * L holds the filter level, H its high nibble (hev threshold); E and I are
 * looked up from the mblim/lim tables filled in during block decoding.
 * Two vertically stacked 8px edges with the same level are merged into one
 * 16px or mix2 dsp call where the masks allow it.
 *
 * NOTE(review): several else/brace lines are elided in this copy of the
 * source; the control flow below is incomplete as written.
 */
static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
                                               uint8_t *lvl, uint8_t (*mask)[4],
                                               uint8_t *dst, ptrdiff_t ls)
    int y, x, bytesperpixel = s->bytesperpixel;

    // filter edges between columns (e.g. block1 | block2)
    for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
        uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
        unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
        unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
        unsigned hm = hm1 | hm2 | hm13 | hm23;

        /* walk edge columns left to right; stop once no mask bit remains */
        for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (hmask1[0] & x) {
                    if (hmask2[0] & x) {
                        /* both 8px halves want a 16-wide filter: merge */
                        av_assert2(l[8 << ss_v] == L);
                        s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
                        s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
                } else if (hm2 & x) {
                    /* pack the second edge's thresholds into the high byte
                     * for the mixed-width dsp entry point */
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                           [0](ptr, ls, E, I, H);
                    s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                        [0](ptr, ls, E, I, H);
            } else if (hm2 & x) {
                /* only the lower 8px half of this edge is filtered */
                int L = l[8 << ss_v], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                    [0](ptr + 8 * ls, ls, E, I, H);
                /* inner (4px offset) edges, upper half */
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
                    s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
            } else if (hm23 & x) {
                /* inner edge, lower half only */
                int L = l[8 << ss_v], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
/**
 * Apply the loop filter to horizontal edges (edges between vertically
 * adjacent blocks) of one plane within a 64x64 superblock.
 *
 * Mirror image of filter_plane_cols(): the mask bits select filter widths,
 * L/H/E/I are the level, hev threshold and the mblim/lim lookups, and two
 * horizontally adjacent 8px edges with matching levels are merged into a
 * 16-wide or mix2 dsp call. The x loop advances by 16 pixels (two 8px
 * edges) per iteration, hence the `x <<= (2 << ss_h)` step.
 *
 * NOTE(review): several else/brace lines are elided in this copy of the
 * source; the control flow below is incomplete as written.
 */
static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
                                               uint8_t *lvl, uint8_t (*mask)[4],
                                               uint8_t *dst, ptrdiff_t ls)
    int y, x, bytesperpixel = s->bytesperpixel;

    // filter edges between rows (e.g. ------)
    for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
        uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
        unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];

        for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (vmask[0] & (x << (1 + ss_h))) {
                    /* neighbouring 8px edge filtered at same width: merge */
                    av_assert2(l[1 + ss_h] == L);
                    s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
                    s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
                } else if (vm & (x << (1 + ss_h))) {
                    /* pack the neighbour's thresholds into the high byte */
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
                                           [!!(vmask[1] & (x << (1 + ss_h)))]
                                           [1](ptr, ls, E, I, H);
                    s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                        [1](ptr, ls, E, I, H);
            } else if (vm & (x << (1 + ss_h))) {
                /* only the right 8px neighbour of this pair is filtered */
                int L = l[1 + ss_h], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
                                    [1](ptr + 8 * bytesperpixel, ls, E, I, H);
                /* inner edges, 4 rows down */
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (vm3 & (x << (1 + ss_h))) {
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
                    s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
            } else if (vm3 & (x << (1 + ss_h))) {
                /* inner edge on the right neighbour only */
                int L = l[1 + ss_h], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
/**
 * Loop-filter one 64x64 superblock: column (vertical-edge) then row
 * (horizontal-edge) filtering for luma, then both chroma planes.
 *
 * The chroma planes share a mask set selected by the subsampling mode
 * (lflvl->mask[ss_h | ss_v]); luma always uses mask[0].
 *
 * NOTE(review): the `int p;` declaration and closing braces appear to be
 * elided in this copy of the source.
 */
static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
                          int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
    VP9Context *s = ctx->priv_data;
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    uint8_t *dst = f->data[0] + yoff;
    ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
    uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];

    // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
    // if you think of them as acting on a 8x8 block max, we can interleave
    // each v/h within the single x loop, but that only works if we work on
    // 8 pixel blocks, and we won't always do that (we want at least 16px
    // to use SSE2 optimizations, perhaps 32 for AVX2)

    /* luma: mask[0][0] = column edges, mask[0][1] = row edges */
    filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
    filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);

    /* both chroma planes use the same level grid and uv mask set */
    for (p = 0; p < 2; p++) {
        dst = f->data[1 + p] + uvoff;
        filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
        filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
/**
 * Compute the half-open range [*start, *end) of 8x8-block rows or columns
 * covered by tile number idx, given 2^log2_n tiles over n superblocks.
 *
 * The tile boundary is first computed in 64x64-superblock units, clamped
 * to the picture size, then converted to 8x8-block units (<< 3).
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_first = ( idx      * n) >> log2_n;
    int sb_last  = ((idx + 1) * n) >> log2_n;

    *start = FFMIN(sb_first, n) << 3;
    *end   = FFMIN(sb_last,  n) << 3;
}
/**
 * Backward-adapt a single binary probability from the per-frame counts.
 *
 * p2 is the probability observed this frame ((ct0 << 8) + ct/2) / ct,
 * clipped to [1, 255]; the stored probability is moved towards it by a
 * weight proportional to min(ct, max_count) / max_count scaled by
 * update_factor, i.e. *p = (p1*(256-uf) + p2*uf + 128) >> 8.
 *
 * NOTE(review): the early return for ct == 0 and the `p1 = *p;` load are
 * elided in this copy of the source — as written, p1 is used
 * uninitialized and the division can be by zero; restore the missing
 * lines from a pristine vp9.c.
 */
static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
                                        int max_count, int update_factor)
    unsigned ct = ct0 + ct1, p2, p1;
    /* observed probability of the 0-branch, rounded, clipped to valid range */
    p2 = ((ct0 << 8) + (ct >> 1)) / ct;
    p2 = av_clip(p2, 1, 255);
    ct = FFMIN(ct, max_count);
    update_factor = FASTDIV(update_factor * ct, max_count);

    // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
    *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
/**
 * Backward adaptation of all probabilities after decoding a frame
 * (VP9 "frame context" update when error_resilient/parallel mode is off).
 *
 * Coefficient probabilities use a faster update factor (112) right after a
 * keyframe/intra-only frame, 128 otherwise; all mode/mv probabilities use
 * a fixed 20/128. On key or intra-only frames only the coefficient, skip
 * and tx-size probabilities are relevant; the rest of the inter-specific
 * adaptation is skipped (early return elided in this copy).
 *
 * NOTE(review): loop-variable declarations (i, j, k, l, m), various braces
 * and `continue`/`sum -=` lines are elided in this copy of the source.
 */
static void adapt_probs(VP9Context *s)
    prob_context *p = &s->prob_ctx[s->framectxid].p;
    int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;

    // coefficients
    for (i = 0; i < 4; i++)
        for (j = 0; j < 2; j++)
            for (k = 0; k < 2; k++)
                for (l = 0; l < 6; l++)
                    for (m = 0; m < 6; m++) {
                        uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
                        unsigned *e = s->counts.eob[i][j][k][l][m];
                        unsigned *c = s->counts.coef[i][j][k][l][m];

                        if (l == 0 && m >= 3) // dc only has 3 pt
                        adapt_prob(&pp[0], e[0], e[1], 24, uf);
                        adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
                        adapt_prob(&pp[2], c[1], c[2], 24, uf);

    if (s->keyframe || s->intraonly) {
        /* keyframes carry their own static mode probabilities; just keep
         * skip/tx in sync and skip the inter adaptation below */
        memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
        memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
        memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
        memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));

    // skip flag
    for (i = 0; i < 3; i++)
        adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);

    // intra/inter flag
    for (i = 0; i < 4; i++)
        adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);

    // comppred flag
    if (s->comppredmode == PRED_SWITCHABLE) {
        for (i = 0; i < 5; i++)
            adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);

    // reference frames
    if (s->comppredmode != PRED_SINGLEREF) {
        for (i = 0; i < 5; i++)
            adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
                       s->counts.comp_ref[i][1], 20, 128);

    if (s->comppredmode != PRED_COMPREF) {
        for (i = 0; i < 5; i++) {
            uint8_t *pp = p->single_ref[i];
            unsigned (*c)[2] = s->counts.single_ref[i];

            adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
            adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);

    // block partitioning
    for (i = 0; i < 4; i++)
        for (j = 0; j < 4; j++) {
            uint8_t *pp = p->partition[i][j];
            unsigned *c = s->counts.partition[i][j];

            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
            adapt_prob(&pp[2], c[2], c[3], 20, 128);

    // tx size
    if (s->txfmmode == TX_SWITCHABLE) {
        for (i = 0; i < 2; i++) {
            unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];

            adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
            adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
            adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
            adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
            adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
            adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);

    // interpolation filter
    if (s->filtermode == FILTER_SWITCHABLE) {
        for (i = 0; i < 4; i++) {
            uint8_t *pp = p->filter[i];
            unsigned *c = s->counts.filter[i];

            adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
            adapt_prob(&pp[1], c[1], c[2], 20, 128);

    // inter mode tree
    for (i = 0; i < 7; i++) {
        uint8_t *pp = p->mv_mode[i];
        unsigned *c = s->counts.mv_mode[i];

        adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
        adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
        adapt_prob(&pp[2], c[1], c[3], 20, 128);

    // mv joints
        uint8_t *pp = p->mv_joint;
        unsigned *c = s->counts.mv_joint;

        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
        adapt_prob(&pp[2], c[2], c[3], 20, 128);

    // mv components (one set per axis; hp parts only when enabled)
    for (i = 0; i < 2; i++) {
        unsigned *c, (*c2)[2], sum;

        adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
                   s->counts.mv_comp[i].sign[1], 20, 128);

        pp = p->mv_comp[i].classes;
        c = s->counts.mv_comp[i].classes;
        sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
        adapt_prob(&pp[0], c[0], sum, 20, 128);
        /* the `sum -= ...` reductions between these calls are elided here */
        adapt_prob(&pp[1], c[1], sum, 20, 128);
        adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
        adapt_prob(&pp[3], c[2], c[3], 20, 128);
        adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
        adapt_prob(&pp[5], c[4], c[5], 20, 128);
        adapt_prob(&pp[6], c[6], sum, 20, 128);
        adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
        adapt_prob(&pp[8], c[7], c[8], 20, 128);
        adapt_prob(&pp[9], c[9], c[10], 20, 128);

        adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
                   s->counts.mv_comp[i].class0[1], 20, 128);
        pp = p->mv_comp[i].bits;
        c2 = s->counts.mv_comp[i].bits;
        for (j = 0; j < 10; j++)
            adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);

        for (j = 0; j < 2; j++) {
            pp = p->mv_comp[i].class0_fp[j];
            c = s->counts.mv_comp[i].class0_fp[j];
            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
            adapt_prob(&pp[2], c[2], c[3], 20, 128);
        pp = p->mv_comp[i].fp;
        c = s->counts.mv_comp[i].fp;
        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
        adapt_prob(&pp[2], c[2], c[3], 20, 128);

        if (s->highprecisionmvs) {
            adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
                       s->counts.mv_comp[i].class0_hp[1], 20, 128);
            adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
                       s->counts.mv_comp[i].hp[1], 20, 128);

    // y intra modes
    for (i = 0; i < 4; i++) {
        uint8_t *pp = p->y_mode[i];
        unsigned *c = s->counts.y_mode[i], sum, s2;

        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
        adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
        sum -= c[TM_VP8_PRED];
        adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
        sum -= c[VERT_PRED];
        adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
        s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
        adapt_prob(&pp[3], s2, sum, 20, 128);
        adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
        adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
        sum -= c[DIAG_DOWN_LEFT_PRED];
        adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
        sum -= c[VERT_LEFT_PRED];
        adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
        adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);

    // uv intra modes (same tree, conditioned on the y mode)
    for (i = 0; i < 10; i++) {
        uint8_t *pp = p->uv_mode[i];
        unsigned *c = s->counts.uv_mode[i], sum, s2;

        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
        adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
        sum -= c[TM_VP8_PRED];
        adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
        sum -= c[VERT_PRED];
        adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
        s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
        adapt_prob(&pp[3], s2, sum, 20, 128);
        adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
        adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
        sum -= c[DIAG_DOWN_LEFT_PRED];
        adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
        sum -= c[VERT_LEFT_PRED];
        adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
        adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
/**
 * Release the per-frame scratch allocations (intra prediction line buffer,
 * per-block structs, coefficient blocks). av_freep() NULLs the pointers,
 * so this is safe to call repeatedly.
 */
static void free_buffers(VP9Context *s)
    av_freep(&s->intra_pred_data[0]);
    av_freep(&s->b_base);
    av_freep(&s->block_base);
/**
 * AVCodec.close callback: release all frames (current/segmap/mvpair),
 * all 8 reference slots and the pending next_refs slots, then free the
 * scratch buffers (in elided trailing lines of the original).
 *
 * NOTE(review): the `int i;` declaration and closing braces/return are
 * elided in this copy of the source.
 */
static av_cold int vp9_decode_free(AVCodecContext *ctx)
    VP9Context *s = ctx->priv_data;

    for (i = 0; i < 3; i++) {
        if (s->frames[i].tf.f->data[0])
            vp9_unref_frame(ctx, &s->frames[i]);
        av_frame_free(&s->frames[i].tf.f);
    for (i = 0; i < 8; i++) {
        if (s->refs[i].f->data[0])
            ff_thread_release_buffer(ctx, &s->refs[i]);
        av_frame_free(&s->refs[i].f);
        if (s->next_refs[i].f->data[0])
            ff_thread_release_buffer(ctx, &s->next_refs[i]);
        av_frame_free(&s->next_refs[i].f);
/**
 * AVCodec.decode callback: decode one VP9 frame (or return a re-displayed
 * reference frame for "show existing frame" packets, signalled by
 * decode_frame_header() returning 0 with a ref index).
 *
 * Overall flow: parse the frame header; rotate the internal frame slots
 * (CUR_FRAME / REF_FRAME_SEGMAP / REF_FRAME_MVPAIR); allocate the output
 * frame; set up next_refs from the refresh mask; reset the above-row
 * contexts; then run the tile decode loop (1 or 2 passes — 2 when frame
 * threading with context refresh and no parallel mode), loop-filtering and
 * reporting progress one superblock row at a time; finally promote
 * next_refs into refs and return the frame if it is visible.
 *
 * NOTE(review): many interior lines (do/while header, error-path returns,
 * tile_size bookkeeping, several braces) are elided in this copy of the
 * source; restore from a pristine vp9.c before building.
 */
static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
                            int *got_frame, AVPacket *pkt)
    const uint8_t *data = pkt->data;
    int size = pkt->size;
    VP9Context *s = ctx->priv_data;
    int res, tile_row, tile_col, i, ref, row, col;
    /* the previous segmentation map can be reused when the map isn't updated */
    int retain_segmap_ref = s->frames[REF_FRAME_SEGMAP].segmentation_map &&
                            (!s->segmentation.enabled || !s->segmentation.update_map);
    ptrdiff_t yoff, uvoff, ls_y, ls_uv;

    if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
    } else if (res == 0) {
        /* "show existing frame": return a reference frame unchanged */
        if (!s->refs[ref].f->data[0]) {
            av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
        ((AVFrame *)frame)->pkt_pts = pkt->pts;
        ((AVFrame *)frame)->pkt_dts = pkt->dts;
        /* keep the reference set unchanged for the next frame */
        for (i = 0; i < 8; i++) {
            if (s->next_refs[i].f->data[0])
                ff_thread_release_buffer(ctx, &s->next_refs[i]);
            if (s->refs[i].f->data[0] &&
                (res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i])) < 0)

    /* rotate frame slots: current frame becomes the segmap/mvpair reference */
    if (!retain_segmap_ref || s->keyframe || s->intraonly) {
        if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
            vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
        if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
            (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
    if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
        vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);
    if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
        (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
    if (s->frames[CUR_FRAME].tf.f->data[0])
        vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
    if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
    f = s->frames[CUR_FRAME].tf.f;
    f->key_frame = s->keyframe;
    f->pict_type = (s->keyframe || s->intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    ls_y = f->linesize[0];
    ls_uv =f->linesize[1];

    /* refresh the reference slots named by the refresh mask with this frame */
    for (i = 0; i < 8; i++) {
        if (s->next_refs[i].f->data[0])
            ff_thread_release_buffer(ctx, &s->next_refs[i]);
        if (s->refreshrefmask & (1 << i)) {
            res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
        } else if (s->refs[i].f->data[0]) {
            res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);

    // main tile decode loop
    bytesperpixel = s->bytesperpixel;
    /* reset the above-row contexts for the whole picture */
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->keyframe || s->intraonly) {
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
    memset(s->above_segpred_ctx, 0, s->cols);
    /* 2-pass decode: only with frame threads, context refresh, no parallel mode */
    s->pass = s->frames[CUR_FRAME].uses_2pass =
        ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
    if ((res = update_block_buffers(ctx)) < 0) {
        av_log(ctx, AV_LOG_ERROR,
               "Failed to allocate block buffers\n");
    if (s->refreshctx && s->parallelmode) {
        /* parallel mode: commit the frame context up-front so other frame
         * threads can proceed without waiting for adaptation */
        for (i = 0; i < 4; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++)
                            memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
                                   s->prob.coef[i][j][k][l][m], 3);
            if (s->txfmmode == i)
        s->prob_ctx[s->framectxid].p = s->prob.p;
        ff_thread_finish_setup(ctx);
    } else if (!s->refreshctx) {
        ff_thread_finish_setup(ctx);

        s->block = s->block_base;
        s->uvblock[0] = s->uvblock_base[0];
        s->uvblock[1] = s->uvblock_base[1];
        s->eob = s->eob_base;
        s->uveob[0] = s->uveob_base[0];
        s->uveob[1] = s->uveob_base[1];

        for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
            set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
                            tile_row, s->tiling.log2_tile_rows, s->sb_rows);
            /* set up one range decoder per tile column; the last tile uses
             * the remaining packet bytes, others a 32-bit size prefix */
            for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
                if (tile_col == s->tiling.tile_cols - 1 &&
                    tile_row == s->tiling.tile_rows - 1) {
                    tile_size = AV_RB32(data);
                if (tile_size > size) {
                    ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
                    return AVERROR_INVALIDDATA;
                ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
                if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
                    ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
                    return AVERROR_INVALIDDATA;

            for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
                 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
                struct VP9Filter *lflvl_ptr = s->lflvl;
                ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

                for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
                    set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
                                    tile_col, s->tiling.log2_tile_cols, s->sb_cols);

                    /* reset the left-edge contexts at each tile boundary */
                    memset(s->left_partition_ctx, 0, 8);
                    memset(s->left_skip_ctx, 0, 8);
                    if (s->keyframe || s->intraonly) {
                        memset(s->left_mode_ctx, DC_PRED, 16);
                        memset(s->left_mode_ctx, NEARESTMV, 8);
                    memset(s->left_y_nnz_ctx, 0, 16);
                    memset(s->left_uv_nnz_ctx, 0, 32);
                    memset(s->left_segpred_ctx, 0, 8);

                    /* resume this tile column's range decoder state */
                    memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));

                    for (col = s->tiling.tile_col_start;
                         col < s->tiling.tile_col_end;
                         col += 8, yoff2 += 64 * bytesperpixel,
                         uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                        // FIXME integrate with lf code (i.e. zero after each
                        // use, similar to invtxfm coefficients, or similar)
                            memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));

                            decode_sb_mem(ctx, row, col, lflvl_ptr,
                                          yoff2, uvoff2, BL_64X64);
                            decode_sb(ctx, row, col, lflvl_ptr,
                                      yoff2, uvoff2, BL_64X64);
                        /* save the decoder state back for the next sb row */
                        memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));

                // backup pre-loopfilter reconstruction data for intra
                // prediction of next row of sb64s
                if (row + 8 < s->rows) {
                    memcpy(s->intra_pred_data[0],
                           f->data[0] + yoff + 63 * ls_y,
                           8 * s->cols * bytesperpixel);
                    memcpy(s->intra_pred_data[1],
                           f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                           8 * s->cols * bytesperpixel >> s->ss_h);
                    memcpy(s->intra_pred_data[2],
                           f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                           8 * s->cols * bytesperpixel >> s->ss_h);

                // loopfilter one row
                if (s->filter.level) {
                    lflvl_ptr = s->lflvl;
                    for (col = 0; col < s->cols;
                         col += 8, yoff2 += 64 * bytesperpixel,
                         uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                        loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);

                // FIXME maybe we can make this more finegrained by running the
                // loopfilter per-block instead of after each sbrow
                // In fact that would also make intra pred left preparation easier?
                ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);

        /* after the first (parse) pass, adapt and publish the frame context */
        if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
            ff_thread_finish_setup(ctx);
    } while (s->pass++ == 1);
    ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);

    // ref frame setup: promote next_refs into the active reference set
    for (i = 0; i < 8; i++) {
        if (s->refs[i].f->data[0])
            ff_thread_release_buffer(ctx, &s->refs[i]);
        ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);

    if (!s->invisible) {
        if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
/**
 * AVCodec.flush callback: drop all internal frames and reference slots
 * (e.g. on seek), leaving the decoder ready for the next keyframe.
 *
 * NOTE(review): the `int i;` declaration and braces are elided in this
 * copy of the source.
 */
static void vp9_decode_flush(AVCodecContext *ctx)
    VP9Context *s = ctx->priv_data;

    for (i = 0; i < 3; i++)
        vp9_unref_frame(ctx, &s->frames[i]);
    for (i = 0; i < 8; i++)
        ff_thread_release_buffer(ctx, &s->refs[i]);
/**
 * Allocate the AVFrame shells for the 3 internal frame slots and the
 * 8 reference + 8 next-reference slots. On any allocation failure the
 * already-created frames are torn down via vp9_decode_free() and
 * AVERROR(ENOMEM) is returned.
 */
static int init_frames(AVCodecContext *ctx)
    VP9Context *s = ctx->priv_data;

    for (i = 0; i < 3; i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f) {
            /* safe even with partially-initialized state: free what exists */
            vp9_decode_free(ctx);
            av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
            return AVERROR(ENOMEM);
    for (i = 0; i < 8; i++) {
        s->refs[i].f = av_frame_alloc();
        s->next_refs[i].f = av_frame_alloc();
        if (!s->refs[i].f || !s->next_refs[i].f) {
            vp9_decode_free(ctx);
            av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
            return AVERROR(ENOMEM);
/**
 * AVCodec.init callback: enable per-frame progress allocation for frame
 * threading, mark the loop-filter sharpness as "unset" (-1) so the first
 * frame header forces a limit-table rebuild, and allocate the frame shells.
 */
static av_cold int vp9_decode_init(AVCodecContext *ctx)
    VP9Context *s = ctx->priv_data;

    ctx->internal->allocate_progress = 1;
    s->filter.sharpness = -1;

    return init_frames(ctx);
/* Frame-thread worker init: each thread copy only needs its own frame
 * shells; all other state is synced via update_thread_context(). */
static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
    return init_frames(avctx);
/**
 * Frame-threading sync: copy decoding state from the source thread (which
 * just finished its header/setup phase) into this thread's context —
 * frame slots, reference slots, per-frame header fields, probability
 * contexts and loop-filter deltas.
 *
 * NOTE(review): declarations (i, res), the buffer-free call for size
 * changes, and closing braces/return are elided in this copy.
 */
static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
    VP9Context *s = dst->priv_data, *ssrc = src->priv_data;

    // detect size changes in other threads
    if (s->intra_pred_data[0] &&
        (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {

    for (i = 0; i < 3; i++) {
        if (s->frames[i].tf.f->data[0])
            vp9_unref_frame(dst, &s->frames[i]);
        if (ssrc->frames[i].tf.f->data[0]) {
            if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
    for (i = 0; i < 8; i++) {
        if (s->refs[i].f->data[0])
            ff_thread_release_buffer(dst, &s->refs[i]);
        /* the source's *next* refs become this thread's current refs */
        if (ssrc->next_refs[i].f->data[0]) {
            if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)

    /* per-frame header state needed before this thread parses its frame */
    s->invisible = ssrc->invisible;
    s->keyframe = ssrc->keyframe;
    s->intraonly = ssrc->intraonly;
    s->ss_v = ssrc->ss_v;
    s->ss_h = ssrc->ss_h;
    s->segmentation.enabled = ssrc->segmentation.enabled;
    s->segmentation.update_map = ssrc->segmentation.update_map;
    s->bytesperpixel = ssrc->bytesperpixel;
    s->bpp_index = ssrc->bpp_index;
    memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
    memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
    if (ssrc->segmentation.enabled) {
        memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
               sizeof(s->segmentation.feat));
/* Supported VP9 profiles, exported via AVCodec.profiles; terminated by
 * FF_PROFILE_UNKNOWN as the API requires. */
static const AVProfile profiles[] = {
    { FF_PROFILE_VP9_0, "Profile 0" },
    { FF_PROFILE_VP9_1, "Profile 1" },
    { FF_PROFILE_VP9_2, "Profile 2" },
    { FF_PROFILE_VP9_3, "Profile 3" },
    { FF_PROFILE_UNKNOWN },
4359 AVCodec ff_vp9_decoder = {
4361 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4362 .type = AVMEDIA_TYPE_VIDEO,
4363 .id = AV_CODEC_ID_VP9,
4364 .priv_data_size = sizeof(VP9Context),
4365 .init = vp9_decode_init,
4366 .close = vp9_decode_free,
4367 .decode = vp9_decode_frame,
4368 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
4369 .flush = vp9_decode_flush,
4370 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4371 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
4372 .profiles = NULL_IF_CONFIG_SMALL(profiles),