2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
34 #include "libavutil/pixdesc.h"
36 #define VP9_SYNCCODE 0x498342
/* Per-frame state: the decoded picture plus side data (segmentation map
 * and per-block motion-vector pairs) that later frames read back.
 * NOTE(review): this listing is decimated — the closing brace and some
 * members (e.g. the ThreadFrame) are missing from view. */
73 typedef struct VP9Frame {
75 AVBufferRef *extradata;
/* segmentation_map and mv both point into extradata->data (see
 * vp9_alloc_frame below): map first, then the VP9mvrefPair array. */
76 uint8_t *segmentation_map;
77 struct VP9mvrefPair *mv;
/* NOTE(review): the two lines below belong to struct VP9Filter (loopfilter
 * edge masks), not VP9Frame — the intervening lines were lost. Masks are
 * split by plane (y/uv), edge direction (col/row), row within the
 * superblock, and transform size. */
83 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
84 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
/* Per-block decode state for the block currently being reconstructed:
 * coded mode decisions (segment id, intra/inter, compound flag, reference
 * indices, luma/chroma prediction modes, skip flag), up to four sub-block
 * motion vectors for up to two references, transform sizes and the
 * partition type.  NOTE(review): decimated listing — braces and some
 * members are missing from view. */
87 typedef struct VP9Block {
88 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
89 enum FilterMode filter;
90 VP56mv mv[4 /* b_idx */][2 /* ref */];
92 enum TxfmMode tx, uvtx;
94 enum BlockPartition bp;
/* Main decoder context: bitstream readers, per-frame header fields,
 * probability models and their adaptation counters, the "above" context
 * line buffers, and block-reconstruction scratch space.
 * NOTE(review): this listing is decimated — many members (DSP contexts,
 * frames[], the prob_ctx/prob/counts struct wrappers, etc.) are missing
 * from view; the bare coef[]/y_mode[]/classes[] arrays below are the
 * interiors of those nested structs. */
97 typedef struct VP9Context {
104 VP9Block *b_base, *b;
/* current position in 8x8-block units; *7 variants are (value & 7),
 * i.e. position within the 64x64 superblock */
106 int row, row7, col, col7;
108 ptrdiff_t y_stride, uv_stride;
// --- bitstream header fields ---
111 uint8_t keyframe, last_keyframe;
112 uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
114 uint8_t use_last_frame_mvs;
119 uint8_t refreshrefmask;
120 uint8_t highprecisionmvs;
121 enum FilterMode filtermode;
122 uint8_t allowcompinter;
125 uint8_t parallelmode;
129 uint8_t varcompref[2];
130 ThreadFrame refs[8], next_refs[8];
132 #define REF_FRAME_MVPAIR 1
133 #define REF_FRAME_SEGMAP 2
140 uint8_t mblim_lut[64];
148 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
150 #define MAX_SEGMENT 8
154 uint8_t absolute_vals;
156 uint8_t ignore_refmap;
161 uint8_t skip_enabled;
// --- tiling ---
170 unsigned log2_tile_cols, log2_tile_rows;
171 unsigned tile_cols, tile_rows;
172 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
/* frame dimensions in 64x64 superblocks (sb_*) and 8x8 blocks */
174 unsigned sb_cols, sb_rows, rows, cols;
// --- probability model interiors (saved contexts vs. current frame) ---
177 uint8_t coef[4][2][2][6][6][3];
181 uint8_t coef[4][2][2][6][6][11];
// --- adaptation counters (interior of the counts struct) ---
186 unsigned y_mode[4][10];
187 unsigned uv_mode[10][10];
188 unsigned filter[4][3];
189 unsigned mv_mode[7][4];
190 unsigned intra[4][2];
192 unsigned single_ref[5][2][2];
193 unsigned comp_ref[5][2];
194 unsigned tx32p[2][4];
195 unsigned tx16p[2][3];
198 unsigned mv_joint[4];
201 unsigned classes[11];
203 unsigned bits[10][2];
204 unsigned class0_fp[2][4];
206 unsigned class0_hp[2];
209 unsigned partition[4][4][4];
210 unsigned coef[4][2][2][6][6][3];
211 unsigned eob[4][2][2][6][6][2];
213 enum TxfmMode txfmmode;
214 enum CompPredMode comppredmode;
216 // contextual (left/above) cache
217 DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
218 DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
219 DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
220 DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
221 DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
222 DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
223 DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
224 DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
225 DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
226 DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
227 DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
228 DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
/* "above" context lines span the whole frame width; allocated in one
 * slab by update_size() below */
229 uint8_t *above_partition_ctx;
230 uint8_t *above_mode_ctx;
231 // FIXME maybe merge some of the below in a flags field?
232 uint8_t *above_y_nnz_ctx;
233 uint8_t *above_uv_nnz_ctx[2];
234 uint8_t *above_skip_ctx; // 1bit
235 uint8_t *above_txfm_ctx; // 2bit
236 uint8_t *above_segpred_ctx; // 1bit
237 uint8_t *above_intra_ctx; // 1bit
238 uint8_t *above_comp_ctx; // 1bit
239 uint8_t *above_ref_ctx; // 2bit
240 uint8_t *above_filter_ctx;
241 VP56mv (*above_mv_ctx)[2];
// --- whole-frame cache ---
244 uint8_t *intra_pred_data[3];
245 struct VP9Filter *lflvl;
246 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2];
248 // block reconstruction intermediates
249 int block_alloc_using_2pass;
250 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
251 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
/* legal motion-vector range for the current block (used by clamp_mv) */
252 struct { int x, y; } min_mv, max_mv;
253 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
254 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
/* scaled-reference MV scaling factors (14-bit fixed point) and steps */
255 uint16_t mvscale[3][2];
256 uint8_t mvstep[3][2];
259 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
261 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
262 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
264 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
265 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
269 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
271 VP9Context *s = ctx->priv_data;
274 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
276 sz = 64 * s->sb_cols * s->sb_rows;
277 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
278 ff_thread_release_buffer(ctx, &f->tf);
279 return AVERROR(ENOMEM);
282 f->segmentation_map = f->extradata->data;
283 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
288 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
290 ff_thread_release_buffer(ctx, &f->tf);
291 av_buffer_unref(&f->extradata);
294 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
298 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
300 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
301 vp9_unref_frame(ctx, dst);
302 return AVERROR(ENOMEM);
305 dst->segmentation_map = src->segmentation_map;
307 dst->uses_2pass = src->uses_2pass;
312 static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
314 VP9Context *s = ctx->priv_data;
316 int bytesperpixel = s->bytesperpixel;
318 av_assert0(w > 0 && h > 0);
320 if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
326 s->sb_cols = (w + 63) >> 6;
327 s->sb_rows = (h + 63) >> 6;
328 s->cols = (w + 7) >> 3;
329 s->rows = (h + 7) >> 3;
331 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
332 av_freep(&s->intra_pred_data[0]);
333 // FIXME we slightly over-allocate here for subsampled chroma, but a little
334 // bit of padding shouldn't affect performance...
335 p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
336 sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
338 return AVERROR(ENOMEM);
339 assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
340 assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
341 assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
342 assign(s->above_y_nnz_ctx, uint8_t *, 16);
343 assign(s->above_mode_ctx, uint8_t *, 16);
344 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
345 assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
346 assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
347 assign(s->above_partition_ctx, uint8_t *, 8);
348 assign(s->above_skip_ctx, uint8_t *, 8);
349 assign(s->above_txfm_ctx, uint8_t *, 8);
350 assign(s->above_segpred_ctx, uint8_t *, 8);
351 assign(s->above_intra_ctx, uint8_t *, 8);
352 assign(s->above_comp_ctx, uint8_t *, 8);
353 assign(s->above_ref_ctx, uint8_t *, 8);
354 assign(s->above_filter_ctx, uint8_t *, 8);
355 assign(s->lflvl, struct VP9Filter *, 1);
358 // these will be re-allocated a little later
359 av_freep(&s->b_base);
360 av_freep(&s->block_base);
362 if (s->bpp != s->last_bpp) {
363 ff_vp9dsp_init(&s->dsp, s->bpp);
364 ff_videodsp_init(&s->vdsp, s->bpp);
365 s->last_bpp = s->bpp;
371 static int update_block_buffers(AVCodecContext *ctx)
373 VP9Context *s = ctx->priv_data;
374 int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
376 if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
380 av_free(s->block_base);
381 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
382 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
383 if (s->frames[CUR_FRAME].uses_2pass) {
384 int sbs = s->sb_cols * s->sb_rows;
386 s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
387 s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
388 16 * 16 + 2 * chroma_eobs) * sbs);
389 if (!s->b_base || !s->block_base)
390 return AVERROR(ENOMEM);
391 s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
392 s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
393 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
394 s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
395 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
397 s->b_base = av_malloc(sizeof(VP9Block));
398 s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
399 16 * 16 + 2 * chroma_eobs);
400 if (!s->b_base || !s->block_base)
401 return AVERROR(ENOMEM);
402 s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
403 s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
404 s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
405 s->uveob_base[0] = s->eob_base + 16 * 16;
406 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
408 s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
413 // for some reason the sign bit is at the end, not the start, of a bit sequence
414 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
416 int v = get_bits(gb, n);
417 return get_bits1(gb) ? -v : v;
420 static av_always_inline int inv_recenter_nonneg(int v, int m)
422 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
425 // differential forward probability updates
426 static int update_prob(VP56RangeCoder *c, int p)
428 static const int inv_map_table[255] = {
429 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
430 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
431 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
432 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
433 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
434 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
435 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
436 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
437 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
438 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
439 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
440 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
441 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
442 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
443 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
444 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
445 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
446 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
451 /* This code is trying to do a differential probability update. For a
452 * current probability A in the range [1, 255], the difference to a new
453 * probability of any value can be expressed differentially as 1-A,255-A
454 * where some part of this (absolute range) exists both in positive as
455 * well as the negative part, whereas another part only exists in one
456 * half. We're trying to code this shared part differentially, i.e.
457 * times two where the value of the lowest bit specifies the sign, and
458 * the single part is then coded on top of this. This absolute difference
459 * then again has a value of [0,254], but a bigger value in this range
460 * indicates that we're further away from the original value A, so we
461 * can code this as a VLC code, since higher values are increasingly
462 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
463 * updates vs. the 'fine, exact' updates further down the range, which
464 * adds one extra dimension to this differential update model. */
466 if (!vp8_rac_get(c)) {
467 d = vp8_rac_get_uint(c, 4) + 0;
468 } else if (!vp8_rac_get(c)) {
469 d = vp8_rac_get_uint(c, 4) + 16;
470 } else if (!vp8_rac_get(c)) {
471 d = vp8_rac_get_uint(c, 5) + 32;
473 d = vp8_rac_get_uint(c, 7);
475 d = (d << 1) - 65 + vp8_rac_get(c);
477 av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
480 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
481 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
484 static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
486 static const enum AVColorSpace colorspaces[8] = {
487 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
488 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
490 VP9Context *s = ctx->priv_data;
491 enum AVPixelFormat res;
492 int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
495 s->bpp = 8 + bits * 2;
496 s->bytesperpixel = (7 + s->bpp) >> 3;
497 ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
498 if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
499 static const enum AVPixelFormat pix_fmt_rgb[3] = {
500 AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
502 if (ctx->profile & 1) {
503 s->ss_h = s->ss_v = 1;
504 res = pix_fmt_rgb[bits];
505 ctx->color_range = AVCOL_RANGE_JPEG;
507 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
509 return AVERROR_INVALIDDATA;
512 static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
513 { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
514 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
515 { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
516 { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
517 { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
518 { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
520 ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
521 if (ctx->profile & 1) {
522 s->ss_h = get_bits1(&s->gb);
523 s->ss_v = get_bits1(&s->gb);
524 if ((res = pix_fmt_for_ss[bits][s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
525 av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
527 return AVERROR_INVALIDDATA;
528 } else if (get_bits1(&s->gb)) {
529 av_log(ctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
531 return AVERROR_INVALIDDATA;
534 s->ss_h = s->ss_v = 1;
535 res = pix_fmt_for_ss[bits][1][1];
/**
 * Parse a complete VP9 frame header: the uncompressed part (frame marker,
 * profile, keyframe/visibility flags, size, references, loopfilter,
 * quantization, segmentation, tiling) and the range-coded "compressed"
 * part (probability updates).  Returns the byte offset of the tile data
 * on success, a negative AVERROR on invalid bitstreams.
 * NOTE(review): this listing is decimated — braces, else arms and several
 * statements are missing from view; code lines below are kept verbatim.
 */
542 static int decode_frame_header(AVCodecContext *ctx,
543 const uint8_t *data, int size, int *ref)
545 VP9Context *s = ctx->priv_data;
546 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
547 enum AVPixelFormat fmt = ctx->pix_fmt;
549 const uint8_t *data2;
// --- uncompressed header ---
552 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
553 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
556 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
557 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
558 return AVERROR_INVALIDDATA;
560 ctx->profile = get_bits1(&s->gb);
561 ctx->profile |= get_bits1(&s->gb) << 1;
562 if (ctx->profile == 3) ctx->profile += get_bits1(&s->gb);
563 if (ctx->profile > 3) {
564 av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
565 return AVERROR_INVALIDDATA;
// show-existing-frame: just output a previously decoded reference
567 if (get_bits1(&s->gb)) {
568 *ref = get_bits(&s->gb, 3);
571 s->last_keyframe = s->keyframe;
572 s->keyframe = !get_bits1(&s->gb);
573 last_invisible = s->invisible;
574 s->invisible = !get_bits1(&s->gb);
575 s->errorres = get_bits1(&s->gb);
576 s->use_last_frame_mvs = !s->errorres && !last_invisible;
// keyframe path: sync code, full color config, explicit size
578 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
579 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
580 return AVERROR_INVALIDDATA;
582 if ((fmt = read_colorspace_details(ctx)) < 0)
584 // for profile 1, here follows the subsampling bits
585 s->refreshrefmask = 0xff;
586 w = get_bits(&s->gb, 16) + 1;
587 h = get_bits(&s->gb, 16) + 1;
588 if (get_bits1(&s->gb)) // display size
589 skip_bits(&s->gb, 32);
// non-keyframe path: intra-only and reset-context flags
591 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
592 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
594 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
595 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
596 return AVERROR_INVALIDDATA;
598 if (ctx->profile >= 1) {
599 if ((fmt = read_colorspace_details(ctx)) < 0)
602 s->ss_h = s->ss_v = 1;
605 s->bytesperpixel = 1;
606 fmt = AV_PIX_FMT_YUV420P;
607 ctx->colorspace = AVCOL_SPC_BT470BG;
608 ctx->color_range = AVCOL_RANGE_JPEG;
610 s->refreshrefmask = get_bits(&s->gb, 8);
611 w = get_bits(&s->gb, 16) + 1;
612 h = get_bits(&s->gb, 16) + 1;
613 if (get_bits1(&s->gb)) // display size
614 skip_bits(&s->gb, 32);
// inter frame: refresh mask, three reference slots with sign biases
616 s->refreshrefmask = get_bits(&s->gb, 8);
617 s->refidx[0] = get_bits(&s->gb, 3);
618 s->signbias[0] = get_bits1(&s->gb) && !s->errorres;
619 s->refidx[1] = get_bits(&s->gb, 3);
620 s->signbias[1] = get_bits1(&s->gb) && !s->errorres;
621 s->refidx[2] = get_bits(&s->gb, 3);
622 s->signbias[2] = get_bits1(&s->gb) && !s->errorres;
623 if (!s->refs[s->refidx[0]].f->data[0] ||
624 !s->refs[s->refidx[1]].f->data[0] ||
625 !s->refs[s->refidx[2]].f->data[0]) {
626 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
627 return AVERROR_INVALIDDATA;
// frame size: either inherited from one of the refs or coded explicitly
629 if (get_bits1(&s->gb)) {
630 w = s->refs[s->refidx[0]].f->width;
631 h = s->refs[s->refidx[0]].f->height;
632 } else if (get_bits1(&s->gb)) {
633 w = s->refs[s->refidx[1]].f->width;
634 h = s->refs[s->refidx[1]].f->height;
635 } else if (get_bits1(&s->gb)) {
636 w = s->refs[s->refidx[2]].f->width;
637 h = s->refs[s->refidx[2]].f->height;
639 w = get_bits(&s->gb, 16) + 1;
640 h = get_bits(&s->gb, 16) + 1;
642 // Note that in this code, "CUR_FRAME" is actually before we
643 // have formally allocated a frame, and thus actually represents
645 s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
646 s->frames[CUR_FRAME].tf.f->height == h;
647 if (get_bits1(&s->gb)) // display size
648 skip_bits(&s->gb, 32);
649 s->highprecisionmvs = get_bits1(&s->gb);
650 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
// compound prediction is only possible if the refs disagree in sign bias
652 s->allowcompinter = (s->signbias[0] != s->signbias[1] ||
653 s->signbias[0] != s->signbias[2]);
654 if (s->allowcompinter) {
655 if (s->signbias[0] == s->signbias[1]) {
657 s->varcompref[0] = 0;
658 s->varcompref[1] = 1;
659 } else if (s->signbias[0] == s->signbias[2]) {
661 s->varcompref[0] = 0;
662 s->varcompref[1] = 2;
665 s->varcompref[0] = 1;
666 s->varcompref[1] = 2;
// per-reference MV scaling for scaled references (14-bit fixed point)
670 for (i = 0; i < 3; i++) {
671 AVFrame *ref = s->refs[s->refidx[i]].f;
672 int refw = ref->width, refh = ref->height;
674 if (ref->format != fmt) {
675 av_log(ctx, AV_LOG_ERROR,
676 "Ref pixfmt (%s) did not match current frame (%s)",
677 av_get_pix_fmt_name(ref->format),
678 av_get_pix_fmt_name(fmt));
679 return AVERROR_INVALIDDATA;
680 } else if (refw == w && refh == h) {
681 s->mvscale[i][0] = s->mvscale[i][1] = 0;
683 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
684 av_log(ctx, AV_LOG_ERROR,
685 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
687 return AVERROR_INVALIDDATA;
689 s->mvscale[i][0] = (refw << 14) / w;
690 s->mvscale[i][1] = (refh << 14) / h;
691 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
692 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
697 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
698 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
699 s->framectxid = c = get_bits(&s->gb, 2);
701 /* loopfilter header data */
702 if (s->keyframe || s->errorres || s->intraonly) {
703 // reset loopfilter defaults
704 s->lf_delta.ref[0] = 1;
705 s->lf_delta.ref[1] = 0;
706 s->lf_delta.ref[2] = -1;
707 s->lf_delta.ref[3] = -1;
708 s->lf_delta.mode[0] = 0;
709 s->lf_delta.mode[1] = 0;
711 s->filter.level = get_bits(&s->gb, 6);
712 sharp = get_bits(&s->gb, 3);
713 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
714 // the old cache values since they are still valid
715 if (s->filter.sharpness != sharp)
716 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
717 s->filter.sharpness = sharp;
718 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
719 if (get_bits1(&s->gb)) {
720 for (i = 0; i < 4; i++)
721 if (get_bits1(&s->gb))
722 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
723 for (i = 0; i < 2; i++)
724 if (get_bits1(&s->gb))
725 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
729 /* quantization header data */
730 s->yac_qi = get_bits(&s->gb, 8);
731 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
732 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
733 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
734 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
735 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
737 /* segmentation header info */
738 s->segmentation.ignore_refmap = 0;
739 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
740 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
741 for (i = 0; i < 7; i++)
742 s->prob.seg[i] = get_bits1(&s->gb) ?
743 get_bits(&s->gb, 8) : 255;
744 if ((s->segmentation.temporal = get_bits1(&s->gb))) {
745 for (i = 0; i < 3; i++)
746 s->prob.segpred[i] = get_bits1(&s->gb) ?
747 get_bits(&s->gb, 8) : 255;
// a reference segmap cannot be used across a size change
750 if ((!s->segmentation.update_map || s->segmentation.temporal) &&
751 (w != s->frames[CUR_FRAME].tf.f->width ||
752 h != s->frames[CUR_FRAME].tf.f->height)) {
753 av_log(ctx, AV_LOG_WARNING,
754 "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
755 s->segmentation.temporal, s->segmentation.update_map);
756 s->segmentation.ignore_refmap = 1;
757 //return AVERROR_INVALIDDATA;
760 if (get_bits1(&s->gb)) {
761 s->segmentation.absolute_vals = get_bits1(&s->gb);
762 for (i = 0; i < 8; i++) {
763 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
764 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
765 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
766 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
767 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
768 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
769 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
773 s->segmentation.feat[0].q_enabled = 0;
774 s->segmentation.feat[0].lf_enabled = 0;
775 s->segmentation.feat[0].skip_enabled = 0;
776 s->segmentation.feat[0].ref_enabled = 0;
779 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
780 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
781 int qyac, qydc, quvac, quvdc, lflvl, sh;
783 if (s->segmentation.feat[i].q_enabled) {
784 if (s->segmentation.absolute_vals)
785 qyac = s->segmentation.feat[i].q_val;
787 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
791 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
792 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
793 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
794 qyac = av_clip_uintp2(qyac, 8);
796 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
797 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
798 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
799 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];
// per-segment loopfilter levels, with ref/mode deltas applied
801 sh = s->filter.level >= 32;
802 if (s->segmentation.feat[i].lf_enabled) {
803 if (s->segmentation.absolute_vals)
804 lflvl = av_clip_uintp2(s->segmentation.feat[i].lf_val, 6);
806 lflvl = av_clip_uintp2(s->filter.level + s->segmentation.feat[i].lf_val, 6);
808 lflvl = s->filter.level;
810 if (s->lf_delta.enabled) {
811 s->segmentation.feat[i].lflvl[0][0] =
812 s->segmentation.feat[i].lflvl[0][1] =
813 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
814 for (j = 1; j < 4; j++) {
815 s->segmentation.feat[i].lflvl[j][0] =
816 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
817 s->lf_delta.mode[0]) * (1 << sh)), 6);
818 s->segmentation.feat[i].lflvl[j][1] =
819 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
820 s->lf_delta.mode[1]) * (1 << sh)), 6);
823 memset(s->segmentation.feat[i].lflvl, lflvl,
824 sizeof(s->segmentation.feat[i].lflvl));
829 if ((res = update_size(ctx, w, h, fmt)) < 0) {
830 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
// --- tiling: derive tile columns/rows, realloc per-tile range coders ---
833 for (s->tiling.log2_tile_cols = 0;
834 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
835 s->tiling.log2_tile_cols++) ;
836 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
837 max = FFMAX(0, max - 1);
838 while (max > s->tiling.log2_tile_cols) {
839 if (get_bits1(&s->gb))
840 s->tiling.log2_tile_cols++;
844 s->tiling.log2_tile_rows = decode012(&s->gb);
845 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
846 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
847 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
848 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
849 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
851 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
852 return AVERROR(ENOMEM);
// keyframes/error-resilient/intra-only frames reset all probability contexts
856 if (s->keyframe || s->errorres || s->intraonly) {
857 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
858 s->prob_ctx[3].p = vp9_default_probs;
859 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
860 sizeof(vp9_default_coef_probs));
861 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
862 sizeof(vp9_default_coef_probs));
863 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
864 sizeof(vp9_default_coef_probs));
865 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
866 sizeof(vp9_default_coef_probs));
869 // next 16 bits is size of the rest of the header (arith-coded)
870 size2 = get_bits(&s->gb, 16);
871 data2 = align_get_bits(&s->gb);
872 if (size2 > size - (data2 - data)) {
873 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
874 return AVERROR_INVALIDDATA;
876 ff_vp56_init_range_decoder(&s->c, data2, size2);
877 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
878 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
879 return AVERROR_INVALIDDATA;
// --- compressed header: forward probability updates ---
882 if (s->keyframe || s->intraonly) {
883 memset(s->counts.coef, 0, sizeof(s->counts.coef));
884 memset(s->counts.eob, 0, sizeof(s->counts.eob));
886 memset(&s->counts, 0, sizeof(s->counts));
888 // FIXME is it faster to not copy here, but do it down in the fw updates
889 // as explicit copies if the fw update is missing (and skip the copy upon
891 s->prob.p = s->prob_ctx[c].p;
// transform mode
895 s->txfmmode = TX_4X4;
897 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
898 if (s->txfmmode == 3)
899 s->txfmmode += vp8_rac_get(&s->c);
901 if (s->txfmmode == TX_SWITCHABLE) {
902 for (i = 0; i < 2; i++)
903 if (vp56_rac_get_prob_branchy(&s->c, 252))
904 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
905 for (i = 0; i < 2; i++)
906 for (j = 0; j < 2; j++)
907 if (vp56_rac_get_prob_branchy(&s->c, 252))
908 s->prob.p.tx16p[i][j] =
909 update_prob(&s->c, s->prob.p.tx16p[i][j]);
910 for (i = 0; i < 2; i++)
911 for (j = 0; j < 3; j++)
912 if (vp56_rac_get_prob_branchy(&s->c, 252))
913 s->prob.p.tx32p[i][j] =
914 update_prob(&s->c, s->prob.p.tx32p[i][j]);
// coefficient probability updates, per transform size
919 for (i = 0; i < 4; i++) {
920 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
921 if (vp8_rac_get(&s->c)) {
922 for (j = 0; j < 2; j++)
923 for (k = 0; k < 2; k++)
924 for (l = 0; l < 6; l++)
925 for (m = 0; m < 6; m++) {
926 uint8_t *p = s->prob.coef[i][j][k][l][m];
927 uint8_t *r = ref[j][k][l][m];
928 if (m >= 3 && l == 0) // dc only has 3 pt
930 for (n = 0; n < 3; n++) {
931 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
932 p[n] = update_prob(&s->c, r[n]);
940 for (j = 0; j < 2; j++)
941 for (k = 0; k < 2; k++)
942 for (l = 0; l < 6; l++)
943 for (m = 0; m < 6; m++) {
944 uint8_t *p = s->prob.coef[i][j][k][l][m];
945 uint8_t *r = ref[j][k][l][m];
946 if (m > 3 && l == 0) // dc only has 3 pt
952 if (s->txfmmode == i)
// mode/filter/ref probability updates
957 for (i = 0; i < 3; i++)
958 if (vp56_rac_get_prob_branchy(&s->c, 252))
959 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
960 if (!s->keyframe && !s->intraonly) {
961 for (i = 0; i < 7; i++)
962 for (j = 0; j < 3; j++)
963 if (vp56_rac_get_prob_branchy(&s->c, 252))
964 s->prob.p.mv_mode[i][j] =
965 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
967 if (s->filtermode == FILTER_SWITCHABLE)
968 for (i = 0; i < 4; i++)
969 for (j = 0; j < 2; j++)
970 if (vp56_rac_get_prob_branchy(&s->c, 252))
971 s->prob.p.filter[i][j] =
972 update_prob(&s->c, s->prob.p.filter[i][j]);
974 for (i = 0; i < 4; i++)
975 if (vp56_rac_get_prob_branchy(&s->c, 252))
976 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
978 if (s->allowcompinter) {
979 s->comppredmode = vp8_rac_get(&s->c);
981 s->comppredmode += vp8_rac_get(&s->c);
982 if (s->comppredmode == PRED_SWITCHABLE)
983 for (i = 0; i < 5; i++)
984 if (vp56_rac_get_prob_branchy(&s->c, 252))
986 update_prob(&s->c, s->prob.p.comp[i]);
988 s->comppredmode = PRED_SINGLEREF;
991 if (s->comppredmode != PRED_COMPREF) {
992 for (i = 0; i < 5; i++) {
993 if (vp56_rac_get_prob_branchy(&s->c, 252))
994 s->prob.p.single_ref[i][0] =
995 update_prob(&s->c, s->prob.p.single_ref[i][0]);
996 if (vp56_rac_get_prob_branchy(&s->c, 252))
997 s->prob.p.single_ref[i][1] =
998 update_prob(&s->c, s->prob.p.single_ref[i][1]);
1002 if (s->comppredmode != PRED_SINGLEREF) {
1003 for (i = 0; i < 5; i++)
1004 if (vp56_rac_get_prob_branchy(&s->c, 252))
1005 s->prob.p.comp_ref[i] =
1006 update_prob(&s->c, s->prob.p.comp_ref[i]);
1009 for (i = 0; i < 4; i++)
1010 for (j = 0; j < 9; j++)
1011 if (vp56_rac_get_prob_branchy(&s->c, 252))
1012 s->prob.p.y_mode[i][j] =
1013 update_prob(&s->c, s->prob.p.y_mode[i][j]);
1015 for (i = 0; i < 4; i++)
1016 for (j = 0; j < 4; j++)
1017 for (k = 0; k < 3; k++)
1018 if (vp56_rac_get_prob_branchy(&s->c, 252))
1019 s->prob.p.partition[3 - i][j][k] =
1020 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
1022 // mv fields don't use the update_prob subexp model for some reason
1023 for (i = 0; i < 3; i++)
1024 if (vp56_rac_get_prob_branchy(&s->c, 252))
1025 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1027 for (i = 0; i < 2; i++) {
1028 if (vp56_rac_get_prob_branchy(&s->c, 252))
1029 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1031 for (j = 0; j < 10; j++)
1032 if (vp56_rac_get_prob_branchy(&s->c, 252))
1033 s->prob.p.mv_comp[i].classes[j] =
1034 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1036 if (vp56_rac_get_prob_branchy(&s->c, 252))
1037 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1039 for (j = 0; j < 10; j++)
1040 if (vp56_rac_get_prob_branchy(&s->c, 252))
1041 s->prob.p.mv_comp[i].bits[j] =
1042 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1045 for (i = 0; i < 2; i++) {
1046 for (j = 0; j < 2; j++)
1047 for (k = 0; k < 3; k++)
1048 if (vp56_rac_get_prob_branchy(&s->c, 252))
1049 s->prob.p.mv_comp[i].class0_fp[j][k] =
1050 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1052 for (j = 0; j < 3; j++)
1053 if (vp56_rac_get_prob_branchy(&s->c, 252))
1054 s->prob.p.mv_comp[i].fp[j] =
1055 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1058 if (s->highprecisionmvs) {
1059 for (i = 0; i < 2; i++) {
1060 if (vp56_rac_get_prob_branchy(&s->c, 252))
1061 s->prob.p.mv_comp[i].class0_hp =
1062 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1064 if (vp56_rac_get_prob_branchy(&s->c, 252))
1065 s->prob.p.mv_comp[i].hp =
1066 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// success: return offset of the tile data within the packet
1071 return (data2 - data) + size2;
1074 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
1077 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
1078 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
// Derive the predicted motion vector 'pmv' for reference slot 'z' of the
// current block, for reference frame 'ref'.  Candidates are scanned in
// spec order: sub-block MVs already decoded in this block (sb >= 1),
// above/left edge contexts, spatial neighbours from mv_ref_blk_off,
// the co-located MV of the previous frame, and finally MVs that used a
// *different* reference (sign-flipped when the two references' sign
// biases differ).  'idx' selects nearest (0) vs near (1) candidate;
// sb is the sub-block index or -1 for whole-block/NEWMV.  The scan
// replicates libvpx exactly, including its documented bugs.
1081 static void find_ref_mvs(VP9Context *s,
1082 VP56mv *pmv, int ref, int z, int idx, int sb)
// Per-block-size (col, row) offsets of the up-to-8 neighbour positions
// scanned for candidate MVs (spec-defined order).
1084 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
1085 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
1086 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
1087 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
1088 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
1089 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
1090 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
1091 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
1092 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1093 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
1094 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1095 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
1096 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
1097 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
1098 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1099 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
1100 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
1101 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1102 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1103 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1104 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1105 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1106 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1107 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1108 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1109 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1110 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1113 int row = s->row, col = s->col, row7 = s->row7;
1114 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
// Sentinel packed-MV value that cannot occur as a real candidate.
1115 #define INVALID_MV 0x80008000U
1116 uint32_t mem = INVALID_MV, mem_sub8x8 = INVALID_MV;
// Accept a sub-block MV without clamping; skips past 'idx' duplicates.
1119 #define RETURN_DIRECT_MV(mv) \
1121 uint32_t m = AV_RN32A(&mv); \
1125 } else if (mem == INVALID_MV) { \
1127 } else if (m != mem) { \
// For sub-blocks > 0, earlier sub-block MVs of this same block are the
// highest-priority candidates.
1134 if (sb == 2 || sb == 1) {
1135 RETURN_DIRECT_MV(b->mv[0][z]);
1136 } else if (sb == 3) {
1137 RETURN_DIRECT_MV(b->mv[2][z]);
1138 RETURN_DIRECT_MV(b->mv[1][z]);
1139 RETURN_DIRECT_MV(b->mv[0][z]);
// Accept a clamped candidate MV; handles the sub8x8 dedup path as well.
1142 #define RETURN_MV(mv) \
1147 av_assert2(idx == 1); \
1148 av_assert2(mem != INVALID_MV); \
1149 if (mem_sub8x8 == INVALID_MV) { \
1150 clamp_mv(&tmp, &mv, s); \
1151 m = AV_RN32A(&tmp); \
1156 mem_sub8x8 = AV_RN32A(&mv); \
1157 } else if (mem_sub8x8 != AV_RN32A(&mv)) { \
1158 clamp_mv(&tmp, &mv, s); \
1159 m = AV_RN32A(&tmp); \
1163 /* BUG I'm pretty sure this isn't the intention */ \
1169 uint32_t m = AV_RN32A(&mv); \
1171 clamp_mv(pmv, &mv, s); \
1173 } else if (mem == INVALID_MV) { \
1175 } else if (m != mem) { \
1176 clamp_mv(pmv, &mv, s); \
// Directly-adjacent above/left neighbours, same reference frame.
1183 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1184 if (mv->ref[0] == ref) {
1185 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1186 } else if (mv->ref[1] == ref) {
1187 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
1190 if (col > s->tiling.tile_col_start) {
1191 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1192 if (mv->ref[0] == ref) {
1193 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1194 } else if (mv->ref[1] == ref) {
1195 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1203 // previously coded MVs in this neighbourhood, using same reference frame
1204 for (; i < 8; i++) {
1205 int c = p[i][0] + col, r = p[i][1] + row;
1207 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1208 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1210 if (mv->ref[0] == ref) {
1211 RETURN_MV(mv->mv[0]);
1212 } else if (mv->ref[1] == ref) {
1213 RETURN_MV(mv->mv[1]);
1218 // MV at this position in previous frame, using same reference frame
1219 if (s->use_last_frame_mvs) {
1220 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
// Frame-threading: wait until the reference row of the MV frame is done
// (not needed when that frame was decoded in 2-pass mode).
1222 if (!s->frames[REF_FRAME_MVPAIR].uses_2pass)
1223 ff_thread_await_progress(&s->frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
1224 if (mv->ref[0] == ref) {
1225 RETURN_MV(mv->mv[0]);
1226 } else if (mv->ref[1] == ref) {
1227 RETURN_MV(mv->mv[1]);
// As RETURN_MV, but negates the MV first when the candidate's reference
// has the opposite sign bias to 'ref'.
1231 #define RETURN_SCALE_MV(mv, scale) \
1234 VP56mv mv_temp = { -mv.x, -mv.y }; \
1235 RETURN_MV(mv_temp); \
1241 // previously coded MVs in this neighbourhood, using different reference frame
1242 for (i = 0; i < 8; i++) {
1243 int c = p[i][0] + col, r = p[i][1] + row;
1245 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1246 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1248 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1249 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1251 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1252 // BUG - libvpx has this condition regardless of whether
1253 // we used the first ref MV and pre-scaling
1254 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1255 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1260 // MV at this position in previous frame, using different reference frame
1261 if (s->use_last_frame_mvs) {
1262 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1264 // no need to await_progress, because we already did that above
1265 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1266 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1268 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1269 // BUG - libvpx has this condition regardless of whether
1270 // we used the first ref MV and pre-scaling
1271 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1272 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
// No candidate found: fall through with whatever pmv currently holds,
// clamped to the valid range.
1277 clamp_mv(pmv, pmv, s);
1280 #undef RETURN_SCALE_MV
// Decode one MV component (idx 0 = vertical/y probs, 1 = horizontal/x)
// from the range coder, updating the per-component entropy counters as
// each symbol is read.  'hp' enables the extra high-precision bit.
// Returns the signed component delta (never 0; sign applied at the end).
// The read order (sign, class, then class-dependent bits) is fixed by
// the bitstream and must not be reordered.
1283 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1285 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1286 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1287 s->prob.p.mv_comp[idx].classes);
1289 s->counts.mv_comp[idx].sign[sign]++;
1290 s->counts.mv_comp[idx].classes[c]++;
// Classes >= 1: read 'c' magnitude bits, then fractional-precision bits.
1294 for (n = 0, m = 0; m < c; m++) {
1295 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1297 s->counts.mv_comp[idx].bits[m][bit]++;
1300 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1302 s->counts.mv_comp[idx].fp[bit]++;
1304 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1305 s->counts.mv_comp[idx].hp[bit]++;
1309 // bug in libvpx - we count for bw entropy purposes even if the
1311 s->counts.mv_comp[idx].hp[1]++;
// Class 0: single magnitude bit plus class0-specific fp/hp symbols.
1315 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1316 s->counts.mv_comp[idx].class0[n]++;
1317 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1318 s->prob.p.mv_comp[idx].class0_fp[n]);
1319 s->counts.mv_comp[idx].class0_fp[n][bit]++;
1320 n = (n << 3) | (bit << 1);
1322 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1323 s->counts.mv_comp[idx].class0_hp[bit]++;
1327 // bug in libvpx - we count for bw entropy purposes even if the
1329 s->counts.mv_comp[idx].class0_hp[1]++;
1333 return sign ? -(n + 1) : (n + 1);
// Fill mv[0] (and mv[1] when the block is compound-predicted) for one
// sub-block: ZEROMV clears, NEAREST/NEARMV take a predictor from
// find_ref_mvs(), and NEWMV adds decoded MV residuals on top of the
// nearest predictor.  'sb' is the sub-block index (-1 for whole block).
1336 static void fill_mv(VP9Context *s,
1337 VP56mv *mv, int mode, int sb)
1341 if (mode == ZEROMV) {
1346 // FIXME cache this value and reuse for other subblocks
1347 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1348 mode == NEWMV ? -1 : sb);
1349 // FIXME maybe move this code into find_ref_mvs()
// hp is the effective high-precision flag: only when enabled globally
// and the predictor is small enough (|x|,|y| < 64, i.e. < 8 full pels).
1350 if ((mode == NEWMV || sb == -1) &&
1351 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1365 if (mode == NEWMV) {
// The joint symbol says which components carry a residual (V, H, both).
1366 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1367 s->prob.p.mv_joint);
1369 s->counts.mv_joint[j]++;
1370 if (j >= MV_JOINT_V)
1371 mv[0].y += read_mv_component(s, 0, hp);
1373 mv[0].x += read_mv_component(s, 1, hp);
// Second reference of a compound block: identical procedure on mv[1].
1377 // FIXME cache this value and reuse for other subblocks
1378 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1379 mode == NEWMV ? -1 : sb);
1380 if ((mode == NEWMV || sb == -1) &&
1381 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1395 if (mode == NEWMV) {
1396 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1397 s->prob.p.mv_joint);
1399 s->counts.mv_joint[j]++;
1400 if (j >= MV_JOINT_V)
1401 mv[1].y += read_mv_component(s, 0, hp);
1403 mv[1].x += read_mv_component(s, 1, hp);
// Fill a w x h byte rectangle at 'ptr' (row stride 'stride') with the
// value 'v', using progressively wider aligned stores (16/32/64-bit
// splat patterns) depending on w.
1409 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1410 ptrdiff_t stride, int v)
1420 int v16 = v * 0x0101;
1428 uint32_t v32 = v * 0x01010101;
1437 uint64_t v64 = v * 0x0101010101010101ULL;
1443 uint32_t v32 = v * 0x01010101;
1446 AV_WN32A(ptr + 4, v32);
// Decode all mode information for the current block (s->b at s->row /
// s->col): segment id, skip flag, tx size, intra/inter decision, intra
// prediction modes or reference frames + inter modes + motion vectors,
// and the interpolation filter; then splat the decoded values into the
// above/left context arrays and the per-frame MV/ref storage.  Every
// range-coder read below derives its probability context from the
// above/left neighbour state, so statement order is bitstream-critical.
1455 static void decode_mode(AVCodecContext *ctx)
1457 static const uint8_t left_ctx[N_BS_SIZES] = {
1458 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1460 static const uint8_t above_ctx[N_BS_SIZES] = {
1461 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
1463 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1464 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1465 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1467 VP9Context *s = ctx->priv_data;
1469 int row = s->row, col = s->col, row7 = s->row7;
1470 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
// w4/h4: block size in 8x8 units, clipped against the frame edge.
1471 int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1472 int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
1473 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1474 int vref, filter_id;
// --- segment id: explicit, temporally predicted, or defaulted ---
1476 if (!s->segmentation.enabled) {
1478 } else if (s->keyframe || s->intraonly) {
1479 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
1480 } else if (!s->segmentation.update_map ||
1481 (s->segmentation.temporal &&
1482 vp56_rac_get_prob_branchy(&s->c,
1483 s->prob.segpred[s->above_segpred_ctx[col] +
1484 s->left_segpred_ctx[row7]]))) {
1485 if (!s->errorres && !s->segmentation.ignore_refmap) {
1487 uint8_t *refsegmap = s->frames[REF_FRAME_SEGMAP].segmentation_map;
1489 if (!s->frames[REF_FRAME_SEGMAP].uses_2pass)
1490 ff_thread_await_progress(&s->frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
// Predicted seg id = minimum over the co-located area of the reference
// segmentation map.
1491 for (y = 0; y < h4; y++) {
1492 int idx_base = (y + row) * 8 * s->sb_cols + col;
1493 for (x = 0; x < w4; x++)
1494 pred = FFMIN(pred, refsegmap[idx_base + x]);
1496 av_assert1(pred < 8);
1502 memset(&s->above_segpred_ctx[col], 1, w4);
1503 memset(&s->left_segpred_ctx[row7], 1, h4);
1505 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1508 memset(&s->above_segpred_ctx[col], 0, w4);
1509 memset(&s->left_segpred_ctx[row7], 0, h4);
1511 if (s->segmentation.enabled &&
1512 (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1513 setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1514 bw4, bh4, 8 * s->sb_cols, b->seg_id);
// --- skip flag: forced by the segment feature, or coded ---
1517 b->skip = s->segmentation.enabled &&
1518 s->segmentation.feat[b->seg_id].skip_enabled;
1520 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1521 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1522 s->counts.skip[c][b->skip]++;
// --- intra/inter flag ---
1525 if (s->keyframe || s->intraonly) {
1527 } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1528 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1532 if (have_a && have_l) {
1533 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1536 c = have_a ? 2 * s->above_intra_ctx[col] :
1537 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1539 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1540 s->counts.intra[c][bit]++;
// --- transform size (only coded when TX_SWITCHABLE) ---
1544 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1548 c = (s->above_skip_ctx[col] ? max_tx :
1549 s->above_txfm_ctx[col]) +
1550 (s->left_skip_ctx[row7] ? max_tx :
1551 s->left_txfm_ctx[row7]) > max_tx;
1553 c = s->above_skip_ctx[col] ? 1 :
1554 (s->above_txfm_ctx[col] * 2 > max_tx);
1556 } else if (have_l) {
1557 c = s->left_skip_ctx[row7] ? 1 :
1558 (s->left_txfm_ctx[row7] * 2 > max_tx);
1564 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1566 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1568 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1570 s->counts.tx32p[c][b->tx]++;
1573 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1575 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1576 s->counts.tx16p[c][b->tx]++;
1579 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1580 s->counts.tx8p[c][b->tx]++;
1587 b->tx = FFMIN(max_tx, s->txfmmode);
// --- intra modes on key/intra-only frames (default KF probabilities,
// contexted on above/left modes) ---
1590 if (s->keyframe || s->intraonly) {
1591 uint8_t *a = &s->above_mode_ctx[col * 2];
1592 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1595 if (b->bs > BS_8x8) {
1596 // FIXME the memory storage intermediates here aren't really
1597 // necessary, they're just there to make the code slightly
1599 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1600 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1601 if (b->bs != BS_8x4) {
1602 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1603 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1604 l[0] = a[1] = b->mode[1];
1606 l[0] = a[1] = b->mode[1] = b->mode[0];
1608 if (b->bs != BS_4x8) {
1609 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1610 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1611 if (b->bs != BS_8x4) {
1612 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1613 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1614 l[1] = a[1] = b->mode[3];
1616 l[1] = a[1] = b->mode[3] = b->mode[2];
1619 b->mode[2] = b->mode[0];
1620 l[1] = a[1] = b->mode[3] = b->mode[1];
1623 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1624 vp9_default_kf_ymode_probs[*a][*l]);
1625 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1626 // FIXME this can probably be optimized
1627 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1628 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1630 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1631 vp9_default_kf_uvmode_probs[b->mode[3]]);
// --- intra modes on inter frames (adaptive probabilities) ---
1632 } else if (b->intra) {
1634 if (b->bs > BS_8x8) {
1635 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1636 s->prob.p.y_mode[0]);
1637 s->counts.y_mode[0][b->mode[0]]++;
1638 if (b->bs != BS_8x4) {
1639 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1640 s->prob.p.y_mode[0]);
1641 s->counts.y_mode[0][b->mode[1]]++;
1643 b->mode[1] = b->mode[0];
1645 if (b->bs != BS_4x8) {
1646 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1647 s->prob.p.y_mode[0]);
1648 s->counts.y_mode[0][b->mode[2]]++;
1649 if (b->bs != BS_8x4) {
1650 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1651 s->prob.p.y_mode[0]);
1652 s->counts.y_mode[0][b->mode[3]]++;
1654 b->mode[3] = b->mode[2];
1657 b->mode[2] = b->mode[0];
1658 b->mode[3] = b->mode[1];
1661 static const uint8_t size_group[10] = {
1662 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1664 int sz = size_group[b->bs];
1666 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1667 s->prob.p.y_mode[sz]);
1668 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1669 s->counts.y_mode[sz][b->mode[3]]++;
1671 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1672 s->prob.p.uv_mode[b->mode[3]]);
1673 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
// --- inter block: references, inter mode, filter, MVs ---
1675 static const uint8_t inter_mode_ctx_lut[14][14] = {
1676 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1677 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1678 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1679 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1680 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1681 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1682 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1683 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1684 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1685 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1686 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1687 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1688 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1689 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
1692 if (s->segmentation.feat[b->seg_id].ref_enabled) {
1693 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1695 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1697 // read comp_pred flag
1698 if (s->comppredmode != PRED_SWITCHABLE) {
1699 b->comp = s->comppredmode == PRED_COMPREF;
1703 // FIXME add intra as ref=0xff (or -1) to make these easier?
1706 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1708 } else if (s->above_comp_ctx[col]) {
1709 c = 2 + (s->left_intra_ctx[row7] ||
1710 s->left_ref_ctx[row7] == s->fixcompref);
1711 } else if (s->left_comp_ctx[row7]) {
1712 c = 2 + (s->above_intra_ctx[col] ||
1713 s->above_ref_ctx[col] == s->fixcompref);
1715 c = (!s->above_intra_ctx[col] &&
1716 s->above_ref_ctx[col] == s->fixcompref) ^
1717 (!s->left_intra_ctx[row7] &&
// NOTE(review): 'row & 7' here where every sibling context lookup uses
// 'row7' — looks suspicious, but row7 == row & 7 by construction, so
// this is equivalent; confirm against upstream before "fixing".
1718 s->left_ref_ctx[row & 7] == s->fixcompref);
1721 c = s->above_comp_ctx[col] ? 3 :
1722 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1724 } else if (have_l) {
1725 c = s->left_comp_ctx[row7] ? 3 :
1726 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1730 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1731 s->counts.comp[c][b->comp]++;
1734 // read actual references
1735 // FIXME probably cache a few variables here to prevent repetitive
1736 // memory accesses below
1737 if (b->comp) /* two references */ {
1738 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1740 b->ref[fix_idx] = s->fixcompref;
1741 // FIXME can this codeblob be replaced by some sort of LUT?
1744 if (s->above_intra_ctx[col]) {
1745 if (s->left_intra_ctx[row7]) {
1748 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1750 } else if (s->left_intra_ctx[row7]) {
1751 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1753 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1755 if (refl == refa && refa == s->varcompref[1]) {
1757 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1758 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1759 (refl == s->fixcompref && refa == s->varcompref[0])) {
1762 c = (refa == refl) ? 3 : 1;
1764 } else if (!s->left_comp_ctx[row7]) {
1765 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1768 c = (refl == s->varcompref[1] &&
1769 refa != s->varcompref[1]) ? 2 : 4;
1771 } else if (!s->above_comp_ctx[col]) {
1772 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1775 c = (refa == s->varcompref[1] &&
1776 refl != s->varcompref[1]) ? 2 : 4;
1779 c = (refl == refa) ? 4 : 2;
1783 if (s->above_intra_ctx[col]) {
1785 } else if (s->above_comp_ctx[col]) {
1786 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1788 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1791 } else if (have_l) {
1792 if (s->left_intra_ctx[row7]) {
1794 } else if (s->left_comp_ctx[row7]) {
1795 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1797 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1802 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1803 b->ref[var_idx] = s->varcompref[bit];
1804 s->counts.comp_ref[c][bit]++;
1805 } else /* single reference */ {
1808 if (have_a && !s->above_intra_ctx[col]) {
1809 if (have_l && !s->left_intra_ctx[row7]) {
1810 if (s->left_comp_ctx[row7]) {
1811 if (s->above_comp_ctx[col]) {
1812 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1813 !s->above_ref_ctx[col]);
1815 c = (3 * !s->above_ref_ctx[col]) +
1816 (!s->fixcompref || !s->left_ref_ctx[row7]);
1818 } else if (s->above_comp_ctx[col]) {
1819 c = (3 * !s->left_ref_ctx[row7]) +
1820 (!s->fixcompref || !s->above_ref_ctx[col]);
1822 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1824 } else if (s->above_intra_ctx[col]) {
1826 } else if (s->above_comp_ctx[col]) {
1827 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1829 c = 4 * (!s->above_ref_ctx[col]);
1831 } else if (have_l && !s->left_intra_ctx[row7]) {
1832 if (s->left_intra_ctx[row7]) {
1834 } else if (s->left_comp_ctx[row7]) {
1835 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1837 c = 4 * (!s->left_ref_ctx[row7]);
1842 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1843 s->counts.single_ref[c][0][bit]++;
1847 // FIXME can this codeblob be replaced by some sort of LUT?
1850 if (s->left_intra_ctx[row7]) {
1851 if (s->above_intra_ctx[col]) {
1853 } else if (s->above_comp_ctx[col]) {
1854 c = 1 + 2 * (s->fixcompref == 1 ||
1855 s->above_ref_ctx[col] == 1);
1856 } else if (!s->above_ref_ctx[col]) {
1859 c = 4 * (s->above_ref_ctx[col] == 1);
1861 } else if (s->above_intra_ctx[col]) {
1862 if (s->left_intra_ctx[row7]) {
1864 } else if (s->left_comp_ctx[row7]) {
1865 c = 1 + 2 * (s->fixcompref == 1 ||
1866 s->left_ref_ctx[row7] == 1);
1867 } else if (!s->left_ref_ctx[row7]) {
1870 c = 4 * (s->left_ref_ctx[row7] == 1);
1872 } else if (s->above_comp_ctx[col]) {
1873 if (s->left_comp_ctx[row7]) {
1874 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1875 c = 3 * (s->fixcompref == 1 ||
1876 s->left_ref_ctx[row7] == 1);
1880 } else if (!s->left_ref_ctx[row7]) {
1881 c = 1 + 2 * (s->fixcompref == 1 ||
1882 s->above_ref_ctx[col] == 1);
1884 c = 3 * (s->left_ref_ctx[row7] == 1) +
1885 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1887 } else if (s->left_comp_ctx[row7]) {
1888 if (!s->above_ref_ctx[col]) {
1889 c = 1 + 2 * (s->fixcompref == 1 ||
1890 s->left_ref_ctx[row7] == 1);
1892 c = 3 * (s->above_ref_ctx[col] == 1) +
1893 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1895 } else if (!s->above_ref_ctx[col]) {
1896 if (!s->left_ref_ctx[row7]) {
1899 c = 4 * (s->left_ref_ctx[row7] == 1);
1901 } else if (!s->left_ref_ctx[row7]) {
1902 c = 4 * (s->above_ref_ctx[col] == 1);
1904 c = 2 * (s->left_ref_ctx[row7] == 1) +
1905 2 * (s->above_ref_ctx[col] == 1);
1908 if (s->above_intra_ctx[col] ||
1909 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1911 } else if (s->above_comp_ctx[col]) {
1912 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1914 c = 4 * (s->above_ref_ctx[col] == 1);
1917 } else if (have_l) {
1918 if (s->left_intra_ctx[row7] ||
1919 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1921 } else if (s->left_comp_ctx[row7]) {
1922 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1924 c = 4 * (s->left_ref_ctx[row7] == 1);
1929 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1930 s->counts.single_ref[c][1][bit]++;
1931 b->ref[0] = 1 + bit;
// --- inter mode (whole-block for <= 8x8) ---
1936 if (b->bs <= BS_8x8) {
1937 if (s->segmentation.feat[b->seg_id].skip_enabled) {
1938 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1940 static const uint8_t off[10] = {
1941 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1944 // FIXME this needs to use the LUT tables from find_ref_mvs
1945 // because not all are -1,0/0,-1
1946 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1947 [s->left_mode_ctx[row7 + off[b->bs]]];
1949 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1950 s->prob.p.mv_mode[c]);
1951 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1952 s->counts.mv_mode[c][b->mode[0] - 10]++;
// --- interpolation filter ---
1956 if (s->filtermode == FILTER_SWITCHABLE) {
1959 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1960 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1961 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1962 s->left_filter_ctx[row7] : 3;
1964 c = s->above_filter_ctx[col];
1966 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1967 c = s->left_filter_ctx[row7];
1972 filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1973 s->prob.p.filter[c]);
1974 s->counts.filter[c][filter_id]++;
1975 b->filter = vp9_filter_lut[filter_id];
1977 b->filter = s->filtermode;
// --- per-sub-block inter modes and MVs for > 8x8 split shapes ---
1980 if (b->bs > BS_8x8) {
1981 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1983 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1984 s->prob.p.mv_mode[c]);
1985 s->counts.mv_mode[c][b->mode[0] - 10]++;
1986 fill_mv(s, b->mv[0], b->mode[0], 0);
1988 if (b->bs != BS_8x4) {
1989 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1990 s->prob.p.mv_mode[c]);
1991 s->counts.mv_mode[c][b->mode[1] - 10]++;
1992 fill_mv(s, b->mv[1], b->mode[1], 1);
1994 b->mode[1] = b->mode[0];
1995 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1996 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1999 if (b->bs != BS_4x8) {
2000 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
2001 s->prob.p.mv_mode[c]);
2002 s->counts.mv_mode[c][b->mode[2] - 10]++;
2003 fill_mv(s, b->mv[2], b->mode[2], 2);
2005 if (b->bs != BS_8x4) {
2006 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
2007 s->prob.p.mv_mode[c]);
2008 s->counts.mv_mode[c][b->mode[3] - 10]++;
2009 fill_mv(s, b->mv[3], b->mode[3], 3);
2011 b->mode[3] = b->mode[2];
2012 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
2013 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
2016 b->mode[2] = b->mode[0];
2017 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2018 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2019 b->mode[3] = b->mode[1];
2020 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
2021 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
// <= 8x8: one MV pair, replicated to all four sub-block slots.
2024 fill_mv(s, b->mv[0], b->mode[0], -1);
2025 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
2026 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2027 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
2028 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2029 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2030 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
2033 vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
// --- splat decoded values into the above/left context lines; two
// SPLAT_CTX variants exist for 64-bit vs 32-bit fast unaligned stores ---
2037 #define SPLAT_CTX(var, val, n) \
2039 case 1: var = val; break; \
2040 case 2: AV_WN16A(&var, val * 0x0101); break; \
2041 case 4: AV_WN32A(&var, val * 0x01010101); break; \
2042 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
2044 uint64_t v64 = val * 0x0101010101010101ULL; \
2045 AV_WN64A( &var, v64); \
2046 AV_WN64A(&((uint8_t *) &var)[8], v64); \
2051 #define SPLAT_CTX(var, val, n) \
2053 case 1: var = val; break; \
2054 case 2: AV_WN16A(&var, val * 0x0101); break; \
2055 case 4: AV_WN32A(&var, val * 0x01010101); break; \
2057 uint32_t v32 = val * 0x01010101; \
2058 AV_WN32A( &var, v32); \
2059 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2063 uint32_t v32 = val * 0x01010101; \
2064 AV_WN32A( &var, v32); \
2065 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2066 AV_WN32A(&((uint8_t *) &var)[8], v32); \
2067 AV_WN32A(&((uint8_t *) &var)[12], v32); \
2073 switch (bwh_tab[1][b->bs][0]) {
2074 #define SET_CTXS(dir, off, n) \
2076 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
2077 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
2078 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2079 if (!s->keyframe && !s->intraonly) { \
2080 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
2081 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
2082 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
2084 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2085 if (s->filtermode == FILTER_SWITCHABLE) { \
2086 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2091 case 1: SET_CTXS(above, col, 1); break;
2092 case 2: SET_CTXS(above, col, 2); break;
2093 case 4: SET_CTXS(above, col, 4); break;
2094 case 8: SET_CTXS(above, col, 8); break;
2096 switch (bwh_tab[1][b->bs][1]) {
2097 case 1: SET_CTXS(left, row7, 1); break;
2098 case 2: SET_CTXS(left, row7, 2); break;
2099 case 4: SET_CTXS(left, row7, 4); break;
2100 case 8: SET_CTXS(left, row7, 8); break;
// --- store MVs into the above/left MV context arrays for successors ---
2105 if (!s->keyframe && !s->intraonly) {
2106 if (b->bs > BS_8x8) {
2107 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2109 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2110 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2111 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2112 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2113 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2114 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2115 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2116 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2118 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2120 for (n = 0; n < w4 * 2; n++) {
2121 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2122 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2124 for (n = 0; n < h4 * 2; n++) {
2125 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2126 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
// --- write refs + MVs into the frame-wide storage used by later blocks
// and by the next frame's temporal MV prediction ---
2132 for (y = 0; y < h4; y++) {
2133 int x, o = (row + y) * s->sb_cols * 8 + col;
2134 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
2137 for (x = 0; x < w4; x++) {
2141 } else if (b->comp) {
2142 for (x = 0; x < w4; x++) {
2143 mv[x].ref[0] = b->ref[0];
2144 mv[x].ref[1] = b->ref[1];
2145 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2146 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2149 for (x = 0; x < w4; x++) {
2150 mv[x].ref[0] = b->ref[0];
2152 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2158 // FIXME merge cnt/eob arguments?
// Decode one transform block's residual coefficients from range coder
// 'c' into 'coef'.  Token probabilities 'p' are contexted on the band
// (position class) and nnz (neighbour non-zero count from 'nb'/'cache');
// 'cnt'/'eob' accumulate symbol statistics for backward adaptation.
// 'scan' maps coefficient index -> raster position, 'qmul' holds the
// DC/AC dequant factors (halved for 32x32 via is_tx32x32).  Returns the
// number of coefficients read (end-of-block position).
2159 static av_always_inline int
2160 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2161 int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
2162 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2163 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2164 const int16_t *band_counts, const int16_t *qmul)
2166 int i = 0, band = 0, band_left = band_counts[band];
2167 uint8_t *tp = p[0][nnz];
2168 uint8_t cache[1024];
// Per-coefficient loop: eob flag, zero flag, then the token tree.
2173 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2174 eob[band][nnz][val]++;
2179 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2180 cnt[band][nnz][0]++;
2182 band_left = band_counts[++band];
// nnz context for the next position: average of the two neighbours.
2184 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2186 if (++i == n_coeffs)
2187 break; //invalid input; blocks should end with EOB
2192 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2193 cnt[band][nnz][1]++;
2197 // fill in p[3-10] (model fill) - only once per frame for each pos
2199 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2201 cnt[band][nnz][2]++;
2202 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2203 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2204 cache[rc] = val = 2;
2206 val = 3 + vp56_rac_get_prob(c, tp[5]);
2209 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2211 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2212 val = 5 + vp56_rac_get_prob(c, 159);
2214 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2215 val += vp56_rac_get_prob(c, 145);
// cat3..cat6 escape values: fixed per-bit probabilities from the spec.
2219 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2220 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2221 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2222 val += (vp56_rac_get_prob(c, 148) << 1);
2223 val += vp56_rac_get_prob(c, 140);
2225 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2226 val += (vp56_rac_get_prob(c, 155) << 2);
2227 val += (vp56_rac_get_prob(c, 140) << 1);
2228 val += vp56_rac_get_prob(c, 135);
2230 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2231 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2232 val += (vp56_rac_get_prob(c, 157) << 3);
2233 val += (vp56_rac_get_prob(c, 141) << 2);
2234 val += (vp56_rac_get_prob(c, 134) << 1);
2235 val += vp56_rac_get_prob(c, 130);
// cat6 is wider at > 8 bpp: two extra top bits for 10/12-bit content.
2238 if (!is8bitsperpixel) {
2240 val += vp56_rac_get_prob(c, 255) << 17;
2241 val += vp56_rac_get_prob(c, 255) << 16;
2243 val += (vp56_rac_get_prob(c, 255) << 15);
2244 val += (vp56_rac_get_prob(c, 255) << 14);
2246 val += (vp56_rac_get_prob(c, 254) << 13);
2247 val += (vp56_rac_get_prob(c, 254) << 12);
2248 val += (vp56_rac_get_prob(c, 254) << 11);
2249 val += (vp56_rac_get_prob(c, 252) << 10);
2250 val += (vp56_rac_get_prob(c, 249) << 9);
2251 val += (vp56_rac_get_prob(c, 243) << 8);
2252 val += (vp56_rac_get_prob(c, 230) << 7);
2253 val += (vp56_rac_get_prob(c, 196) << 6);
2254 val += (vp56_rac_get_prob(c, 177) << 5);
2255 val += (vp56_rac_get_prob(c, 153) << 4);
2256 val += (vp56_rac_get_prob(c, 140) << 3);
2257 val += (vp56_rac_get_prob(c, 133) << 2);
2258 val += (vp56_rac_get_prob(c, 130) << 1);
2259 val += vp56_rac_get_prob(c, 129);
2263 #define STORE_COEF(c, i, v) do { \
2264 if (is8bitsperpixel) { \
2267 AV_WN32A(&c[i * 2], v); \
2271 band_left = band_counts[++band];
// 32x32 blocks dequantize with qmul/2 (spec); sign bit read last.
2273 STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
2275 STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
2276 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2278 } while (++i < n_coeffs);
// Wrapper: decode one <=16x16 coefficient block at 8 bits per pixel.
// Forwards to decode_coeffs_b_generic with is32=0, is8bitsperpixel=1, bpp=8
// so the generic loop is specialized at compile time.
2283 static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2284 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2285 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2286 const int16_t (*nb)[2], const int16_t *band_counts,
2287 const int16_t *qmul)
2289 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
2290 nnz, scan, nb, band_counts, qmul);
// Wrapper: decode one 32x32 coefficient block at 8 bits per pixel
// (is32=1 selects the 32x32 path in decode_coeffs_b_generic).
2293 static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2294 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2295 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2296 const int16_t (*nb)[2], const int16_t *band_counts,
2297 const int16_t *qmul)
2299 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
2300 nnz, scan, nb, band_counts, qmul);
// Wrapper: decode one <=16x16 coefficient block at high bit depth.
// Passes is8bitsperpixel=0 and the stream's actual bpp (s->bpp, i.e. 10/12).
2303 static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2304 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2305 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2306 const int16_t (*nb)[2], const int16_t *band_counts,
2307 const int16_t *qmul)
2309 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
2310 nnz, scan, nb, band_counts, qmul);
// Wrapper: decode one 32x32 coefficient block at high bit depth
// (is32=1, is8bitsperpixel=0, bpp taken from the context).
2313 static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2314 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2315 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2316 const int16_t (*nb)[2], const int16_t *band_counts,
2317 const int16_t *qmul)
2319 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
2320 nnz, scan, nb, band_counts, qmul);
// Decode all residual coefficients for the current block (s->b): first the
// luma plane, then both chroma planes. Per 4x4 sub-unit it calls the
// per-bpp decode_coeffs_b* wrapper selected by is8bitsperpixel, updates the
// above/left non-zero-coefficient contexts (a[]/l[]) and records the EOB
// per transform block. Returns whether any non-zero coefficient was read
// (accumulated in total_coeff).
// NOTE(review): this extract is missing interior lines (closing braces,
// some #define bodies); comments below describe only what is visible.
2323 static av_always_inline int decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
2325 VP9Context *s = ctx->priv_data;
2327 int row = s->row, col = s->col;
2328 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2329 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2330 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2331 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2332 int end_x = FFMIN(2 * (s->cols - col), w4);
2333 int end_y = FFMIN(2 * (s->rows - row), h4);
2334 int n, pl, x, y, res;
2335 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
2336 int tx = 4 * s->lossless + b->tx;
2337 const int16_t * const *yscans = vp9_scans[tx];
2338 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2339 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2340 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2341 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2342 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
// Coefficient-band sizes per tx size (4x4, 8x8, 16x16, 32x32); last entry
// is the remaining count after the fixed bands.
2343 static const int16_t band_counts[4][8] = {
2344 { 1, 2, 3, 4, 3, 16 - 13 },
2345 { 1, 2, 3, 4, 11, 64 - 21 },
2346 { 1, 2, 3, 4, 11, 256 - 21 },
2347 { 1, 2, 3, 4, 11, 1024 - 21 },
2349 const int16_t *y_band_counts = band_counts[b->tx];
2350 const int16_t *uv_band_counts = band_counts[b->uvtx];
2351 int bytesperpixel = is8bitsperpixel ? 1 : 2;
2352 int total_coeff = 0;
// MERGE/MERGE_CTX: collapse 'step' adjacent nnz-context bytes into one
// boolean per transform block, using an aligned multi-byte read.
2354 #define MERGE(la, end, step, rd) \
2355 for (n = 0; n < end; n += step) \
2356 la[n] = !!rd(&la[n])
2357 #define MERGE_CTX(step, rd) \
2359 MERGE(l, end_y, step, rd); \
2360 MERGE(a, end_x, step, rd); \
// Luma loop: one decode_coeffs_b* call per tx block; a[x]+l[y] is the nnz
// context, the result updates both context arrays and s->eob[].
2363 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2364 for (n = 0, y = 0; y < end_y; y += step) { \
2365 for (x = 0; x < end_x; x += step, n += step * step) { \
2366 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2367 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2368 (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
2369 c, e, p, a[x] + l[y], yscans[txtp], \
2370 ynbs[txtp], y_band_counts, qmul[0]); \
2371 a[x] = l[y] = !!res; \
2372 total_coeff |= !!res; \
2374 AV_WN16A(&s->eob[n], res); \
// SPLAT/SPLAT_CTX: fan one per-tx-block nnz flag back out to every 4x4
// position it covers, clamping at the visible edge (cond false).
2381 #define SPLAT(la, end, step, cond) \
2383 for (n = 1; n < end; n += step) \
2384 la[n] = la[n - 1]; \
2385 } else if (step == 4) { \
2387 for (n = 0; n < end; n += step) \
2388 AV_WN32A(&la[n], la[n] * 0x01010101); \
2390 for (n = 0; n < end; n += step) \
2391 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2393 } else /* step == 8 */ { \
2395 if (HAVE_FAST_64BIT) { \
2396 for (n = 0; n < end; n += step) \
2397 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2399 for (n = 0; n < end; n += step) { \
2400 uint32_t v32 = la[n] * 0x01010101; \
2401 AV_WN32A(&la[n], v32); \
2402 AV_WN32A(&la[n + 4], v32); \
2406 for (n = 0; n < end; n += step) \
2407 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2410 #define SPLAT_CTX(step) \
2412 SPLAT(a, end_x, step, end_x == w4); \
2413 SPLAT(l, end_y, step, end_y == h4); \
// y tokens/coefficients; per-tx-size dispatch (TX_4X4..TX_32X32).
2419 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2422 MERGE_CTX(2, AV_RN16A);
2423 DECODE_Y_COEF_LOOP(2, 0,);
2427 MERGE_CTX(4, AV_RN32A);
2428 DECODE_Y_COEF_LOOP(4, 0,);
2432 MERGE_CTX(8, AV_RN64A);
2433 DECODE_Y_COEF_LOOP(8, 0, 32);
// Chroma loop: same structure, but always DCT_DCT scan and qmul[1].
2438 #define DECODE_UV_COEF_LOOP(step, v) \
2439 for (n = 0, y = 0; y < end_y; y += step) { \
2440 for (x = 0; x < end_x; x += step, n += step * step) { \
2441 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2442 (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
2443 16 * step * step, c, e, p, a[x] + l[y], \
2444 uvscan, uvnb, uv_band_counts, qmul[1]); \
2445 a[x] = l[y] = !!res; \
2446 total_coeff |= !!res; \
2448 AV_WN16A(&s->uveob[pl][n], res); \
2450 s->uveob[pl][n] = res; \
// Switch probability/count tables to the chroma (uv) set.
2455 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2456 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2457 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2462 for (pl = 0; pl < 2; pl++) {
2463 a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2464 l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2467 DECODE_UV_COEF_LOOP(1,);
2470 MERGE_CTX(2, AV_RN16A);
2471 DECODE_UV_COEF_LOOP(2,);
2475 MERGE_CTX(4, AV_RN32A);
2476 DECODE_UV_COEF_LOOP(4,);
2480 MERGE_CTX(8, AV_RN64A);
2481 DECODE_UV_COEF_LOOP(8, 32);
// 8bpp entry point: instantiate decode_coeffs() with is8bitsperpixel=1.
2490 static int decode_coeffs_8bpp(AVCodecContext *ctx)
2492 return decode_coeffs(ctx, 1);
// High-bit-depth entry point: instantiate decode_coeffs() with
// is8bitsperpixel=0 (two bytes per pixel).
2495 static int decode_coeffs_16bpp(AVCodecContext *ctx)
2497 return decode_coeffs(ctx, 0);
// Prepare intra prediction for one transform block: remap 'mode' according
// to which neighbours (top/left) actually exist (mode_conv table), then
// fill the top edge array (*a, may be moved to point at padded data) and
// the left edge array (l) from either the frame buffer, the saved
// pre-loopfilter row (s->intra_pred_data), or fixed fill values when the
// neighbour is unavailable. Returns the possibly-substituted mode.
// NOTE(review): interior lines are missing from this extract; the comments
// describe only the visible logic.
2500 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2501 uint8_t *dst_edge, ptrdiff_t stride_edge,
2502 uint8_t *dst_inner, ptrdiff_t stride_inner,
2503 uint8_t *l, int col, int x, int w,
2504 int row, int y, enum TxfmMode tx,
2505 int p, int ss_h, int ss_v, int bytesperpixel)
2507 int have_top = row > 0 || y > 0;
2508 int have_left = col > s->tiling.tile_col_start || x > 0;
2509 int have_right = x < w - 1;
// Fallback mode per (have_left, have_top) combination for the 10 coded
// intra modes; e.g. VERT_PRED without a top row becomes DC_127_PRED.
2511 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2512 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2513 { DC_127_PRED, VERT_PRED } },
2514 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2515 { HOR_PRED, HOR_PRED } },
2516 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2517 { LEFT_DC_PRED, DC_PRED } },
2518 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2519 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2520 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2521 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2522 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2523 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2524 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2525 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2526 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2527 { DC_127_PRED, VERT_LEFT_PRED } },
2528 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2529 { HOR_UP_PRED, HOR_UP_PRED } },
2530 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2531 { HOR_PRED, TM_VP8_PRED } },
// Which edge pixels each (possibly substituted) mode requires.
2533 static const struct {
2534 uint8_t needs_left:1;
2535 uint8_t needs_top:1;
2536 uint8_t needs_topleft:1;
2537 uint8_t needs_topright:1;
2538 uint8_t invert_left:1;
2539 } edges[N_INTRA_PRED_MODES] = {
2540 [VERT_PRED] = { .needs_top = 1 },
2541 [HOR_PRED] = { .needs_left = 1 },
2542 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2543 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2544 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2545 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2546 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2547 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2548 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2549 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2550 [LEFT_DC_PRED] = { .needs_left = 1 },
2551 [TOP_DC_PRED] = { .needs_top = 1 },
2552 [DC_128_PRED] = { 0 },
2553 [DC_127_PRED] = { 0 },
2554 [DC_129_PRED] = { 0 }
2557 av_assert2(mode >= 0 && mode < 10);
2558 mode = mode_conv[mode][have_left][have_top];
2559 if (edges[mode].needs_top) {
2560 uint8_t *top, *topleft;
2561 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2562 int n_px_need_tr = 0;
2564 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2567 // if top of sb64-row, use s->intra_pred_data[] instead of
2568 // dst[-stride] for intra prediction (it contains pre- instead of
2569 // post-loopfilter data)
2571 top = !(row & 7) && !y ?
2572 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2573 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2575 topleft = !(row & 7) && !y ?
2576 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2577 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2578 &dst_inner[-stride_inner];
2582 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2583 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2584 n_px_need + n_px_need_tr <= n_px_have) {
2588 if (n_px_need <= n_px_have) {
2589 memcpy(*a, top, n_px_need * bytesperpixel);
// memset_bpp: replicate one pixel (1 or 2 bytes) 'num' times.
2591 #define memset_bpp(c, i1, v, i2, num) do { \
2592 if (bytesperpixel == 1) { \
2593 memset(&(c)[(i1)], (v)[(i2)], (num)); \
2595 int n, val = AV_RN16A(&(v)[(i2) * 2]); \
2596 for (n = 0; n < (num); n++) { \
2597 AV_WN16A(&(c)[((i1) + n) * 2], val); \
2601 memcpy(*a, top, n_px_have * bytesperpixel);
2602 memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
// memset_val: fill with a constant value, bpp-aware.
2605 #define memset_val(c, val, num) do { \
2606 if (bytesperpixel == 1) { \
2607 memset((c), (val), (num)); \
2610 for (n = 0; n < (num); n++) { \
2611 AV_WN16A(&(c)[n * 2], (val)); \
2615 memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
2617 if (edges[mode].needs_topleft) {
2618 if (have_left && have_top) {
// assign_bpp: copy a single pixel (1 or 2 bytes).
2619 #define assign_bpp(c, i1, v, i2) do { \
2620 if (bytesperpixel == 1) { \
2621 (c)[(i1)] = (v)[(i2)]; \
2623 AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
2626 assign_bpp(*a, -1, topleft, -1);
2628 #define assign_val(c, i, v) do { \
2629 if (bytesperpixel == 1) { \
2632 AV_WN16A(&(c)[(i) * 2], (v)); \
2635 assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
2638 if (tx == TX_4X4 && edges[mode].needs_topright) {
2639 if (have_top && have_right &&
2640 n_px_need + n_px_need_tr <= n_px_have) {
2641 memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
2643 memset_bpp(*a, 4, *a, 3, 4);
2648 if (edges[mode].needs_left) {
2650 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2651 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2652 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
// invert_left (HOR_UP_PRED): store the left column top-to-bottom;
// otherwise store bottom-to-top.
2654 if (edges[mode].invert_left) {
2655 if (n_px_need <= n_px_have) {
2656 for (i = 0; i < n_px_need; i++)
2657 assign_bpp(l, i, &dst[i * stride], -1);
2659 for (i = 0; i < n_px_have; i++)
2660 assign_bpp(l, i, &dst[i * stride], -1);
2661 memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
2664 if (n_px_need <= n_px_have) {
2665 for (i = 0; i < n_px_need; i++)
2666 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2668 for (i = 0; i < n_px_have; i++)
2669 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2670 memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
2674 memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
// Reconstruct an intra-coded block: for every luma transform block run
// check_intra_mode() to build the edge arrays, apply the intra predictor,
// then add the inverse-transformed residual (skipped when eob == 0 via
// b->skip). The same is repeated for both chroma planes with b->uvmode
// and DCT_DCT. dst writes go to s->dst[] (possibly a temporary buffer),
// while dst_r points into the actual frame for reading neighbour pixels.
2681 static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
2682 ptrdiff_t uv_off, int bytesperpixel)
2684 VP9Context *s = ctx->priv_data;
2686 int row = s->row, col = s->col;
2687 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2688 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2689 int end_x = FFMIN(2 * (s->cols - col), w4);
2690 int end_y = FFMIN(2 * (s->rows - row), h4);
2691 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2692 int uvstep1d = 1 << b->uvtx, p;
2693 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
// Aligned scratch for the top (a_buf) and left (l) edge pixel arrays.
2694 LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
2695 LOCAL_ALIGNED_32(uint8_t, l, [64]);
2697 for (n = 0, y = 0; y < end_y; y += step1d) {
2698 uint8_t *ptr = dst, *ptr_r = dst_r;
2699 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
2700 ptr_r += 4 * step1d * bytesperpixel, n += step) {
2701 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2703 uint8_t *a = &a_buf[32];
2704 enum TxfmType txtp = vp9_intra_txfm_type[mode];
2705 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2707 mode = check_intra_mode(s, mode, &a, ptr_r,
2708 s->frames[CUR_FRAME].tf.f->linesize[0],
2709 ptr, s->y_stride, l,
2710 col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
2711 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2713 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2714 s->block + 16 * n * bytesperpixel, eob);
2716 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2717 dst += 4 * step1d * s->y_stride;
// Chroma planes: same walk with uv transform size/mode.
2724 step = 1 << (b->uvtx * 2);
2725 for (p = 0; p < 2; p++) {
2726 dst = s->dst[1 + p];
2727 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2728 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2729 uint8_t *ptr = dst, *ptr_r = dst_r;
2730 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
2731 ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
2732 int mode = b->uvmode;
2733 uint8_t *a = &a_buf[32];
2734 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2736 mode = check_intra_mode(s, mode, &a, ptr_r,
2737 s->frames[CUR_FRAME].tf.f->linesize[1],
2738 ptr, s->uv_stride, l, col, x, w4, row, y,
2739 b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
2740 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2742 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2743 s->uvblock[p] + 16 * n * bytesperpixel, eob);
2745 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2746 dst += 4 * uvstep1d * s->uv_stride;
// 8bpp instantiation of intra_recon (bytesperpixel = 1).
2751 static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2753 intra_recon(ctx, y_off, uv_off, 1);
// High-bit-depth instantiation of intra_recon (bytesperpixel = 2).
2756 static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2758 intra_recon(ctx, y_off, uv_off, 2);
// Luma motion compensation with reference-frame scaling: clip the motion
// vector, scale MV and position through the 14-bit fixed-point 'scale'
// factors, wait for the reference rows to be decoded (frame threading),
// emulate edges when the reference window leaves the picture, then run the
// scaled MC function 'smc'.
2761 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2762 uint8_t *dst, ptrdiff_t dst_stride,
2763 const uint8_t *ref, ptrdiff_t ref_stride,
2764 ThreadFrame *ref_frame,
2765 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2766 int px, int py, int pw, int ph,
2767 int bw, int bh, int w, int h, int bytesperpixel,
2768 const uint16_t *scale, const uint8_t *step)
// scale_mv: apply the Q14 scaling factor for dimension 'dim'.
2770 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2772 int refbw_m1, refbh_m1;
2776 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2777 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2778 // BUG libvpx seems to scale the two components separately. This introduces
2779 // rounding errors but we have to reproduce them to be exactly compatible
2780 // with the output from libvpx...
2781 mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
2782 my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
2786 ref += y * ref_stride + x * bytesperpixel;
2789 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2790 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2791 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2792 // we use +7 because the last 7 pixels of each sbrow can be changed in
2793 // the longest loopfilter of the next sbrow
2794 th = (y + refbh_m1 + 4 + 7) >> 6;
2795 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
// 8-tap filter needs 3 pixels before and 4 after: emulate if outside.
2796 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2797 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2798 ref - 3 * ref_stride - 3 * bytesperpixel,
2800 refbw_m1 + 8, refbh_m1 + 8,
2801 x - 3, y - 3, w, h);
2802 ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2805 smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
// Chroma motion compensation with reference scaling: like mc_luma_scaled
// but operating on both chroma planes at once, with per-axis handling of
// chroma subsampling (the <<4 vs <<3 clip ranges and the masked scale_mv
// combinations reproduce libvpx behaviour, see the webm issue links).
2808 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2809 uint8_t *dst_u, uint8_t *dst_v,
2810 ptrdiff_t dst_stride,
2811 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2812 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2813 ThreadFrame *ref_frame,
2814 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2815 int px, int py, int pw, int ph,
2816 int bw, int bh, int w, int h, int bytesperpixel,
2817 const uint16_t *scale, const uint8_t *step)
2820 int refbw_m1, refbh_m1;
2825 // BUG https://code.google.com/p/webm/issues/detail?id=820
2826 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 4, (s->cols * 4 - x + px + 3) << 4);
2827 mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2829 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2830 mx = scale_mv(mv.x << 1, 0) + scale_mv(x * 16, 0);
2833 // BUG https://code.google.com/p/webm/issues/detail?id=820
2834 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 4, (s->rows * 4 - y + py + 3) << 4);
2835 my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2837 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2838 my = scale_mv(mv.y << 1, 1) + scale_mv(y * 16, 1);
2843 ref_u += y * src_stride_u + x * bytesperpixel;
2844 ref_v += y * src_stride_v + x * bytesperpixel;
2847 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2848 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2849 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2850 // we use +7 because the last 7 pixels of each sbrow can be changed in
2851 // the longest loopfilter of the next sbrow
2852 th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2853 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
// Edge emulation for each plane separately when the 8-tap window
// (3 before / 4 after) exceeds the reference picture bounds.
2854 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2855 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2856 ref_u - 3 * src_stride_u - 3 * bytesperpixel,
2858 refbw_m1 + 8, refbh_m1 + 8,
2859 x - 3, y - 3, w, h);
2860 ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2861 smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
2863 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2864 ref_v - 3 * src_stride_v - 3 * bytesperpixel,
2866 refbw_m1 + 8, refbh_m1 + 8,
2867 x - 3, y - 3, w, h);
2868 ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2869 smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
2871 smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2872 smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
// Instantiate the scaled inter-prediction code (vp9_mc_template.c) twice,
// once per pixel size: the mc_luma_dir/mc_chroma_dir macros route the
// template's MC calls to the *_scaled helpers above with per-ref scale
// and step tables.
2876 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2877 px, py, pw, ph, bw, bh, w, h, i) \
2878 mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
2879 mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2880 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2881 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2882 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2883 mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2884 row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2885 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2887 #define FN(x) x##_scaled_8bpp
2888 #define BYTES_PER_PIXEL 1
2889 #include "vp9_mc_template.c"
2891 #undef BYTES_PER_PIXEL
2892 #define FN(x) x##_scaled_16bpp
2893 #define BYTES_PER_PIXEL 2
2894 #include "vp9_mc_template.c"
2896 #undef mc_chroma_dir
2898 #undef BYTES_PER_PIXEL
// Luma motion compensation without reference scaling: wait for the needed
// reference rows, emulate picture edges only when a subpel filter tap
// (3 left/top, 4 right/bottom, gated on !!mx / !!my) would read outside,
// then run the [!!mx][!!my]-selected MC function.
2901 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2902 uint8_t *dst, ptrdiff_t dst_stride,
2903 const uint8_t *ref, ptrdiff_t ref_stride,
2904 ThreadFrame *ref_frame,
2905 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2906 int bw, int bh, int w, int h, int bytesperpixel)
2908 int mx = mv->x, my = mv->y, th;
2912 ref += y * ref_stride + x * bytesperpixel;
2915 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2916 // we use +7 because the last 7 pixels of each sbrow can be changed in
2917 // the longest loopfilter of the next sbrow
2918 th = (y + bh + 4 * !!my + 7) >> 6;
2919 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2920 if (x < !!mx * 3 || y < !!my * 3 ||
2921 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2922 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2923 ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
2925 bw + !!mx * 7, bh + !!my * 7,
2926 x - !!mx * 3, y - !!my * 3, w, h);
2927 ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2930 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
// Chroma motion compensation without reference scaling: same scheme as
// mc_luma_unscaled but for both chroma planes, with the MV pre-shifted by
// the subsampling factors (<< !ss_h / << !ss_v).
2933 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2934 uint8_t *dst_u, uint8_t *dst_v,
2935 ptrdiff_t dst_stride,
2936 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2937 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2938 ThreadFrame *ref_frame,
2939 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2940 int bw, int bh, int w, int h, int bytesperpixel)
2942 int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
2946 ref_u += y * src_stride_u + x * bytesperpixel;
2947 ref_v += y * src_stride_v + x * bytesperpixel;
2950 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2951 // we use +7 because the last 7 pixels of each sbrow can be changed in
2952 // the longest loopfilter of the next sbrow
2953 th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2954 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2955 if (x < !!mx * 3 || y < !!my * 3 ||
2956 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2957 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2958 ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
2960 bw + !!mx * 7, bh + !!my * 7,
2961 x - !!mx * 3, y - !!my * 3, w, h);
2962 ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2963 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
2965 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2966 ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
2968 bw + !!mx * 7, bh + !!my * 7,
2969 x - !!mx * 3, y - !!my * 3, w, h);
2970 ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2971 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
2973 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2974 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
// Instantiate the unscaled inter-prediction code (vp9_mc_template.c) twice,
// once per pixel size, routing the template's MC calls to the *_unscaled
// helpers above.
2978 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2979 px, py, pw, ph, bw, bh, w, h, i) \
2980 mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2981 mv, bw, bh, w, h, bytesperpixel)
2982 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2983 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2984 mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2985 row, col, mv, bw, bh, w, h, bytesperpixel)
2987 #define FN(x) x##_8bpp
2988 #define BYTES_PER_PIXEL 1
2989 #include "vp9_mc_template.c"
2991 #undef BYTES_PER_PIXEL
2992 #define FN(x) x##_16bpp
2993 #define BYTES_PER_PIXEL 2
2994 #include "vp9_mc_template.c"
// Tear down the template macros. The names #defined above are mc_luma_dir
// and mc_chroma_dir, so those are what must be #undef'd here (mirroring
// the scaled-MC template block earlier in the file); the previous
// "mc_luma_dir_dir"/"mc_chroma_dir_dir" were typos that undefined
// never-defined names and left the real macros defined.
2995 #undef mc_luma_dir
2996 #undef mc_chroma_dir
2998 #undef BYTES_PER_PIXEL
// Reconstruct an inter-coded block: run inter prediction (scaled variant
// if either reference frame has a non-trivial mvscale factor), then add
// the inverse-transformed residuals for luma and both chroma planes,
// skipping transform blocks whose eob is 0.
3001 static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
3003 VP9Context *s = ctx->priv_data;
3005 int row = s->row, col = s->col;
// Scaled prediction path when the reference needs resampling.
3007 if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
3008 if (bytesperpixel == 1) {
3009 inter_pred_scaled_8bpp(ctx);
3011 inter_pred_scaled_16bpp(ctx);
3014 if (bytesperpixel == 1) {
3015 inter_pred_8bpp(ctx);
3017 inter_pred_16bpp(ctx);
3021 /* mostly copied intra_recon() */
3023 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
3024 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
3025 int end_x = FFMIN(2 * (s->cols - col), w4);
3026 int end_y = FFMIN(2 * (s->rows - row), h4);
3027 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
3028 int uvstep1d = 1 << b->uvtx, p;
3029 uint8_t *dst = s->dst[0];
// y itxfm add: inter residuals always use DCT_DCT.
3032 for (n = 0, y = 0; y < end_y; y += step1d) {
3034 for (x = 0; x < end_x; x += step1d,
3035 ptr += 4 * step1d * bytesperpixel, n += step) {
3036 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
3039 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
3040 s->block + 16 * n * bytesperpixel, eob);
3042 dst += 4 * s->y_stride * step1d;
// uv itxfm add for both chroma planes.
3048 step = 1 << (b->uvtx * 2);
3049 for (p = 0; p < 2; p++) {
3050 dst = s->dst[p + 1];
3051 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
3053 for (x = 0; x < end_x; x += uvstep1d,
3054 ptr += 4 * uvstep1d * bytesperpixel, n += step) {
3055 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
3058 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
3059 s->uvblock[p] + 16 * n * bytesperpixel, eob);
3061 dst += 4 * uvstep1d * s->uv_stride;
// 8bpp instantiation of inter_recon (bytesperpixel = 1).
3067 static void inter_recon_8bpp(AVCodecContext *ctx)
3069 inter_recon(ctx, 1);
// High-bit-depth instantiation of inter_recon (bytesperpixel = 2).
3072 static void inter_recon_16bpp(AVCodecContext *ctx)
3074 inter_recon(ctx, 2);
// Build the loopfilter edge masks for one block: set bits in
// mask[col/row][y][filter-size] marking which 8/16/32-px (and inner-4x4)
// edges must be filtered, given block position (row_and_7/col_and_7 within
// the sb64), size (w/h in 4px units), transform size and chroma
// subsampling. col_end/row_end flag a partial block at the picture edge.
3077 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
3078 int row_and_7, int col_and_7,
3079 int w, int h, int col_end, int row_end,
3080 enum TxfmMode tx, int skip_inter)
3082 static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
3083 static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
3085 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
3086 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
3087 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
3088 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
3090 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
3091 // edges. This means that for UV, we work on two subsampled blocks at
3092 // a time, and we only use the topleft block's mode information to set
3093 // things like block strength. Thus, for any block size smaller than
3094 // 16x16, ignore the odd portion of the block.
3095 if (tx == TX_4X4 && (ss_v | ss_h)) {
3110 if (tx == TX_4X4 && !skip_inter) {
3111 int t = 1 << col_and_7, m_col = (t << w) - t, y;
3112 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
3113 int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
3115 for (y = row_and_7; y < h + row_and_7; y++) {
3116 int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
3118 mask[0][y][1] |= m_row_8;
3119 mask[0][y][2] |= m_row_4;
3120 // for odd lines, if the odd col is not being filtered,
3121 // skip odd row also:
3128 // if a/c are even row/col and b/d are odd, and d is skipped,
3129 // e.g. right edge of size-66x66.webm, then skip b also (bug)
3130 if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
3131 mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
3133 mask[1][y][col_mask_id] |= m_col;
3136 mask[0][y][3] |= m_col;
3138 if (ss_h && (col_end & 1))
3139 mask[1][y][3] |= (t << (w - 1)) - t;
3141 mask[1][y][3] |= m_col;
3145 int y, t = 1 << col_and_7, m_col = (t << w) - t;
3148 int mask_id = (tx == TX_8X8);
3149 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
3150 int l2 = tx + ss_h - 1, step1d;
3151 int m_row = m_col & masks[l2];
3153 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
3154 // 8wd loopfilter to prevent going off the visible edge.
3155 if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
3156 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
3157 int m_row_8 = m_row - m_row_16;
3159 for (y = row_and_7; y < h + row_and_7; y++) {
3160 mask[0][y][0] |= m_row_16;
3161 mask[0][y][1] |= m_row_8;
3164 for (y = row_and_7; y < h + row_and_7; y++)
3165 mask[0][y][mask_id] |= m_row;
3170 if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
3171 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
3172 mask[1][y][0] |= m_col;
3173 if (y - row_and_7 == h - 1)
3174 mask[1][y][1] |= m_col;
3176 for (y = row_and_7; y < h + row_and_7; y += step1d)
3177 mask[1][y][mask_id] |= m_col;
3179 } else if (tx != TX_4X4) {
3182 mask_id = (tx == TX_8X8) || (h == ss_v);
3183 mask[1][row_and_7][mask_id] |= m_col;
3184 mask_id = (tx == TX_8X8) || (w == ss_h);
3185 for (y = row_and_7; y < h + row_and_7; y++)
3186 mask[0][y][mask_id] |= t;
3188 int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
3190 for (y = row_and_7; y < h + row_and_7; y++) {
3191 mask[0][y][2] |= t4;
3192 mask[0][y][1] |= t8;
3194 mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
// Decode and reconstruct one block: set the MV clamping range, decode the
// block's residual coefficients (per bpp), zero the nnz contexts when the
// block is skipped, reconstruct intra or inter, copy back from temporary
// buffers when the block overhangs the frame stride (emu path), compute the
// loopfilter level/masks for this block, and finally advance the
// coefficient/EOB pointers past the block.
3199 static void decode_b(AVCodecContext *ctx, int row, int col,
3200 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
3201 enum BlockLevel bl, enum BlockPartition bp)
3203 VP9Context *s = ctx->priv_data;
3205 enum BlockSize bs = bl * 3 + bp;
3206 int bytesperpixel = s->bytesperpixel;
3207 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
3209 AVFrame *f = s->frames[CUR_FRAME].tf.f;
// MV range for this block position (1/8-pel units, 16px margin).
3215 s->min_mv.x = -(128 + col * 64);
3216 s->min_mv.y = -(128 + row * 64);
3217 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
3218 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
// uv transform size: reduce by one if the subsampled block is smaller
// than the luma transform.
3224 b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
3225 (s->ss_v && h4 * 2 == (1 << b->tx)));
3230 if (bytesperpixel == 1) {
3231 has_coeffs = decode_coeffs_8bpp(ctx);
3233 has_coeffs = decode_coeffs_16bpp(ctx);
3235 if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
3237 memset(&s->above_skip_ctx[col], 1, w4);
3238 memset(&s->left_skip_ctx[s->row7], 1, h4);
// SPLAT_ZERO_*: zero n context bytes with a single aligned store.
3243 #define SPLAT_ZERO_CTX(v, n) \
3245 case 1: v = 0; break; \
3246 case 2: AV_ZERO16(&v); break; \
3247 case 4: AV_ZERO32(&v); break; \
3248 case 8: AV_ZERO64(&v); break; \
3249 case 16: AV_ZERO128(&v); break; \
3251 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3253 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3254 if (s->ss_##dir2) { \
3255 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3256 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3258 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3259 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3264 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
3265 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
3266 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
3267 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
3270 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
3271 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
3272 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
3273 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
// Advance coefficient/EOB pointers past the (skipped) block.
3278 s->block += w4 * h4 * 64 * bytesperpixel;
3279 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3280 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3281 s->eob += 4 * w4 * h4;
3282 s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3283 s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3289 // emulated overhangs if the stride of the target buffer can't hold. This
3290 // makes it possible to support emu-edge and so on even if we have large block
3292 emu[0] = (col + w4) * 8 > f->linesize[0] ||
3293 (row + h4) > s->rows;
3294 emu[1] = (col + w4) * 4 > f->linesize[1] ||
3295 (row + h4) > s->rows;
3297 s->dst[0] = s->tmp_y;
3300 s->dst[0] = f->data[0] + yoff;
3301 s->y_stride = f->linesize[0];
3304 s->dst[1] = s->tmp_uv[0];
3305 s->dst[2] = s->tmp_uv[1];
3308 s->dst[1] = f->data[1] + uvoff;
3309 s->dst[2] = f->data[2] + uvoff;
3310 s->uv_stride = f->linesize[1];
3314 intra_recon_16bpp(ctx, yoff, uvoff);
3316 intra_recon_8bpp(ctx, yoff, uvoff);
3320 inter_recon_16bpp(ctx);
3322 inter_recon_8bpp(ctx);
// emu path: copy the temporary luma buffer back into the frame using
// progressively smaller copy widths.
3326 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3328 for (n = 0; o < w; n++) {
3333 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3334 s->tmp_y + o, 128, h, 0, 0);
3335 o += bw * bytesperpixel;
3340 int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
3341 int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
3343 for (n = s->ss_h; o < w; n++) {
3348 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3349 s->tmp_uv[0] + o, 128, h, 0, 0);
3350 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3351 s->tmp_uv[1] + o, 128, h, 0, 0);
3352 o += bw * bytesperpixel;
3357 // pick filter level and find edges to apply filter to
3358 if (s->filter.level &&
3359 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3360 [b->mode[3] != ZEROMV]) > 0) {
3361 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3362 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3364 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3365 mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3366 if (s->ss_h || s->ss_v)
3367 mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
3368 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3369 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3370 b->uvtx, skip_inter);
// Lazily fill the per-level limit LUTs used by the loopfilter.
3372 if (!s->filter.lim_lut[lvl]) {
3373 int sharp = s->filter.sharpness;
3377 limit >>= (sharp + 3) >> 2;
3378 limit = FFMIN(limit, 9 - sharp);
3380 limit = FFMAX(limit, 1);
3382 s->filter.lim_lut[lvl] = limit;
3383 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
3389 s->block += w4 * h4 * 64 * bytesperpixel;
3390 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3391 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3392 s->eob += 4 * w4 * h4;
3393 s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3394 s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
// Recursively parse one node of the VP9 superblock partition tree at level
// bl and decode its contents: a single block via decode_b(), or four
// recursive decode_sb() calls at bl + 1 for PARTITION_SPLIT. yoff/uvoff are
// byte offsets of this node into the luma/chroma planes. The right/bottom
// frame edges force implicit splits (only some partition choices are coded).
3398 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3399 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3401 VP9Context *s = ctx->priv_data;
// Partition probability context: bit 0 from the above-block context, bit 1
// from the left-block context, each keyed on the current tree level.
3402 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3403 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
// Keyframes/intra-only frames use the fixed default partition probs.
3404 const uint8_t *p = s->keyframe || s->intraonly ? vp9_default_kf_partition_probs[bl][c] :
3405 s->prob.p.partition[bl][c];
3406 enum BlockPartition bp;
// Half-block size in 8x8 units at this level (4, 2, 1, ... as bl grows).
3407 ptrdiff_t hbs = 4 >> bl;
3408 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3409 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3410 int bytesperpixel = s->bytesperpixel;
// NOTE(review): the branch header selecting this leaf-level path (and the
// switch statement below) is not visible in this excerpt.
3413 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3414 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// Whole node fits inside the frame: the full partition symbol is coded.
3415 } else if (col + hbs < s->cols) { // FIXME why not <=?
3416 if (row + hbs < s->rows) { // FIXME why not <=?
3417 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3419 case PARTITION_NONE:
3420 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// PARTITION_H: top half, then advance vertically and decode bottom half.
3423 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3424 yoff += hbs * 8 * y_stride;
3425 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3426 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
// PARTITION_V: left half, then advance horizontally and decode right half.
3429 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3430 yoff += hbs * 8 * bytesperpixel;
3431 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3432 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
// PARTITION_SPLIT: recurse into the four quadrants at the next level.
3434 case PARTITION_SPLIT:
3435 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3436 decode_sb(ctx, row, col + hbs, lflvl,
3437 yoff + 8 * hbs * bytesperpixel,
3438 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3439 yoff += hbs * 8 * y_stride;
3440 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3441 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3442 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3443 yoff + 8 * hbs * bytesperpixel,
3444 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// Node sticks out over the bottom edge only: a single bit chooses between
// PARTITION_SPLIT and (implicitly) PARTITION_H.
3449 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3450 bp = PARTITION_SPLIT;
3451 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3452 decode_sb(ctx, row, col + hbs, lflvl,
3453 yoff + 8 * hbs * bytesperpixel,
3454 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3457 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// Node sticks out over the right edge only: one bit chooses SPLIT vs
// (implicitly) PARTITION_V.
3459 } else if (row + hbs < s->rows) { // FIXME why not <=?
3460 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3461 bp = PARTITION_SPLIT;
3462 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3463 yoff += hbs * 8 * y_stride;
3464 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3465 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3468 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// Node sticks out over both edges: split is forced, no bit is read.
3471 bp = PARTITION_SPLIT;
3472 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
// Count the chosen partition for end-of-frame probability adaptation.
3474 s->counts.partition[bl][c][bp]++;
// Second-pass variant of decode_sb(): instead of reading partition symbols
// from the bitstream, it replays the block structure recorded in s->b
// (bl/bp stored during pass 1) and re-runs reconstruction. Used by the
// 2-pass frame-threading mode.
3477 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3478 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3480 VP9Context *s = ctx->priv_data;
3482 ptrdiff_t hbs = 4 >> bl;
3483 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3484 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3485 int bytesperpixel = s->bytesperpixel;
// NOTE(review): the branch header for the leaf case (and the local 'b'
// declaration it presumably guards) is not visible in this excerpt.
3488 av_assert2(b->bl == BL_8X8);
3489 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
// Stored level matches this node: replay the recorded partition directly.
3490 } else if (s->b->bl == bl) {
3491 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3492 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3493 yoff += hbs * 8 * y_stride;
3494 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3495 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3496 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3497 yoff += hbs * 8 * bytesperpixel;
3498 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3499 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// Otherwise the recorded block is smaller: recurse, honoring the same
// frame-edge clipping rules as the bitstream pass.
3502 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3503 if (col + hbs < s->cols) { // FIXME why not <=?
3504 if (row + hbs < s->rows) {
3505 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
3506 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3507 yoff += hbs * 8 * y_stride;
3508 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3509 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3510 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3511 yoff + 8 * hbs * bytesperpixel,
3512 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// Only the right neighbor is inside the frame.
3514 yoff += hbs * 8 * bytesperpixel;
3515 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3516 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
// Only the bottom neighbor is inside the frame.
3518 } else if (row + hbs < s->rows) {
3519 yoff += hbs * 8 * y_stride;
3520 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3521 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
// Apply the loop filter to vertical edges (edges *between columns*) of one
// plane within a 64x64 superblock. 'mask' holds per-row bitmasks, one bit
// per 8px column unit, indexed by filter-size class [0=16, 1=8, 2=4,
// 3=inner-4]; 'lvl' is the per-8x8-block filter strength map. ss_h/ss_v
// are the plane's chroma subsampling shifts (0 for luma).
3526 static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
3527 uint8_t *lvl, uint8_t (*mask)[4],
3528 uint8_t *dst, ptrdiff_t ls)
3530 int y, x, bytesperpixel = s->bytesperpixel;
3532 // filter edges between columns (e.g. block1 | block2)
3533 for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
// hmask1/hmask2 are the masks for the upper/lower 8px rows of this pair;
// hm folds them together so the x loop can stop at the last set bit.
3534 uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
3535 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3536 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3537 unsigned hm = hm1 | hm2 | hm13 | hm23;
3539 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
// L = filter level for this edge; H (hev threshold), E (edge limit) and
// I (interior limit) come from the precomputed LUTs.
3542 int L = *l, H = L >> 4;
3543 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
// 16-wide filter: if the row below filters the same column too, a single
// loop_filter_16 call covers both rows at once.
3545 if (hmask1[0] & x) {
3546 if (hmask2[0] & x) {
3547 av_assert2(l[8 << ss_v] == L);
3548 s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
3550 s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
// 8/4-wide filters: when both rows need filtering, pack the lower row's
// thresholds into the high byte and use the paired mix2 filter.
3552 } else if (hm2 & x) {
3555 E |= s->filter.mblim_lut[L] << 8;
3556 I |= s->filter.lim_lut[L] << 8;
3557 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3559 [0](ptr, ls, E, I, H);
3561 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3562 [0](ptr, ls, E, I, H);
// Only the lower 8px row needs filtering at this column.
3564 } else if (hm2 & x) {
3565 int L = l[8 << ss_v], H = L >> 4;
3566 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3568 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3569 [0](ptr + 8 * ls, ls, E, I, H);
// Inner 4px edges (mask index 3), offset 4px into the 8px unit.
3577 int L = *l, H = L >> 4;
3578 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3583 E |= s->filter.mblim_lut[L] << 8;
3584 I |= s->filter.lim_lut[L] << 8;
3585 s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3587 s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3589 } else if (hm23 & x) {
3590 int L = l[8 << ss_v], H = L >> 4;
3591 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3593 s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
// Apply the loop filter to horizontal edges (edges *between rows*) of one
// plane within a 64x64 superblock. Mirror of filter_plane_cols: 'mask'
// holds per-row bitmasks by filter-size class, 'lvl' the per-block filter
// strengths; adjacent column pairs are handled together via mix2 filters.
3601 static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
3602 uint8_t *lvl, uint8_t (*mask)[4],
3603 uint8_t *dst, ptrdiff_t ls)
3605 int y, x, bytesperpixel = s->bytesperpixel;
3608 // filter edges between rows (e.g. ------)
3610 for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
3611 uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
3612 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
// Step two 8px units per iteration so left/right neighbors can be paired.
3614 for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
3617 int L = *l, H = L >> 4;
3618 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
// 16-wide filter spanning both horizontal neighbors when both are flagged.
3621 if (vmask[0] & (x << (1 + ss_h))) {
3622 av_assert2(l[1 + ss_h] == L);
3623 s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
3625 s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
// Both neighbors need an 8/4 filter: pack the right neighbor's thresholds
// into the high byte and use the paired mix2 filter.
3627 } else if (vm & (x << (1 + ss_h))) {
3630 E |= s->filter.mblim_lut[L] << 8;
3631 I |= s->filter.lim_lut[L] << 8;
3632 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3633 [!!(vmask[1] & (x << (1 + ss_h)))]
3634 [1](ptr, ls, E, I, H);
3636 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3637 [1](ptr, ls, E, I, H);
// Only the right-hand 8px unit needs filtering.
3639 } else if (vm & (x << (1 + ss_h))) {
3640 int L = l[1 + ss_h], H = L >> 4;
3641 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3643 s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
3644 [1](ptr + 8 * bytesperpixel, ls, E, I, H);
// Inner 4px horizontal edges (mask index 3), 4 lines down.
3649 int L = *l, H = L >> 4;
3650 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3652 if (vm3 & (x << (1 + ss_h))) {
3655 E |= s->filter.mblim_lut[L] << 8;
3656 I |= s->filter.lim_lut[L] << 8;
3657 s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
3659 s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
3661 } else if (vm3 & (x << (1 + ss_h))) {
3662 int L = l[1 + ss_h], H = L >> 4;
3663 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3665 s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
// Run the deblocking loop filter over one 64x64 superblock: first the luma
// plane, then both chroma planes, columns (vertical edges) before rows
// (horizontal edges) as the VP9 spec requires. lflvl holds the per-block
// levels and edge masks accumulated during block decoding.
3678 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3679 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3681 VP9Context *s = ctx->priv_data;
3682 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3683 uint8_t *dst = f->data[0] + yoff;
3684 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
// Chroma uses the subsampled mask set when either dimension is subsampled.
3685 uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
3688 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3689 // if you think of them as acting on a 8x8 block max, we can interleave
3690 // each v/h within the single x loop, but that only works if we work on
3691 // 8 pixel blocks, and we won't always do that (we want at least 16px
3692 // to use SSE2 optimizations, perhaps 32 for AVX2)
3694 filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
3695 filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);
// Both chroma planes share the same level map and masks.
3697 for (p = 0; p < 2; p++) {
3698 dst = f->data[1 + p] + uvoff;
3699 filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
3700 filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
// Compute the [start, end) range of a tile, in 8px block units, for tile
// index 'idx' out of 1 << log2_n tiles covering n superblocks. The FFMIN
// clamps and the << 3 convert superblock counts to block coordinates.
3704 static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
3706 int sb_start = ( idx * n) >> log2_n;
3707 int sb_end = ((idx + 1) * n) >> log2_n;
3708 *start = FFMIN(sb_start, n) << 3;
3709 *end = FFMIN(sb_end, n) << 3;
// Backward-adapt a single binary probability *p towards the observed
// counts (ct0 = "bit was 0", ct1 = "bit was 1"), blending old and observed
// probability with a count-scaled update factor.
// NOTE(review): the early-out for ct == 0 and the load of p1 = *p are not
// visible in this excerpt — confirm against the full source.
3712 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3713 int max_count, int update_factor)
3715 unsigned ct = ct0 + ct1, p2, p1;
// Observed probability of a 0-bit, rounded, clipped to the legal 1..255.
3721 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3722 p2 = av_clip(p2, 1, 255);
// Scale the update factor by how many samples we actually saw.
3723 ct = FFMIN(ct, max_count);
3724 update_factor = FASTDIV(update_factor * ct, max_count);
3726 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3727 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
// End-of-frame backward probability adaptation: fold the symbol counts
// gathered while decoding (s->counts) into the frame context
// s->prob_ctx[s->framectxid], group by group (coefficients, skip, intra,
// compound prediction, partitioning, tx size, filter, mv, intra modes).
// Inter-only groups are skipped on keyframes/intra-only frames.
3730 static void adapt_probs(VP9Context *s)
3733 prob_context *p = &s->prob_ctx[s->framectxid].p;
// Coefficients adapt faster (uf=112) after a keyframe/intra/reset.
3734 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
// coefficient probs: [tx size][plane type][ref type][band][coef context]
3737 for (i = 0; i < 4; i++)
3738 for (j = 0; j < 2; j++)
3739 for (k = 0; k < 2; k++)
3740 for (l = 0; l < 6; l++)
3741 for (m = 0; m < 6; m++) {
3742 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3743 unsigned *e = s->counts.eob[i][j][k][l][m];
3744 unsigned *c = s->counts.coef[i][j][k][l][m];
3746 if (l == 0 && m >= 3) // dc only has 3 pt
3749 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3750 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3751 adapt_prob(&pp[2], c[1], c[2], 24, uf);
// Intra frames: the remaining groups are not coded, just snapshot the
// current header-updated probs and return (return not visible here).
3754 if (s->keyframe || s->intraonly) {
3755 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3756 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3757 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3758 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
// skip flag
3763 for (i = 0; i < 3; i++)
3764 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
// intra/inter flag
3767 for (i = 0; i < 4; i++)
3768 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
// compound prediction mode (single vs compound), only when switchable
3771 if (s->comppredmode == PRED_SWITCHABLE) {
3772 for (i = 0; i < 5; i++)
3773 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
// compound reference frame selection
3777 if (s->comppredmode != PRED_SINGLEREF) {
3778 for (i = 0; i < 5; i++)
3779 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3780 s->counts.comp_ref[i][1], 20, 128);
// single reference frame selection (2-level tree)
3783 if (s->comppredmode != PRED_COMPREF) {
3784 for (i = 0; i < 5; i++) {
3785 uint8_t *pp = p->single_ref[i];
3786 unsigned (*c)[2] = s->counts.single_ref[i];
3788 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3789 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3793 // block partitioning
3794 for (i = 0; i < 4; i++)
3795 for (j = 0; j < 4; j++) {
3796 uint8_t *pp = p->partition[i][j];
3797 unsigned *c = s->counts.partition[i][j];
3799 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3800 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3801 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// tx size selection, only when per-block switchable
3805 if (s->txfmmode == TX_SWITCHABLE) {
3806 for (i = 0; i < 2; i++) {
3807 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3809 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3810 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3811 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3812 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3813 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3814 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3818 // interpolation filter
3819 if (s->filtermode == FILTER_SWITCHABLE) {
3820 for (i = 0; i < 4; i++) {
3821 uint8_t *pp = p->filter[i];
3822 unsigned *c = s->counts.filter[i];
3824 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3825 adapt_prob(&pp[1], c[1], c[2], 20, 128);
// inter prediction modes (ZEROMV/NEARESTMV/NEARMV/NEWMV tree)
3830 for (i = 0; i < 7; i++) {
3831 uint8_t *pp = p->mv_mode[i];
3832 unsigned *c = s->counts.mv_mode[i];
3834 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3835 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3836 adapt_prob(&pp[2], c[1], c[3], 20, 128);
// mv joint (which of the two mv components are nonzero)
3841 uint8_t *pp = p->mv_joint;
3842 unsigned *c = s->counts.mv_joint;
3844 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3845 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3846 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// mv components: sign, class tree, class0, bits, fractional and hp bits,
// separately for row (i=0) and column (i=1)
3850 for (i = 0; i < 2; i++) {
3852 unsigned *c, (*c2)[2], sum;
3854 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3855 s->counts.mv_comp[i].sign[1], 20, 128);
// Magnitude class tree: 'sum' is peeled down the tree node by node.
3857 pp = p->mv_comp[i].classes;
3858 c = s->counts.mv_comp[i].classes;
3859 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3860 adapt_prob(&pp[0], c[0], sum, 20, 128);
3862 adapt_prob(&pp[1], c[1], sum, 20, 128);
3864 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3865 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3867 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3868 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3870 adapt_prob(&pp[6], c[6], sum, 20, 128);
3871 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3872 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3873 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3875 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3876 s->counts.mv_comp[i].class0[1], 20, 128);
3877 pp = p->mv_comp[i].bits;
3878 c2 = s->counts.mv_comp[i].bits;
3879 for (j = 0; j < 10; j++)
3880 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3882 for (j = 0; j < 2; j++) {
3883 pp = p->mv_comp[i].class0_fp[j];
3884 c = s->counts.mv_comp[i].class0_fp[j];
3885 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3886 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3887 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3889 pp = p->mv_comp[i].fp;
3890 c = s->counts.mv_comp[i].fp;
3891 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3892 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3893 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// High-precision (1/8-pel) bits only adapt when the frame coded them.
3895 if (s->highprecisionmvs) {
3896 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3897 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3898 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3899 s->counts.mv_comp[i].hp[1], 20, 128);
// y intra modes: tree adaptation, subtracting each decided mode's count
// from 'sum' as we walk down the tree
3904 for (i = 0; i < 4; i++) {
3905 uint8_t *pp = p->y_mode[i];
3906 unsigned *c = s->counts.y_mode[i], sum, s2;
3908 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3909 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3910 sum -= c[TM_VP8_PRED];
3911 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3912 sum -= c[VERT_PRED];
3913 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3914 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3916 adapt_prob(&pp[3], s2, sum, 20, 128);
3918 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3919 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3920 sum -= c[DIAG_DOWN_LEFT_PRED];
3921 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3922 sum -= c[VERT_LEFT_PRED];
3923 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3924 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// uv intra modes: same tree, conditioned on the luma mode (i)
3928 for (i = 0; i < 10; i++) {
3929 uint8_t *pp = p->uv_mode[i];
3930 unsigned *c = s->counts.uv_mode[i], sum, s2;
3932 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3933 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3934 sum -= c[TM_VP8_PRED];
3935 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3936 sum -= c[VERT_PRED];
3937 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3938 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3940 adapt_prob(&pp[3], s2, sum, 20, 128);
3942 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3943 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3944 sum -= c[DIAG_DOWN_LEFT_PRED];
3945 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3946 sum -= c[VERT_LEFT_PRED];
3947 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3948 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// Release the decoder's per-resolution scratch buffers (intra prediction
// line buffer, block-struct array, coefficient block base). av_freep()
// also NULLs the pointers, so this is safe to call repeatedly.
3952 static void free_buffers(VP9Context *s)
3954 av_freep(&s->intra_pred_data[0]);
3955 av_freep(&s->b_base);
3956 av_freep(&s->block_base);
// AVCodec.close: release all frame state — the 3 internal frames
// (current + mvpair/segmap refs), the 8 reference slots and their
// in-flight replacements — then the scratch buffers.
3959 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3961 VP9Context *s = ctx->priv_data;
3964 for (i = 0; i < 3; i++) {
// Only unref frames that actually hold a buffer.
3965 if (s->frames[i].tf.f->data[0])
3966 vp9_unref_frame(ctx, &s->frames[i]);
3967 av_frame_free(&s->frames[i].tf.f);
3969 for (i = 0; i < 8; i++) {
3970 if (s->refs[i].f->data[0])
3971 ff_thread_release_buffer(ctx, &s->refs[i]);
3972 av_frame_free(&s->refs[i].f);
3973 if (s->next_refs[i].f->data[0])
3974 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3975 av_frame_free(&s->next_refs[i].f);
// AVCodec.decode entry point: parse the frame header, manage the
// reference/segmentation-map frame slots, run the (possibly 2-pass) tile
// decode loop with per-row loop filtering and frame-threading progress
// reports, then rotate the reference slots and output the frame.
3985 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3986 int *got_frame, AVPacket *pkt)
3988 const uint8_t *data = pkt->data;
3989 int size = pkt->size;
3990 VP9Context *s = ctx->priv_data;
3991 int res, tile_row, tile_col, i, ref, row, col;
// Keep the previous segmentation map alive when the header says the map
// is not being updated this frame.
3992 int retain_segmap_ref = s->segmentation.enabled && !s->segmentation.update_map
3993 && s->frames[REF_FRAME_SEGMAP].segmentation_map;
3994 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
3998 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
// res == 0: "show existing frame" — output reference 'ref' directly,
// carry the refs forward unchanged, and decode nothing.
4000 } else if (res == 0) {
4001 if (!s->refs[ref].f->data[0]) {
4002 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
4003 return AVERROR_INVALIDDATA;
4005 if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
4007 ((AVFrame *)frame)->pkt_pts = pkt->pts;
4008 ((AVFrame *)frame)->pkt_dts = pkt->dts;
4009 for (i = 0; i < 8; i++) {
4010 if (s->next_refs[i].f->data[0])
4011 ff_thread_release_buffer(ctx, &s->next_refs[i]);
4012 if (s->refs[i].f->data[0] &&
4013 (res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i])) < 0)
// Rotate the previous frame into the segmap/mvpair slots (used for
// segmentation prediction and last-frame MVs), then allocate the new
// current frame.
4022 if (!retain_segmap_ref) {
4023 if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
4024 vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
4025 if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4026 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
4029 if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
4030 vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);
4031 if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4032 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
4034 if (s->frames[CUR_FRAME].tf.f->data[0])
4035 vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
4036 if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
4038 f = s->frames[CUR_FRAME].tf.f;
4039 f->key_frame = s->keyframe;
4040 f->pict_type = (s->keyframe || s->intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
4041 ls_y = f->linesize[0];
4042 ls_uv =f->linesize[1];
// Prepare next_refs: slots flagged in refreshrefmask point at the new
// frame, the rest carry over the existing reference.
4045 for (i = 0; i < 8; i++) {
4046 if (s->next_refs[i].f->data[0])
4047 ff_thread_release_buffer(ctx, &s->next_refs[i]);
4048 if (s->refreshrefmask & (1 << i)) {
4049 res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
4050 } else if (s->refs[i].f->data[0]) {
4051 res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
4057 // main tile decode loop
// Reset the above-row contexts once per frame.
4058 bytesperpixel = s->bytesperpixel;
4059 memset(s->above_partition_ctx, 0, s->cols);
4060 memset(s->above_skip_ctx, 0, s->cols);
4061 if (s->keyframe || s->intraonly) {
4062 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
4064 memset(s->above_mode_ctx, NEARESTMV, s->cols);
4066 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
4067 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
4068 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
4069 memset(s->above_segpred_ctx, 0, s->cols);
// 2-pass mode: used with frame threading when this frame adapts the
// probability context and is not in parallel (error-resilient) mode.
4070 s->pass = s->frames[CUR_FRAME].uses_2pass =
4071 ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
4072 if ((res = update_block_buffers(ctx)) < 0) {
4073 av_log(ctx, AV_LOG_ERROR,
4074 "Failed to allocate block buffers\n");
// Parallel mode refreshes the context up-front (no backward adaptation),
// so dependent frame threads can be released immediately.
4077 if (s->refreshctx && s->parallelmode) {
4080 for (i = 0; i < 4; i++) {
4081 for (j = 0; j < 2; j++)
4082 for (k = 0; k < 2; k++)
4083 for (l = 0; l < 6; l++)
4084 for (m = 0; m < 6; m++)
4085 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
4086 s->prob.coef[i][j][k][l][m], 3);
4087 if (s->txfmmode == i)
4090 s->prob_ctx[s->framectxid].p = s->prob.p;
4091 ff_thread_finish_setup(ctx);
4092 } else if (!s->refreshctx) {
4093 ff_thread_finish_setup(ctx);
// Rewind the coefficient/eob buffers at the start of each pass.
4099 s->block = s->block_base;
4100 s->uvblock[0] = s->uvblock_base[0];
4101 s->uvblock[1] = s->uvblock_base[1];
4102 s->eob = s->eob_base;
4103 s->uveob[0] = s->uveob_base[0];
4104 s->uveob[1] = s->uveob_base[1];
4106 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
4107 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
4108 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
// Set up one range decoder per tile column; the last tile of the frame
// has no explicit size field and consumes the remaining data.
4110 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4113 if (tile_col == s->tiling.tile_cols - 1 &&
4114 tile_row == s->tiling.tile_rows - 1) {
4117 tile_size = AV_RB32(data);
4121 if (tile_size > size) {
4122 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4123 return AVERROR_INVALIDDATA;
4125 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
4126 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
4127 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4128 return AVERROR_INVALIDDATA;
// Decode superblock rows; columns iterate across all tile columns of the
// row so the loop filter can run a full row at a time.
4135 for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
4136 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
4137 struct VP9Filter *lflvl_ptr = s->lflvl;
4138 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
4140 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4141 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
4142 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
// Left-edge contexts reset at the start of every tile column.
4145 memset(s->left_partition_ctx, 0, 8);
4146 memset(s->left_skip_ctx, 0, 8);
4147 if (s->keyframe || s->intraonly) {
4148 memset(s->left_mode_ctx, DC_PRED, 16);
4150 memset(s->left_mode_ctx, NEARESTMV, 8);
4152 memset(s->left_y_nnz_ctx, 0, 16);
4153 memset(s->left_uv_nnz_ctx, 0, 32);
4154 memset(s->left_segpred_ctx, 0, 8);
// Swap this tile's range decoder state in (and back out below), since
// tiles in a row are interleaved superblock-row by superblock-row.
4156 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
4159 for (col = s->tiling.tile_col_start;
4160 col < s->tiling.tile_col_end;
4161 col += 8, yoff2 += 64 * bytesperpixel,
4162 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4163 // FIXME integrate with lf code (i.e. zero after each
4164 // use, similar to invtxfm coefficients, or similar)
4166 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
// Pass 2 replays the stored block tree; pass 0/1 parses the bitstream.
4170 decode_sb_mem(ctx, row, col, lflvl_ptr,
4171 yoff2, uvoff2, BL_64X64);
4173 decode_sb(ctx, row, col, lflvl_ptr,
4174 yoff2, uvoff2, BL_64X64);
4178 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
4186 // backup pre-loopfilter reconstruction data for intra
4187 // prediction of next row of sb64s
4188 if (row + 8 < s->rows) {
4189 memcpy(s->intra_pred_data[0],
4190 f->data[0] + yoff + 63 * ls_y,
4191 8 * s->cols * bytesperpixel);
4192 memcpy(s->intra_pred_data[1],
4193 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4194 8 * s->cols * bytesperpixel >> s->ss_h);
4195 memcpy(s->intra_pred_data[2],
4196 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4197 8 * s->cols * bytesperpixel >> s->ss_h);
4200 // loopfilter one row
4201 if (s->filter.level) {
4204 lflvl_ptr = s->lflvl;
4205 for (col = 0; col < s->cols;
4206 col += 8, yoff2 += 64 * bytesperpixel,
4207 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4208 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
4212 // FIXME maybe we can make this more finegrained by running the
4213 // loopfilter per-block instead of after each sbrow
4214 // In fact that would also make intra pred left preparation easier?
4215 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
// After pass 1 of 2-pass decoding: adapt probabilities and release
// waiting frame threads before the reconstruction pass.
4219 if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
4221 ff_thread_finish_setup(ctx);
4223 } while (s->pass++ == 1);
4224 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
// Commit next_refs into the active reference slots.
4227 for (i = 0; i < 8; i++) {
4228 if (s->refs[i].f->data[0])
4229 ff_thread_release_buffer(ctx, &s->refs[i]);
4230 ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
// Invisible (alt-ref) frames update state but produce no output.
4233 if (!s->invisible) {
4234 if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
// AVCodec.flush (seek/reset): drop the 3 internal frames and all 8
// reference slots; the AVFrame shells themselves stay allocated.
4242 static void vp9_decode_flush(AVCodecContext *ctx)
4244 VP9Context *s = ctx->priv_data;
4247 for (i = 0; i < 3; i++)
4248 vp9_unref_frame(ctx, &s->frames[i]);
4249 for (i = 0; i < 8; i++)
4250 ff_thread_release_buffer(ctx, &s->refs[i]);
// Allocate the AVFrame shells for the 3 internal frames and the 8
// reference / next-reference slot pairs. On any allocation failure the
// whole decoder state is torn down via vp9_decode_free() (safe because
// av_frame_free() handles the not-yet-allocated NULL entries).
4253 static int init_frames(AVCodecContext *ctx)
4255 VP9Context *s = ctx->priv_data;
4258 for (i = 0; i < 3; i++) {
4259 s->frames[i].tf.f = av_frame_alloc();
4260 if (!s->frames[i].tf.f) {
4261 vp9_decode_free(ctx);
4262 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4263 return AVERROR(ENOMEM);
4266 for (i = 0; i < 8; i++) {
4267 s->refs[i].f = av_frame_alloc();
4268 s->next_refs[i].f = av_frame_alloc();
4269 if (!s->refs[i].f || !s->next_refs[i].f) {
4270 vp9_decode_free(ctx);
4271 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4272 return AVERROR(ENOMEM);
// AVCodec.init: enable per-frame progress allocation for frame threading,
// mark the sharpness as "unset" so the first header forces a filter-LUT
// rebuild, and allocate the frame shells.
4279 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4281 VP9Context *s = ctx->priv_data;
4283 ctx->internal->allocate_progress = 1;
4285 s->filter.sharpness = -1;
4287 return init_frames(ctx);
// Frame-threading worker init: each thread copy only needs its own frame
// shells; all other state is synced via update_thread_context.
4290 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4292 return init_frames(avctx);
// Frame-threading state sync: copy inter-frame decoder state from the
// thread that just finished a frame (src) into the next worker (dst) —
// frames, reference slots, and the header fields the next frame's
// parsing depends on (probability contexts, lf deltas, segmentation).
4295 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4298 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4300 // detect size changes in other threads
4301 if (s->intra_pred_data[0] &&
4302 (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
4306 for (i = 0; i < 3; i++) {
4307 if (s->frames[i].tf.f->data[0])
4308 vp9_unref_frame(dst, &s->frames[i]);
4309 if (ssrc->frames[i].tf.f->data[0]) {
4310 if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
// Note: dst's refs take over src's *next_refs*, i.e. the post-frame state.
4314 for (i = 0; i < 8; i++) {
4315 if (s->refs[i].f->data[0])
4316 ff_thread_release_buffer(dst, &s->refs[i]);
4317 if (ssrc->next_refs[i].f->data[0]) {
4318 if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
4323 s->invisible = ssrc->invisible;
4324 s->keyframe = ssrc->keyframe;
4325 s->ss_v = ssrc->ss_v;
4326 s->ss_h = ssrc->ss_h;
4327 s->segmentation.enabled = ssrc->segmentation.enabled;
4328 s->segmentation.update_map = ssrc->segmentation.update_map;
4329 s->bytesperpixel = ssrc->bytesperpixel;
4331 s->bpp_index = ssrc->bpp_index;
4332 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4333 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
4334 if (ssrc->segmentation.enabled) {
4335 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4336 sizeof(s->segmentation.feat));
// VP9 profiles exposed through AVCodec.profiles; FF_PROFILE_UNKNOWN
// terminates the list.
4342 static const AVProfile profiles[] = {
4343 { FF_PROFILE_VP9_0, "Profile 0" },
4344 { FF_PROFILE_VP9_1, "Profile 1" },
4345 { FF_PROFILE_VP9_2, "Profile 2" },
4346 { FF_PROFILE_VP9_3, "Profile 3" },
4347 { FF_PROFILE_UNKNOWN },
// Public decoder registration: wires the callbacks above into lavc and
// advertises direct rendering + frame threading support.
4350 AVCodec ff_vp9_decoder = {
4352 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4353 .type = AVMEDIA_TYPE_VIDEO,
4354 .id = AV_CODEC_ID_VP9,
4355 .priv_data_size = sizeof(VP9Context),
4356 .init = vp9_decode_init,
4357 .close = vp9_decode_free,
4358 .decode = vp9_decode_frame,
4359 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4360 .flush = vp9_decode_flush,
4361 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4362 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
4363 .profiles = NULL_IF_CONFIG_SMALL(profiles),