2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
34 #include "libavutil/pixdesc.h"
36 #define VP9_SYNCCODE 0x498342
// NOTE(review): this extraction is missing source lines (gaps in the embedded
// numbering), so members and the closing brace of this struct are absent here.
73 typedef struct VP9Frame {
// Refcounted buffer backing both segmentation_map and mv below
// (see vp9_alloc_frame: map at data[0], mv array right after it).
75 AVBufferRef *extradata;
// Per-8x8-block segment ids; points into extradata->data.
76 uint8_t *segmentation_map;
// Per-block motion-vector/reference pairs; also stored inside extradata.
77 struct VP9mvrefPair *mv;
// Loop-filter mask fragment — presumably a member of struct VP9Filter
// (the enclosing struct declaration is not visible in this extraction;
// TODO confirm against the full source).
83 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
84 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
// Per-block decode state. NOTE(review): members are missing from this
// extraction (embedded numbering has gaps); closing brace not visible.
87 typedef struct VP9Block {
// seg_id: segment id; intra/comp: prediction flags; ref[2]: reference
// indices; mode[4]: per-sub-block modes; uvmode/skip as in the bitstream.
88 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
89 enum FilterMode filter;
// One MV per 4x4 sub-block index, per reference (up to 2 for compound).
90 VP56mv mv[4 /* b_idx */][2 /* ref */];
92 enum TxfmMode tx, uvtx;
94 enum BlockPartition bp;
// Main decoder state. NOTE(review): many members (bitreader, range coder,
// prob/counts sub-struct headers, closing braces) were dropped by this
// extraction — the nested struct boundaries below are therefore not visible.
97 typedef struct VP9Context {
104 VP9Block *b_base, *b;
// Current block position: row/col in 8x8 units, row7/col7 = low 3 bits.
106 int row, row7, col, col7;
108 ptrdiff_t y_stride, uv_stride;
// Frame-header derived state.
111 uint8_t keyframe, last_keyframe;
112 uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
114 uint8_t use_last_frame_mvs;
119 uint8_t refreshrefmask;
120 uint8_t highprecisionmvs;
121 enum FilterMode filtermode;
122 uint8_t allowcompinter;
125 uint8_t parallelmode;
// Indices of the two variable compound references (set in decode_frame_header).
129 uint8_t varcompref[2];
130 ThreadFrame refs[8], next_refs[8];
132 #define REF_FRAME_MVPAIR 1
133 #define REF_FRAME_SEGMAP 2
140 uint8_t mblim_lut[64];
// Quantizer deltas relative to yac_qi (parsed in decode_frame_header).
148 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
150 #define MAX_SEGMENT 8
154 uint8_t absolute_vals;
156 uint8_t ignore_refmap;
161 uint8_t skip_enabled;
// Tiling layout, recomputed per frame header.
170 unsigned log2_tile_cols, log2_tile_rows;
171 unsigned tile_cols, tile_rows;
172 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
174 unsigned sb_cols, sb_rows, rows, cols;
// NOTE(review): the two coef[] members below have different innermost sizes
// (3 vs 11) — presumably they belong to two different nested structs
// (probability context vs. working probabilities) whose headers were lost
// in extraction; confirm against the full source.
177 uint8_t coef[4][2][2][6][6][3];
181 uint8_t coef[4][2][2][6][6][11];
// Adaptation counters (members of a counts struct not visible here).
186 unsigned y_mode[4][10];
187 unsigned uv_mode[10][10];
188 unsigned filter[4][3];
189 unsigned mv_mode[7][4];
190 unsigned intra[4][2];
192 unsigned single_ref[5][2][2];
193 unsigned comp_ref[5][2];
194 unsigned tx32p[2][4];
195 unsigned tx16p[2][3];
198 unsigned mv_joint[4];
201 unsigned classes[11];
203 unsigned bits[10][2];
204 unsigned class0_fp[2][4];
206 unsigned class0_hp[2];
209 unsigned partition[4][4][4];
210 unsigned coef[4][2][2][6][6][3];
211 unsigned eob[4][2][2][6][6][2];
213 enum TxfmMode txfmmode;
214 enum CompPredMode comppredmode;
216 // contextual (left/above) cache
217 DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
218 DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
219 DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
220 DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
221 DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
222 DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
223 DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
224 DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
225 DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
226 DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
227 DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
228 DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
// "above" context rows are allocated per frame width in update_size().
229 uint8_t *above_partition_ctx;
230 uint8_t *above_mode_ctx;
231 // FIXME maybe merge some of the below in a flags field?
232 uint8_t *above_y_nnz_ctx;
233 uint8_t *above_uv_nnz_ctx[2];
234 uint8_t *above_skip_ctx; // 1bit
235 uint8_t *above_txfm_ctx; // 2bit
236 uint8_t *above_segpred_ctx; // 1bit
237 uint8_t *above_intra_ctx; // 1bit
238 uint8_t *above_comp_ctx; // 1bit
239 uint8_t *above_ref_ctx; // 2bit
240 uint8_t *above_filter_ctx;
241 VP56mv (*above_mv_ctx)[2];
// Whole-frame cache: all pointers below live in one av_malloc'd slab
// owned by intra_pred_data[0] (see update_size).
244 uint8_t *intra_pred_data[3];
245 struct VP9Filter *lflvl;
246 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2];
248 // block reconstruction intermediates
249 int block_alloc_using_2pass;
250 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
251 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
252 struct { int x, y; } min_mv, max_mv;
253 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
254 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
// Per-reference MV scaling factors (14-bit fixed point) and step sizes,
// filled in decode_frame_header for scaled references.
255 uint16_t mvscale[3][2];
256 uint8_t mvstep[3][2];
// Per-block-size {width, height} lookup. The two planes appear to be the
// same sizes expressed in two granularities (first plane 4-sample units,
// second plane 8-sample units) — TODO confirm; the inner brace/separator
// lines were dropped by this extraction.
259 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
261 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
262 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
264 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
265 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
// Allocate a VP9Frame: the pixel buffer (via the frame-threading API) plus
// one refcounted extradata blob that holds the per-8x8-block segmentation
// map followed by the MV/ref pair array. Returns 0 or a negative AVERROR.
// NOTE(review): local declarations (ret, sz), the success return and the
// closing brace were dropped by this extraction.
269 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
271 VP9Context *s = ctx->priv_data;
274 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
// 64 8x8 blocks per 64x64 superblock.
276 sz = 64 * s->sb_cols * s->sb_rows;
277 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
// On ENOMEM, release the already-acquired pixel buffer to avoid a leak.
278 ff_thread_release_buffer(ctx, &f->tf);
279 return AVERROR(ENOMEM);
// Segmentation map occupies the first sz bytes, MV pairs follow.
282 f->segmentation_map = f->extradata->data;
283 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
// Release both halves of a VP9Frame: the threaded pixel buffer and the
// extradata blob (which invalidates f->segmentation_map / f->mv since they
// point into it). Safe counterpart of vp9_alloc_frame.
288 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
290 ff_thread_release_buffer(ctx, &f->tf);
291 av_buffer_unref(&f->extradata);
// Make dst an additional reference to src: ref the threaded frame, then ref
// the shared extradata buffer; on partial failure everything acquired so far
// is released. The raw pointers are shared (refcounted via extradata), not
// copied. NOTE(review): error-path lines and the closing brace were dropped
// by this extraction.
294 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
298 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
300 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
// Buffer ref failed: undo the frame ref taken above.
301 vp9_unref_frame(ctx, dst);
302 return AVERROR(ENOMEM);
305 dst->segmentation_map = src->segmentation_map;
307 dst->uses_2pass = src->uses_2pass;
// (Re)initialize all per-frame-size state: superblock/row/col counts and one
// contiguous slab holding the intra-pred edge rows, all "above" context rows
// and the loop-filter level array. No-ops when size and pixfmt are unchanged.
// Also reinitializes the DSP tables when the bit depth changed.
// NOTE(review): several lines (ff_set_dimensions call, NULL check on p,
// returns, closing brace) were dropped by this extraction.
312 static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
314 VP9Context *s = ctx->priv_data;
316 int bytesperpixel = s->bytesperpixel;
318 av_assert0(w > 0 && h > 0);
// Fast path: nothing to do if geometry and format are unchanged.
320 if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
326 s->sb_cols = (w + 63) >> 6;
327 s->sb_rows = (h + 63) >> 6;
328 s->cols = (w + 7) >> 3;
329 s->rows = (h + 7) >> 3;
// Carve typed sub-arrays out of the single slab; p advances past each one.
331 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
332 av_freep(&s->intra_pred_data[0]);
333 // FIXME we slightly over-allocate here for subsampled chroma, but a little
334 // bit of padding shouldn't affect performance...
335 p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
336 sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
338 return AVERROR(ENOMEM);
339 assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
340 assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
341 assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
342 assign(s->above_y_nnz_ctx, uint8_t *, 16);
343 assign(s->above_mode_ctx, uint8_t *, 16);
344 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
345 assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
346 assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
347 assign(s->above_partition_ctx, uint8_t *, 8);
348 assign(s->above_skip_ctx, uint8_t *, 8);
349 assign(s->above_txfm_ctx, uint8_t *, 8);
350 assign(s->above_segpred_ctx, uint8_t *, 8);
351 assign(s->above_intra_ctx, uint8_t *, 8);
352 assign(s->above_comp_ctx, uint8_t *, 8);
353 assign(s->above_ref_ctx, uint8_t *, 8);
354 assign(s->above_filter_ctx, uint8_t *, 8);
355 assign(s->lflvl, struct VP9Filter *, 1);
358 // these will be re-allocated a little later
359 av_freep(&s->b_base);
360 av_freep(&s->block_base);
// Bit depth changed (e.g. profile switch): rebuild DSP function tables.
362 if (s->bpp != s->last_bpp) {
363 ff_vp9dsp_init(&s->dsp, s->bpp);
364 ff_videodsp_init(&s->vdsp, s->bpp);
365 s->last_bpp = s->bpp;
// Allocate the coefficient/EOB scratch buffers. In 2-pass (frame-threaded)
// mode the whole frame's worth of blocks must be kept (one entry per
// superblock), otherwise a single reusable block suffices. Reallocates only
// when the 2-pass mode flag changed. Returns 0 or AVERROR(ENOMEM).
// NOTE(review): the "else" line between the two branches, the success
// return and the closing brace were dropped by this extraction.
371 static int update_block_buffers(AVCodecContext *ctx)
373 VP9Context *s = ctx->priv_data;
374 int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
// Fast path: buffers exist and were sized for the current 2-pass mode.
376 if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
380 av_free(s->block_base);
// Chroma sizes shrink with each subsampling direction (ss_h/ss_v).
381 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
382 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
383 if (s->frames[CUR_FRAME].uses_2pass) {
384 int sbs = s->sb_cols * s->sb_rows;
// Whole-frame allocation: one block record per 8x8 position, coefficient
// and EOB storage per superblock.
386 s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
387 s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
388 16 * 16 + 2 * chroma_eobs) * sbs);
389 if (!s->b_base || !s->block_base)
390 return AVERROR(ENOMEM);
// Slice the slab: luma coeffs, two chroma coeff planes, then EOB arrays.
391 s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
392 s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
393 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
394 s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
395 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
// Single-pass: one superblock's worth of scratch, reused for every SB.
397 s->b_base = av_malloc(sizeof(VP9Block));
398 s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
399 16 * 16 + 2 * chroma_eobs);
400 if (!s->b_base || !s->block_base)
401 return AVERROR(ENOMEM);
402 s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
403 s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
404 s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
405 s->uveob_base[0] = s->eob_base + 16 * 16;
406 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
// Remember which layout we allocated so the fast path above stays valid.
408 s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
413 // for some reason the sign bit is at the end, not the start, of a bit sequence
// Read an n-bit magnitude followed by a 1-bit sign (1 = negative).
414 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
416 int v = get_bits(gb, n);
417 return get_bits1(gb) ? -v : v;
// Inverse of the "recenter" mapping used by the subexponential probability
// update: values near the old probability m were coded with short codes;
// odd v maps below m, even v above, and v > 2*m passes through unchanged.
420 static av_always_inline int inv_recenter_nonneg(int v, int m)
422 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
425 // differential forward probability updates
// Decode a subexponentially-coded delta and apply it to probability p
// (range [1,255]), returning the new probability.
// NOTE(review): the declaration of d, some else/brace lines and range
// checks were dropped by this extraction.
426 static int update_prob(VP56RangeCoder *c, int p)
428 static const int inv_map_table[254] = {
429 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
430 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
431 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
432 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
433 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
434 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
435 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
436 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
437 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
438 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
439 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
440 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
441 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
442 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
443 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
444 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
445 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
446 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
451 /* This code is trying to do a differential probability update. For a
452 * current probability A in the range [1, 255], the difference to a new
453 * probability of any value can be expressed differentially as 1-A,255-A
454 * where some part of this (absolute range) exists both in positive as
455 * well as the negative part, whereas another part only exists in one
456 * half. We're trying to code this shared part differentially, i.e.
457 * times two where the value of the lowest bit specifies the sign, and
458 * the single part is then coded on top of this. This absolute difference
459 * then again has a value of [0,254], but a bigger value in this range
460 * indicates that we're further away from the original value A, so we
461 * can code this as a VLC code, since higher values are increasingly
462 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
463 * updates vs. the 'fine, exact' updates further down the range, which
464 * adds one extra dimension to this differential update model. */
// Variable-length read of the delta index d: 4/4/5-bit classes, then a
// 7-bit class that is expanded by one extra bit.
466 if (!vp8_rac_get(c)) {
467 d = vp8_rac_get_uint(c, 4) + 0;
468 } else if (!vp8_rac_get(c)) {
469 d = vp8_rac_get_uint(c, 4) + 16;
470 } else if (!vp8_rac_get(c)) {
471 d = vp8_rac_get_uint(c, 5) + 32;
473 d = vp8_rac_get_uint(c, 7);
475 d = (d << 1) - 65 + vp8_rac_get(c);
// Re-center around the old probability, mirroring for the upper half so
// the result stays inside [1,255].
479 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
480 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
// Parse the color config from the uncompressed header: bit depth (profiles
// 2/3 only), colorspace, range, and chroma subsampling; sets s->bpp,
// s->bytesperpixel, s->ss_h/ss_v and the AVCodecContext color fields.
// Returns the resulting pixel format or a negative AVERROR.
// NOTE(review): several closing braces, the profile argument of the error
// logs and the final return were dropped by this extraction.
483 static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
485 static const enum AVColorSpace colorspaces[8] = {
486 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
487 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
489 VP9Context *s = ctx->priv_data;
490 enum AVPixelFormat res;
491 int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
494 s->bpp = 8 + bits * 2;
495 s->bytesperpixel = (7 + s->bpp) >> 3;
496 ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
497 if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
498 static const enum AVPixelFormat pix_fmt_rgb[3] = {
499 AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
// RGB is only valid in the odd profiles (1 and 3).
501 if (ctx->profile & 1) {
502 s->ss_h = s->ss_v = 1;
503 res = pix_fmt_rgb[bits];
504 ctx->color_range = AVCOL_RANGE_JPEG;
506 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
508 return AVERROR_INVALIDDATA;
// YUV: pick the format from bit depth and the two subsampling flags.
511 static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
512 { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
513 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
514 { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
515 { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
516 { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
517 { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
519 ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
// Odd profiles carry explicit subsampling bits; 4:2:0 is excluded there.
520 if (ctx->profile & 1) {
521 s->ss_h = get_bits1(&s->gb);
522 s->ss_v = get_bits1(&s->gb);
523 if ((res = pix_fmt_for_ss[bits][s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
524 av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
526 return AVERROR_INVALIDDATA;
527 } else if (get_bits1(&s->gb)) {
528 av_log(ctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
530 return AVERROR_INVALIDDATA;
// Even profiles are always 4:2:0.
533 s->ss_h = s->ss_v = 1;
534 res = pix_fmt_for_ss[bits][1][1];
// Parse a whole VP9 frame header: the uncompressed part (profile, frame
// type, sizes, references, loopfilter, quantizers, segmentation, tiling)
// with the GetBit reader, then the compressed part (probability updates)
// with the range coder. Returns the total header size in bytes on success
// (consumed by the caller to find tile data), or a negative AVERROR.
// NOTE(review): this extraction has dropped many lines (else branches,
// closing braces, some declarations such as last_invisible), so the control
// flow below is not complete as shown.
541 static int decode_frame_header(AVCodecContext *ctx,
542 const uint8_t *data, int size, int *ref)
544 VP9Context *s = ctx->priv_data;
545 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
546 enum AVPixelFormat fmt = ctx->pix_fmt;
548 const uint8_t *data2;
// ---- uncompressed (bit-exact) header ----
551 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
552 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
555 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
556 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
557 return AVERROR_INVALIDDATA;
// Profile is 2 bits low-first; profile 3 has a third (reserved) bit.
559 ctx->profile = get_bits1(&s->gb);
560 ctx->profile |= get_bits1(&s->gb) << 1;
561 if (ctx->profile == 3) ctx->profile += get_bits1(&s->gb);
562 if (ctx->profile > 3) {
563 av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
564 return AVERROR_INVALIDDATA;
// show_existing_frame: directly display a stored reference, no decode.
566 if (get_bits1(&s->gb)) {
567 *ref = get_bits(&s->gb, 3);
570 s->last_keyframe = s->keyframe;
571 s->keyframe = !get_bits1(&s->gb);
572 last_invisible = s->invisible;
573 s->invisible = !get_bits1(&s->gb);
574 s->errorres = get_bits1(&s->gb);
575 s->use_last_frame_mvs = !s->errorres && !last_invisible;
// Keyframe path: sync code, color config, full frame size.
577 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
578 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
579 return AVERROR_INVALIDDATA;
581 if ((fmt = read_colorspace_details(ctx)) < 0)
583 // for profile 1, here follows the subsampling bits
584 s->refreshrefmask = 0xff;
585 w = get_bits(&s->gb, 16) + 1;
586 h = get_bits(&s->gb, 16) + 1;
587 if (get_bits1(&s->gb)) // display size
588 skip_bits(&s->gb, 32);
// Non-keyframe path: intra-only or inter frame.
590 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
591 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
593 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
594 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
595 return AVERROR_INVALIDDATA;
597 if (ctx->profile == 1) {
598 if ((fmt = read_colorspace_details(ctx)) < 0)
601 s->ss_h = s->ss_v = 1;
// Profile-0 intra-only frames are implicitly 8-bit 4:2:0 BT.470BG.
604 s->bytesperpixel = 1;
605 fmt = AV_PIX_FMT_YUV420P;
606 ctx->colorspace = AVCOL_SPC_BT470BG;
607 ctx->color_range = AVCOL_RANGE_JPEG;
609 s->refreshrefmask = get_bits(&s->gb, 8);
610 w = get_bits(&s->gb, 16) + 1;
611 h = get_bits(&s->gb, 16) + 1;
612 if (get_bits1(&s->gb)) // display size
613 skip_bits(&s->gb, 32);
// Inter frame: three active references with per-reference sign bias.
615 s->refreshrefmask = get_bits(&s->gb, 8);
616 s->refidx[0] = get_bits(&s->gb, 3);
617 s->signbias[0] = get_bits1(&s->gb);
618 s->refidx[1] = get_bits(&s->gb, 3);
619 s->signbias[1] = get_bits1(&s->gb);
620 s->refidx[2] = get_bits(&s->gb, 3);
621 s->signbias[2] = get_bits1(&s->gb);
622 if (!s->refs[s->refidx[0]].f->data[0] ||
623 !s->refs[s->refidx[1]].f->data[0] ||
624 !s->refs[s->refidx[2]].f->data[0]) {
625 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
626 return AVERROR_INVALIDDATA;
// Frame size: either inherited from one of the refs or coded explicitly.
628 if (get_bits1(&s->gb)) {
629 w = s->refs[s->refidx[0]].f->width;
630 h = s->refs[s->refidx[0]].f->height;
631 } else if (get_bits1(&s->gb)) {
632 w = s->refs[s->refidx[1]].f->width;
633 h = s->refs[s->refidx[1]].f->height;
634 } else if (get_bits1(&s->gb)) {
635 w = s->refs[s->refidx[2]].f->width;
636 h = s->refs[s->refidx[2]].f->height;
638 w = get_bits(&s->gb, 16) + 1;
639 h = get_bits(&s->gb, 16) + 1;
641 // Note that in this code, "CUR_FRAME" is actually before we
642 // have formally allocated a frame, and thus actually represents
// Previous-frame MVs are only usable if the size did not change.
644 s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
645 s->frames[CUR_FRAME].tf.f->height == h;
646 if (get_bits1(&s->gb)) // display size
647 skip_bits(&s->gb, 32);
648 s->highprecisionmvs = get_bits1(&s->gb);
649 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
// Compound prediction is possible only when the refs disagree in sign bias;
// the fixed/variable compound references are derived from which pair agrees.
651 s->allowcompinter = s->signbias[0] != s->signbias[1] ||
652 s->signbias[0] != s->signbias[2];
653 if (s->allowcompinter) {
654 if (s->signbias[0] == s->signbias[1]) {
656 s->varcompref[0] = 0;
657 s->varcompref[1] = 1;
658 } else if (s->signbias[0] == s->signbias[2]) {
660 s->varcompref[0] = 0;
661 s->varcompref[1] = 2;
664 s->varcompref[0] = 1;
665 s->varcompref[1] = 2;
// Per-reference scaling factors (14-bit fixed point) for scaled prediction.
669 for (i = 0; i < 3; i++) {
670 AVFrame *ref = s->refs[s->refidx[i]].f;
671 int refw = ref->width, refh = ref->height;
673 if (ref->format != fmt) {
674 av_log(ctx, AV_LOG_ERROR,
675 "Ref pixfmt (%s) did not match current frame (%s)",
676 av_get_pix_fmt_name(ref->format),
677 av_get_pix_fmt_name(fmt));
678 return AVERROR_INVALIDDATA;
679 } else if (refw == w && refh == h) {
680 s->mvscale[i][0] = s->mvscale[i][1] = 0;
// Spec limits: ref may be at most 2x larger / 16x smaller than the frame.
682 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
683 av_log(ctx, AV_LOG_ERROR,
684 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
686 return AVERROR_INVALIDDATA;
688 s->mvscale[i][0] = (refw << 14) / w;
689 s->mvscale[i][1] = (refh << 14) / h;
690 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
691 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
// Error-resilient frames force no context refresh and parallel mode.
696 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
697 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
698 s->framectxid = c = get_bits(&s->gb, 2);
700 /* loopfilter header data */
701 s->filter.level = get_bits(&s->gb, 6);
702 sharp = get_bits(&s->gb, 3);
703 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
704 // the old cache values since they are still valid
705 if (s->filter.sharpness != sharp)
706 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
707 s->filter.sharpness = sharp;
708 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
709 if (get_bits1(&s->gb)) {
710 for (i = 0; i < 4; i++)
711 if (get_bits1(&s->gb))
712 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
713 for (i = 0; i < 2; i++)
714 if (get_bits1(&s->gb))
715 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
719 /* quantization header data */
720 s->yac_qi = get_bits(&s->gb, 8);
721 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
722 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
723 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
724 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
725 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
727 /* segmentation header info */
728 s->segmentation.ignore_refmap = 0;
729 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
730 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
731 for (i = 0; i < 7; i++)
732 s->prob.seg[i] = get_bits1(&s->gb) ?
733 get_bits(&s->gb, 8) : 255;
734 if ((s->segmentation.temporal = get_bits1(&s->gb))) {
735 for (i = 0; i < 3; i++)
736 s->prob.segpred[i] = get_bits1(&s->gb) ?
737 get_bits(&s->gb, 8) : 255;
// Reusing the previous segmentation map across a size change cannot work;
// warn and ignore rather than reject the stream outright.
740 if ((!s->segmentation.update_map || s->segmentation.temporal) &&
741 (w != s->frames[CUR_FRAME].tf.f->width ||
742 h != s->frames[CUR_FRAME].tf.f->height)) {
743 av_log(ctx, AV_LOG_WARNING,
744 "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
745 s->segmentation.temporal, s->segmentation.update_map);
746 s->segmentation.ignore_refmap = 1;
747 //return AVERROR_INVALIDDATA;
750 if (get_bits1(&s->gb)) {
751 s->segmentation.absolute_vals = get_bits1(&s->gb);
752 for (i = 0; i < 8; i++) {
753 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
754 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
755 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
756 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
757 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
758 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
759 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
// Segmentation disabled: only feat[0] is used; clear its flags.
763 s->segmentation.feat[0].q_enabled = 0;
764 s->segmentation.feat[0].lf_enabled = 0;
765 s->segmentation.feat[0].skip_enabled = 0;
766 s->segmentation.feat[0].ref_enabled = 0;
769 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
770 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
771 int qyac, qydc, quvac, quvdc, lflvl, sh;
773 if (s->segmentation.feat[i].q_enabled) {
774 if (s->segmentation.absolute_vals)
775 qyac = s->segmentation.feat[i].q_val;
777 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
781 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
782 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
783 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
784 qyac = av_clip_uintp2(qyac, 8);
786 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
787 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
788 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
789 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];
// Loopfilter deltas are shifted (not scaled) when filter level >= 32.
791 sh = s->filter.level >= 32;
792 if (s->segmentation.feat[i].lf_enabled) {
793 if (s->segmentation.absolute_vals)
794 lflvl = s->segmentation.feat[i].lf_val;
796 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
798 lflvl = s->filter.level;
800 if (s->lf_delta.enabled) {
801 s->segmentation.feat[i].lflvl[0][0] =
802 s->segmentation.feat[i].lflvl[0][1] =
803 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
804 for (j = 1; j < 4; j++) {
805 s->segmentation.feat[i].lflvl[j][0] =
806 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
807 s->lf_delta.mode[0]) * (1 << sh)), 6);
808 s->segmentation.feat[i].lflvl[j][1] =
809 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
810 s->lf_delta.mode[1]) * (1 << sh)), 6);
813 memset(s->segmentation.feat[i].lflvl, lflvl,
814 sizeof(s->segmentation.feat[i].lflvl));
// Geometry may have changed: (re)allocate size-dependent buffers.
819 if ((res = update_size(ctx, w, h, fmt)) < 0) {
820 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
// Tiling: clamp log2_tile_cols so each tile is at most 64 SBs wide and
// at least 4 SBs wide, then read increment bits up to that maximum.
823 for (s->tiling.log2_tile_cols = 0;
824 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
825 s->tiling.log2_tile_cols++) ;
826 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
827 max = FFMAX(0, max - 1);
828 while (max > s->tiling.log2_tile_cols) {
829 if (get_bits1(&s->gb))
830 s->tiling.log2_tile_cols++;
834 s->tiling.log2_tile_rows = decode012(&s->gb);
835 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
836 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
837 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
// One range coder per tile column.
838 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
839 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
841 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
842 return AVERROR(ENOMEM);
// Keyframes/error-resilient/intra-only frames reset all probability contexts.
846 if (s->keyframe || s->errorres || s->intraonly) {
847 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
848 s->prob_ctx[3].p = vp9_default_probs;
849 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
850 sizeof(vp9_default_coef_probs));
851 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
852 sizeof(vp9_default_coef_probs));
853 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
854 sizeof(vp9_default_coef_probs));
855 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
856 sizeof(vp9_default_coef_probs));
859 // next 16 bits is size of the rest of the header (arith-coded)
860 size2 = get_bits(&s->gb, 16);
861 data2 = align_get_bits(&s->gb);
862 if (size2 > size - (data2 - data)) {
863 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
864 return AVERROR_INVALIDDATA;
// ---- compressed (range-coded) header ----
866 ff_vp56_init_range_decoder(&s->c, data2, size2);
867 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
868 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
869 return AVERROR_INVALIDDATA;
// Reset adaptation counters (coef/eob only for intra frames).
872 if (s->keyframe || s->intraonly) {
873 memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
875 memset(&s->counts, 0, sizeof(s->counts));
877 // FIXME is it faster to not copy here, but do it down in the fw updates
878 // as explicit copies if the fw update is missing (and skip the copy upon
880 s->prob.p = s->prob_ctx[c].p;
// Transform mode (lossless forces 4x4 only).
884 s->txfmmode = TX_4X4;
886 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
887 if (s->txfmmode == 3)
888 s->txfmmode += vp8_rac_get(&s->c);
890 if (s->txfmmode == TX_SWITCHABLE) {
891 for (i = 0; i < 2; i++)
892 if (vp56_rac_get_prob_branchy(&s->c, 252))
893 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
894 for (i = 0; i < 2; i++)
895 for (j = 0; j < 2; j++)
896 if (vp56_rac_get_prob_branchy(&s->c, 252))
897 s->prob.p.tx16p[i][j] =
898 update_prob(&s->c, s->prob.p.tx16p[i][j]);
899 for (i = 0; i < 2; i++)
900 for (j = 0; j < 3; j++)
901 if (vp56_rac_get_prob_branchy(&s->c, 252))
902 s->prob.p.tx32p[i][j] =
903 update_prob(&s->c, s->prob.p.tx32p[i][j]);
// Coefficient probability updates per tx size: either explicit per-entry
// updates, or a straight copy from the selected probability context.
908 for (i = 0; i < 4; i++) {
909 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
910 if (vp8_rac_get(&s->c)) {
911 for (j = 0; j < 2; j++)
912 for (k = 0; k < 2; k++)
913 for (l = 0; l < 6; l++)
914 for (m = 0; m < 6; m++) {
915 uint8_t *p = s->prob.coef[i][j][k][l][m];
916 uint8_t *r = ref[j][k][l][m];
917 if (m >= 3 && l == 0) // dc only has 3 pt
919 for (n = 0; n < 3; n++) {
920 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
921 p[n] = update_prob(&s->c, r[n]);
929 for (j = 0; j < 2; j++)
930 for (k = 0; k < 2; k++)
931 for (l = 0; l < 6; l++)
932 for (m = 0; m < 6; m++) {
933 uint8_t *p = s->prob.coef[i][j][k][l][m];
934 uint8_t *r = ref[j][k][l][m];
// NOTE(review): this branch tests `m > 3` while the update branch above
// tests `m >= 3`; the asymmetry matches the original source as extracted,
// so it is deliberately left untouched here — confirm against upstream.
935 if (m > 3 && l == 0) // dc only has 3 pt
941 if (s->txfmmode == i)
// Skip-flag probability updates.
946 for (i = 0; i < 3; i++)
947 if (vp56_rac_get_prob_branchy(&s->c, 252))
948 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
// Inter-only syntax: mode/filter/intra/comp/ref probabilities and MV probs.
949 if (!s->keyframe && !s->intraonly) {
950 for (i = 0; i < 7; i++)
951 for (j = 0; j < 3; j++)
952 if (vp56_rac_get_prob_branchy(&s->c, 252))
953 s->prob.p.mv_mode[i][j] =
954 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
956 if (s->filtermode == FILTER_SWITCHABLE)
957 for (i = 0; i < 4; i++)
958 for (j = 0; j < 2; j++)
959 if (vp56_rac_get_prob_branchy(&s->c, 252))
960 s->prob.p.filter[i][j] =
961 update_prob(&s->c, s->prob.p.filter[i][j]);
963 for (i = 0; i < 4; i++)
964 if (vp56_rac_get_prob_branchy(&s->c, 252))
965 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
967 if (s->allowcompinter) {
968 s->comppredmode = vp8_rac_get(&s->c);
970 s->comppredmode += vp8_rac_get(&s->c);
971 if (s->comppredmode == PRED_SWITCHABLE)
972 for (i = 0; i < 5; i++)
973 if (vp56_rac_get_prob_branchy(&s->c, 252))
975 update_prob(&s->c, s->prob.p.comp[i]);
977 s->comppredmode = PRED_SINGLEREF;
980 if (s->comppredmode != PRED_COMPREF) {
981 for (i = 0; i < 5; i++) {
982 if (vp56_rac_get_prob_branchy(&s->c, 252))
983 s->prob.p.single_ref[i][0] =
984 update_prob(&s->c, s->prob.p.single_ref[i][0]);
985 if (vp56_rac_get_prob_branchy(&s->c, 252))
986 s->prob.p.single_ref[i][1] =
987 update_prob(&s->c, s->prob.p.single_ref[i][1]);
991 if (s->comppredmode != PRED_SINGLEREF) {
992 for (i = 0; i < 5; i++)
993 if (vp56_rac_get_prob_branchy(&s->c, 252))
994 s->prob.p.comp_ref[i] =
995 update_prob(&s->c, s->prob.p.comp_ref[i]);
998 for (i = 0; i < 4; i++)
999 for (j = 0; j < 9; j++)
1000 if (vp56_rac_get_prob_branchy(&s->c, 252))
1001 s->prob.p.y_mode[i][j] =
1002 update_prob(&s->c, s->prob.p.y_mode[i][j]);
1004 for (i = 0; i < 4; i++)
1005 for (j = 0; j < 4; j++)
1006 for (k = 0; k < 3; k++)
1007 if (vp56_rac_get_prob_branchy(&s->c, 252))
1008 s->prob.p.partition[3 - i][j][k] =
1009 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
1011 // mv fields don't use the update_prob subexp model for some reason
1012 for (i = 0; i < 3; i++)
1013 if (vp56_rac_get_prob_branchy(&s->c, 252))
1014 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1016 for (i = 0; i < 2; i++) {
1017 if (vp56_rac_get_prob_branchy(&s->c, 252))
1018 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1020 for (j = 0; j < 10; j++)
1021 if (vp56_rac_get_prob_branchy(&s->c, 252))
1022 s->prob.p.mv_comp[i].classes[j] =
1023 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1025 if (vp56_rac_get_prob_branchy(&s->c, 252))
1026 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1028 for (j = 0; j < 10; j++)
1029 if (vp56_rac_get_prob_branchy(&s->c, 252))
1030 s->prob.p.mv_comp[i].bits[j] =
1031 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1034 for (i = 0; i < 2; i++) {
1035 for (j = 0; j < 2; j++)
1036 for (k = 0; k < 3; k++)
1037 if (vp56_rac_get_prob_branchy(&s->c, 252))
1038 s->prob.p.mv_comp[i].class0_fp[j][k] =
1039 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1041 for (j = 0; j < 3; j++)
1042 if (vp56_rac_get_prob_branchy(&s->c, 252))
1043 s->prob.p.mv_comp[i].fp[j] =
1044 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// High-precision MV probabilities only exist when the header enabled them.
1047 if (s->highprecisionmvs) {
1048 for (i = 0; i < 2; i++) {
1049 if (vp56_rac_get_prob_branchy(&s->c, 252))
1050 s->prob.p.mv_comp[i].class0_hp =
1051 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1053 if (vp56_rac_get_prob_branchy(&s->c, 252))
1054 s->prob.p.mv_comp[i].hp =
1055 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// Total header size: uncompressed part + compressed part.
1060 return (data2 - data) + size2;
// Clamp an MV componentwise into the per-block legal range computed in
// s->min_mv / s->max_mv. NOTE(review): the second parameter line and
// braces were dropped by this extraction.
1063 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
1066 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
1067 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
/* Build motion-vector prediction candidates for the current block for
 * reference frame 'ref'.  Candidates are tried in order: MVs of earlier
 * sub-blocks of this block (sb >= 0), spatial neighbours using the same
 * reference, the co-located MV of the previous frame, then the same
 * candidates again allowing a *different* reference frame (negated when
 * the sign bias differs).  The RETURN_* macros contain early 'return'
 * statements, so control flow exits as soon as enough distinct candidates
 * are found.  NOTE(review): this listing is an extraction with lines
 * elided; the macro bodies in particular are incomplete here. */
1070 static void find_ref_mvs(VP9Context *s,
1071                          VP56mv *pmv, int ref, int z, int idx, int sb)
/* up to 8 spatial neighbour offsets ({col,row} deltas) per block size */
1073 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
1074 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
1075 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
1076 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
1077 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
1078 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
1079 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
1080 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
1081 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1082 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
1083 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1084 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
1085 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
1086 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
1087 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1088 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
1089 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
1090 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1091 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1092 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1093 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1094 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1095 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1096 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1097 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1098 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1099 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1102 int row = s->row, col = s->col, row7 = s->row7;
1103 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
1104 #define INVALID_MV 0x80008000U
/* 'mem' remembers the first accepted candidate so that a second,
 * different candidate can be detected */
1105 uint32_t mem = INVALID_MV;
1108 #define RETURN_DIRECT_MV(mv) \
1110 uint32_t m = AV_RN32A(&mv); \
1114 } else if (mem == INVALID_MV) { \
1116 } else if (m != mem) { \
/* sub-8x8: MVs of earlier sub-blocks of this very block come first */
1123 if (sb == 2 || sb == 1) {
1124 RETURN_DIRECT_MV(b->mv[0][z]);
1125 } else if (sb == 3) {
1126 RETURN_DIRECT_MV(b->mv[2][z]);
1127 RETURN_DIRECT_MV(b->mv[1][z]);
1128 RETURN_DIRECT_MV(b->mv[0][z]);
1131 #define RETURN_MV(mv) \
1136 clamp_mv(&tmp, &mv, s); \
1137 m = AV_RN32A(&tmp); \
1141 } else if (mem == INVALID_MV) { \
1143 } else if (m != mem) { \
1148 uint32_t m = AV_RN32A(&mv); \
1150 clamp_mv(pmv, &mv, s); \
1152 } else if (mem == INVALID_MV) { \
1154 } else if (m != mem) { \
1155 clamp_mv(pmv, &mv, s); \
/* directly-above block, same reference frame */
1162 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1163 if (mv->ref[0] == ref) {
1164 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1165 } else if (mv->ref[1] == ref) {
1166 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
/* left block, same reference frame (only within the current tile) */
1169 if (col > s->tiling.tile_col_start) {
1170 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1171 if (mv->ref[0] == ref) {
1172 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1173 } else if (mv->ref[1] == ref) {
1174 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1182 // previously coded MVs in this neighbourhood, using same reference frame
1183 for (; i < 8; i++) {
1184 int c = p[i][0] + col, r = p[i][1] + row;
1186 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1187 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1189 if (mv->ref[0] == ref) {
1190 RETURN_MV(mv->mv[0]);
1191 } else if (mv->ref[1] == ref) {
1192 RETURN_MV(mv->mv[1]);
1197 // MV at this position in previous frame, using same reference frame
1198 if (s->use_last_frame_mvs) {
1199 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
/* with frame threading, wait until that row of the previous frame is
 * decoded before reading its MVs */
1201 if (!s->frames[REF_FRAME_MVPAIR].uses_2pass)
1202 ff_thread_await_progress(&s->frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
1203 if (mv->ref[0] == ref) {
1204 RETURN_MV(mv->mv[0]);
1205 } else if (mv->ref[1] == ref) {
1206 RETURN_MV(mv->mv[1]);
/* candidate from a *different* reference: negate the MV when the two
 * references have opposite sign bias (the 'scale' condition) */
1210 #define RETURN_SCALE_MV(mv, scale) \
1213 VP56mv mv_temp = { -mv.x, -mv.y }; \
1214 RETURN_MV(mv_temp); \
1220 // previously coded MVs in this neighbourhood, using different reference frame
1221 for (i = 0; i < 8; i++) {
1222 int c = p[i][0] + col, r = p[i][1] + row;
1224 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1225 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1227 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1228 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1230 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1231 // BUG - libvpx has this condition regardless of whether
1232 // we used the first ref MV and pre-scaling
1233 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1234 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1239 // MV at this position in previous frame, using different reference frame
1240 if (s->use_last_frame_mvs) {
1241 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1243 // no need to await_progress, because we already did that above
1244 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1245 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1247 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1248 // BUG - libvpx has this condition regardless of whether
1249 // we used the first ref MV and pre-scaling
1250 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1251 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1258 #undef RETURN_SCALE_MV
/* Decode one motion-vector component delta (idx: 0 = row, 1 = col, per the
 * callers in fill_mv).  Reads sign, magnitude class, then either the
 * class0 path (integer bit + fractional + optional high-precision bit) or
 * the generic path (per-class magnitude bits + fractional + optional hp
 * bit).  All decoded symbols are counted for backward adaptation.
 * 'hp' enables the high-precision bit.  Returns the signed value
 * +-(n + 1).  NOTE(review): some lines are elided in this extraction. */
1261 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1263 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1264 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1265 s->prob.p.mv_comp[idx].classes);
1267 s->counts.mv_comp[idx].sign[sign]++;
1268 s->counts.mv_comp[idx].classes[c]++;
/* generic path: read 'c' magnitude bits (accumulation lines elided) */
1272 for (n = 0, m = 0; m < c; m++) {
1273 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1275 s->counts.mv_comp[idx].bits[m][bit]++;
1278 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1280 s->counts.mv_comp[idx].fp[bit]++;
1282 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1283 s->counts.mv_comp[idx].hp[bit]++;
1287 // bug in libvpx - we count for bw entropy purposes even if the
1289 s->counts.mv_comp[idx].hp[1]++;
/* class0 path (smallest magnitude class) */
1293 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1294 s->counts.mv_comp[idx].class0[n]++;
1295 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1296 s->prob.p.mv_comp[idx].class0_fp[n]);
1297 s->counts.mv_comp[idx].class0_fp[n][bit]++;
/* combine integer and fractional parts into the magnitude */
1298 n = (n << 3) | (bit << 1);
1300 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1301 s->counts.mv_comp[idx].class0_hp[bit]++;
1305 // bug in libvpx - we count for bw entropy purposes even if the
1307 s->counts.mv_comp[idx].class0_hp[1]++;
1311 return sign ? -(n + 1) : (n + 1);
/* Fill mv[0] (and mv[1] for compound prediction) for the given inter mode
 * of (sub-)block 'sb': predicts via find_ref_mvs() and, for NEWMV, adds
 * decoded component deltas on top of the predictor.  NOTE(review): the
 * ZEROMV branch body and several closing braces are elided here. */
1314 static void fill_mv(VP9Context *s,
1315 VP56mv *mv, int mode, int sb)
1319 if (mode == ZEROMV) {
1324 // FIXME cache this value and reuse for other subblocks
1325 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1326 mode == NEWMV ? -1 : sb);
1327 // FIXME maybe move this code into find_ref_mvs()
/* hp only kept when |mv| < 64; the elided body presumably rounds the
 * predictor when hp is dropped -- TODO confirm against full source */
1328 if ((mode == NEWMV || sb == -1) &&
1329 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
/* NEWMV: the joint symbol decides which components carry a delta */
1343 if (mode == NEWMV) {
1344 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1345 s->prob.p.mv_joint);
1347 s->counts.mv_joint[j]++;
1348 if (j >= MV_JOINT_V)
1349 mv[0].y += read_mv_component(s, 0, hp);
1351 mv[0].x += read_mv_component(s, 1, hp);
/* second reference (compound prediction): same procedure for mv[1] */
1355 // FIXME cache this value and reuse for other subblocks
1356 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1357 mode == NEWMV ? -1 : sb);
1358 if ((mode == NEWMV || sb == -1) &&
1359 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1373 if (mode == NEWMV) {
1374 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1375 s->prob.p.mv_joint);
1377 s->counts.mv_joint[j]++;
1378 if (j >= MV_JOINT_V)
1379 mv[1].y += read_mv_component(s, 0, hp);
1381 mv[1].x += read_mv_component(s, 1, hp);
/* Splat byte value 'v' over a w x h region of a context plane with the
 * given stride, using widening aligned stores per width.  NOTE(review):
 * the per-width switch/loop structure is largely elided in this listing;
 * only the replicated-constant setups and one store are visible. */
1387 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1388 ptrdiff_t stride, int v)
1398 int v16 = v * 0x0101;
1406 uint32_t v32 = v * 0x01010101;
1415 uint64_t v64 = v * 0x0101010101010101ULL;
1421 uint32_t v32 = v * 0x01010101;
1424 AV_WN32A(ptr + 4, v32);
/* Decode all mode-level information for the current block: segment id,
 * skip flag, intra/inter flag, transform size, intra prediction modes or
 * (for inter blocks) reference frame(s), interpolation filter, inter
 * mode(s) and motion vector(s); then update the left/above context planes
 * and store refs+MVs into the frame-wide MV grid for temporal prediction
 * by future frames.  NOTE(review): this listing is an extraction with
 * many lines (braces, #if selectors, else arms) elided. */
1433 static void decode_mode(AVCodecContext *ctx)
/* partition-context bit patterns per block size, splatted below */
1435 static const uint8_t left_ctx[N_BS_SIZES] = {
1436 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1438 static const uint8_t above_ctx[N_BS_SIZES] = {
1439 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
/* largest transform size usable for each block size */
1441 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1442 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1443 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1445 VP9Context *s = ctx->priv_data;
1447 int row = s->row, col = s->col, row7 = s->row7;
1448 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
/* block size in 8x8 units, clipped to the frame edge */
1449 int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1450 int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
1451 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1452 int vref, filter_id;

/* --- segment id: disabled, explicitly coded, or temporally predicted --- */
1454 if (!s->segmentation.enabled) {
1456 } else if (s->keyframe || s->intraonly) {
1457 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
1458 } else if (!s->segmentation.update_map ||
1459 (s->segmentation.temporal &&
1460 vp56_rac_get_prob_branchy(&s->c,
1461 s->prob.segpred[s->above_segpred_ctx[col] +
1462 s->left_segpred_ctx[row7]]))) {
1463 if (!s->errorres && !s->segmentation.ignore_refmap) {
/* predict segment id as the minimum over the co-located area of the
 * reference frame's segmentation map */
1465 uint8_t *refsegmap = s->frames[REF_FRAME_SEGMAP].segmentation_map;
1467 if (!s->frames[REF_FRAME_SEGMAP].uses_2pass)
1468 ff_thread_await_progress(&s->frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
1469 for (y = 0; y < h4; y++) {
1470 int idx_base = (y + row) * 8 * s->sb_cols + col;
1471 for (x = 0; x < w4; x++)
1472 pred = FFMIN(pred, refsegmap[idx_base + x]);
1474 av_assert1(pred < 8);
1480 memset(&s->above_segpred_ctx[col], 1, w4);
1481 memset(&s->left_segpred_ctx[row7], 1, h4);
1483 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1486 memset(&s->above_segpred_ctx[col], 0, w4);
1487 memset(&s->left_segpred_ctx[row7], 0, h4);
/* persist the decoded id into this frame's segmentation map */
1489 if (s->segmentation.enabled &&
1490 (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1491 setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1492 bw4, bh4, 8 * s->sb_cols, b->seg_id);

/* --- skip flag: forced by segment feature, otherwise coded with the
 *     left+above skip context --- */
1495 b->skip = s->segmentation.enabled &&
1496 s->segmentation.feat[b->seg_id].skip_enabled;
1498 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1499 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1500 s->counts.skip[c][b->skip]++;

/* --- intra/inter flag --- */
1503 if (s->keyframe || s->intraonly) {
1505 } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1506 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1510 if (have_a && have_l) {
1511 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1514 c = have_a ? 2 * s->above_intra_ctx[col] :
1515 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1517 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1518 s->counts.intra[c][bit]++;

/* --- transform size: coded only for TX_SWITCHABLE and non-skipped-inter
 *     blocks; context derived from neighbour skip/txfm state --- */
1522 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1526 c = (s->above_skip_ctx[col] ? max_tx :
1527 s->above_txfm_ctx[col]) +
1528 (s->left_skip_ctx[row7] ? max_tx :
1529 s->left_txfm_ctx[row7]) > max_tx;
1531 c = s->above_skip_ctx[col] ? 1 :
1532 (s->above_txfm_ctx[col] * 2 > max_tx);
1534 } else if (have_l) {
1535 c = s->left_skip_ctx[row7] ? 1 :
1536 (s->left_txfm_ctx[row7] * 2 > max_tx);
/* unary-style decode of the tx size, capped by max_tx */
1542 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1544 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1546 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1548 s->counts.tx32p[c][b->tx]++;
1551 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1553 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1554 s->counts.tx16p[c][b->tx]++;
1557 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1558 s->counts.tx8p[c][b->tx]++;
1565 b->tx = FFMIN(max_tx, s->txfmmode);

/* --- intra modes, keyframe/intra-only: contexted on above/left modes via
 *     the default keyframe probability tables --- */
1568 if (s->keyframe || s->intraonly) {
1569 uint8_t *a = &s->above_mode_ctx[col * 2];
1570 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
/* sub-8x8 sizes carry up to four separate luma modes */
1573 if (b->bs > BS_8x8) {
1574 // FIXME the memory storage intermediates here aren't really
1575 // necessary, they're just there to make the code slightly
1577 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1578 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1579 if (b->bs != BS_8x4) {
1580 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1581 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1582 l[0] = a[1] = b->mode[1];
1584 l[0] = a[1] = b->mode[1] = b->mode[0];
1586 if (b->bs != BS_4x8) {
1587 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1588 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1589 if (b->bs != BS_8x4) {
1590 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1591 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1592 l[1] = a[1] = b->mode[3];
1594 l[1] = a[1] = b->mode[3] = b->mode[2];
1597 b->mode[2] = b->mode[0];
1598 l[1] = a[1] = b->mode[3] = b->mode[1];
1601 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1602 vp9_default_kf_ymode_probs[*a][*l]);
1603 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1604 // FIXME this can probably be optimized
1605 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1606 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1608 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1609 vp9_default_kf_uvmode_probs[b->mode[3]]);
/* --- intra modes in inter frames: adaptive probabilities, contexted on
 *     block-size group rather than neighbours --- */
1610 } else if (b->intra) {
1612 if (b->bs > BS_8x8) {
1613 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1614 s->prob.p.y_mode[0]);
1615 s->counts.y_mode[0][b->mode[0]]++;
1616 if (b->bs != BS_8x4) {
1617 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1618 s->prob.p.y_mode[0]);
1619 s->counts.y_mode[0][b->mode[1]]++;
1621 b->mode[1] = b->mode[0];
1623 if (b->bs != BS_4x8) {
1624 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1625 s->prob.p.y_mode[0]);
1626 s->counts.y_mode[0][b->mode[2]]++;
1627 if (b->bs != BS_8x4) {
1628 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1629 s->prob.p.y_mode[0]);
1630 s->counts.y_mode[0][b->mode[3]]++;
1632 b->mode[3] = b->mode[2];
1635 b->mode[2] = b->mode[0];
1636 b->mode[3] = b->mode[1];
1639 static const uint8_t size_group[10] = {
1640 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1642 int sz = size_group[b->bs];
1644 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1645 s->prob.p.y_mode[sz]);
1646 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1647 s->counts.y_mode[sz][b->mode[3]]++;
1649 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1650 s->prob.p.uv_mode[b->mode[3]]);
1651 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
/* --- inter block: references, filter, inter mode(s), MV(s) --- */
1653 static const uint8_t inter_mode_ctx_lut[14][14] = {
1654 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1655 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1656 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1657 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1658 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1659 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1660 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1661 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1662 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1663 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1664 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1665 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1666 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1667 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
/* a segment feature can force the reference frame */
1670 if (s->segmentation.feat[b->seg_id].ref_enabled) {
1671 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1673 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1675 // read comp_pred flag
1676 if (s->comppredmode != PRED_SWITCHABLE) {
1677 b->comp = s->comppredmode == PRED_COMPREF;
1681 // FIXME add intra as ref=0xff (or -1) to make these easier?
1684 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1686 } else if (s->above_comp_ctx[col]) {
1687 c = 2 + (s->left_intra_ctx[row7] ||
1688 s->left_ref_ctx[row7] == s->fixcompref);
1689 } else if (s->left_comp_ctx[row7]) {
1690 c = 2 + (s->above_intra_ctx[col] ||
1691 s->above_ref_ctx[col] == s->fixcompref);
1693 c = (!s->above_intra_ctx[col] &&
1694 s->above_ref_ctx[col] == s->fixcompref) ^
1695 (!s->left_intra_ctx[row7] &&
1696 s->left_ref_ctx[row & 7] == s->fixcompref);
1699 c = s->above_comp_ctx[col] ? 3 :
1700 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1702 } else if (have_l) {
1703 c = s->left_comp_ctx[row7] ? 3 :
1704 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1708 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1709 s->counts.comp[c][b->comp]++;
1712 // read actual references
1713 // FIXME probably cache a few variables here to prevent repetitive
1714 // memory accesses below
1715 if (b->comp) /* two references */ {
/* the fixed ref's slot is given by its sign bias; only the variable
 * ref is actually coded */
1716 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1718 b->ref[fix_idx] = s->fixcompref;
1719 // FIXME can this codeblob be replaced by some sort of LUT?
1722 if (s->above_intra_ctx[col]) {
1723 if (s->left_intra_ctx[row7]) {
1726 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1728 } else if (s->left_intra_ctx[row7]) {
1729 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1731 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1733 if (refl == refa && refa == s->varcompref[1]) {
1735 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1736 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1737 (refl == s->fixcompref && refa == s->varcompref[0])) {
1740 c = (refa == refl) ? 3 : 1;
1742 } else if (!s->left_comp_ctx[row7]) {
1743 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1746 c = (refl == s->varcompref[1] &&
1747 refa != s->varcompref[1]) ? 2 : 4;
1749 } else if (!s->above_comp_ctx[col]) {
1750 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1753 c = (refa == s->varcompref[1] &&
1754 refl != s->varcompref[1]) ? 2 : 4;
1757 c = (refl == refa) ? 4 : 2;
1761 if (s->above_intra_ctx[col]) {
1763 } else if (s->above_comp_ctx[col]) {
1764 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1766 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1769 } else if (have_l) {
1770 if (s->left_intra_ctx[row7]) {
1772 } else if (s->left_comp_ctx[row7]) {
1773 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1775 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1780 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1781 b->ref[var_idx] = s->varcompref[bit];
1782 s->counts.comp_ref[c][bit]++;
1783 } else /* single reference */ {
/* first bit: LAST vs not-LAST */
1786 if (have_a && !s->above_intra_ctx[col]) {
1787 if (have_l && !s->left_intra_ctx[row7]) {
1788 if (s->left_comp_ctx[row7]) {
1789 if (s->above_comp_ctx[col]) {
1790 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1791 !s->above_ref_ctx[col]);
1793 c = (3 * !s->above_ref_ctx[col]) +
1794 (!s->fixcompref || !s->left_ref_ctx[row7]);
1796 } else if (s->above_comp_ctx[col]) {
1797 c = (3 * !s->left_ref_ctx[row7]) +
1798 (!s->fixcompref || !s->above_ref_ctx[col]);
1800 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1802 } else if (s->above_intra_ctx[col]) {
1804 } else if (s->above_comp_ctx[col]) {
1805 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1807 c = 4 * (!s->above_ref_ctx[col]);
1809 } else if (have_l && !s->left_intra_ctx[row7]) {
/* NOTE(review): this inner test is unreachable -- the guard above
 * already requires !s->left_intra_ctx[row7]; kept as-is (matches
 * upstream decoder behaviour) */
1810 if (s->left_intra_ctx[row7]) {
1812 } else if (s->left_comp_ctx[row7]) {
1813 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1815 c = 4 * (!s->left_ref_ctx[row7]);
1820 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1821 s->counts.single_ref[c][0][bit]++;
/* second bit: choose between the two remaining references */
1825 // FIXME can this codeblob be replaced by some sort of LUT?
1828 if (s->left_intra_ctx[row7]) {
1829 if (s->above_intra_ctx[col]) {
1831 } else if (s->above_comp_ctx[col]) {
1832 c = 1 + 2 * (s->fixcompref == 1 ||
1833 s->above_ref_ctx[col] == 1);
1834 } else if (!s->above_ref_ctx[col]) {
1837 c = 4 * (s->above_ref_ctx[col] == 1);
1839 } else if (s->above_intra_ctx[col]) {
1840 if (s->left_intra_ctx[row7]) {
1842 } else if (s->left_comp_ctx[row7]) {
1843 c = 1 + 2 * (s->fixcompref == 1 ||
1844 s->left_ref_ctx[row7] == 1);
1845 } else if (!s->left_ref_ctx[row7]) {
1848 c = 4 * (s->left_ref_ctx[row7] == 1);
1850 } else if (s->above_comp_ctx[col]) {
1851 if (s->left_comp_ctx[row7]) {
1852 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1853 c = 3 * (s->fixcompref == 1 ||
1854 s->left_ref_ctx[row7] == 1);
1858 } else if (!s->left_ref_ctx[row7]) {
1859 c = 1 + 2 * (s->fixcompref == 1 ||
1860 s->above_ref_ctx[col] == 1);
1862 c = 3 * (s->left_ref_ctx[row7] == 1) +
1863 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1865 } else if (s->left_comp_ctx[row7]) {
1866 if (!s->above_ref_ctx[col]) {
1867 c = 1 + 2 * (s->fixcompref == 1 ||
1868 s->left_ref_ctx[row7] == 1);
1870 c = 3 * (s->above_ref_ctx[col] == 1) +
1871 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1873 } else if (!s->above_ref_ctx[col]) {
1874 if (!s->left_ref_ctx[row7]) {
1877 c = 4 * (s->left_ref_ctx[row7] == 1);
1879 } else if (!s->left_ref_ctx[row7]) {
1880 c = 4 * (s->above_ref_ctx[col] == 1);
1882 c = 2 * (s->left_ref_ctx[row7] == 1) +
1883 2 * (s->above_ref_ctx[col] == 1);
1886 if (s->above_intra_ctx[col] ||
1887 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1889 } else if (s->above_comp_ctx[col]) {
1890 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1892 c = 4 * (s->above_ref_ctx[col] == 1);
1895 } else if (have_l) {
1896 if (s->left_intra_ctx[row7] ||
1897 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1899 } else if (s->left_comp_ctx[row7]) {
1900 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1902 c = 4 * (s->left_ref_ctx[row7] == 1);
1907 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1908 s->counts.single_ref[c][1][bit]++;
1909 b->ref[0] = 1 + bit;
/* --- inter mode for blocks of size 8x8 and larger (in this enum, larger
 *     sizes have smaller BS_ values) --- */
1914 if (b->bs <= BS_8x8) {
1915 if (s->segmentation.feat[b->seg_id].skip_enabled) {
1916 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1918 static const uint8_t off[10] = {
1919 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1922 // FIXME this needs to use the LUT tables from find_ref_mvs
1923 // because not all are -1,0/0,-1
1924 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1925 [s->left_mode_ctx[row7 + off[b->bs]]];
1927 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1928 s->prob.p.mv_mode[c]);
1929 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
/* inter modes start at 10 in the mode enum, hence the '- 10' offset */
1930 s->counts.mv_mode[c][b->mode[0] - 10]++;
/* --- interpolation filter: coded when switchable, contexted on
 *     neighbours' filters --- */
1934 if (s->filtermode == FILTER_SWITCHABLE) {
1937 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1938 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1939 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1940 s->left_filter_ctx[row7] : 3;
1942 c = s->above_filter_ctx[col];
1944 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1945 c = s->left_filter_ctx[row7];
1950 filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1951 s->prob.p.filter[c]);
1952 s->counts.filter[c][filter_id]++;
1953 b->filter = vp9_filter_lut[filter_id];
1955 b->filter = s->filtermode;
/* --- sub-8x8 blocks: a separate mode + MV pair per sub-block --- */
1958 if (b->bs > BS_8x8) {
1959 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1961 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1962 s->prob.p.mv_mode[c]);
1963 s->counts.mv_mode[c][b->mode[0] - 10]++;
1964 fill_mv(s, b->mv[0], b->mode[0], 0);
1966 if (b->bs != BS_8x4) {
1967 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1968 s->prob.p.mv_mode[c]);
1969 s->counts.mv_mode[c][b->mode[1] - 10]++;
1970 fill_mv(s, b->mv[1], b->mode[1], 1);
1972 b->mode[1] = b->mode[0];
1973 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1974 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1977 if (b->bs != BS_4x8) {
1978 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1979 s->prob.p.mv_mode[c]);
1980 s->counts.mv_mode[c][b->mode[2] - 10]++;
1981 fill_mv(s, b->mv[2], b->mode[2], 2);
1983 if (b->bs != BS_8x4) {
1984 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1985 s->prob.p.mv_mode[c]);
1986 s->counts.mv_mode[c][b->mode[3] - 10]++;
1987 fill_mv(s, b->mv[3], b->mode[3], 3);
1989 b->mode[3] = b->mode[2];
1990 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1991 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1994 b->mode[2] = b->mode[0];
1995 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1996 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1997 b->mode[3] = b->mode[1];
1998 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1999 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
/* 8x8 and larger: one MV pair, replicated across all four sub-slots */
2002 fill_mv(s, b->mv[0], b->mode[0], -1);
2003 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
2004 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2005 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
2006 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2007 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2008 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
2011 vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
/* --- splat decoded state into the left/above context planes; SPLAT_CTX
 *     exists in two variants (64-bit vs 32-bit stores) -- the #if
 *     selector line is elided in this listing, presumably a fast-64bit
 *     check; confirm against the full source --- */
2015 #define SPLAT_CTX(var, val, n) \
2017 case 1: var = val; break; \
2018 case 2: AV_WN16A(&var, val * 0x0101); break; \
2019 case 4: AV_WN32A(&var, val * 0x01010101); break; \
2020 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
2022 uint64_t v64 = val * 0x0101010101010101ULL; \
2023 AV_WN64A( &var, v64); \
2024 AV_WN64A(&((uint8_t *) &var)[8], v64); \
2029 #define SPLAT_CTX(var, val, n) \
2031 case 1: var = val; break; \
2032 case 2: AV_WN16A(&var, val * 0x0101); break; \
2033 case 4: AV_WN32A(&var, val * 0x01010101); break; \
2035 uint32_t v32 = val * 0x01010101; \
2036 AV_WN32A( &var, v32); \
2037 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2041 uint32_t v32 = val * 0x01010101; \
2042 AV_WN32A( &var, v32); \
2043 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2044 AV_WN32A(&((uint8_t *) &var)[8], v32); \
2045 AV_WN32A(&((uint8_t *) &var)[12], v32); \
2051 switch (bwh_tab[1][b->bs][0]) {
2052 #define SET_CTXS(dir, off, n) \
2054 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
2055 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
2056 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2057 if (!s->keyframe && !s->intraonly) { \
2058 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
2059 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
2060 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
2062 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2063 if (s->filtermode == FILTER_SWITCHABLE) { \
2064 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2069 case 1: SET_CTXS(above, col, 1); break;
2070 case 2: SET_CTXS(above, col, 2); break;
2071 case 4: SET_CTXS(above, col, 4); break;
2072 case 8: SET_CTXS(above, col, 8); break;
2074 switch (bwh_tab[1][b->bs][1]) {
2075 case 1: SET_CTXS(left, row7, 1); break;
2076 case 2: SET_CTXS(left, row7, 2); break;
2077 case 4: SET_CTXS(left, row7, 4); break;
2078 case 8: SET_CTXS(left, row7, 8); break;
/* --- store decoded MVs into the left/above MV contexts for later
 *     blocks in this frame --- */
2083 if (!s->keyframe && !s->intraonly) {
2084 if (b->bs > BS_8x8) {
2085 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2087 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2088 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2089 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2090 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2091 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2092 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2093 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2094 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2096 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2098 for (n = 0; n < w4 * 2; n++) {
2099 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2100 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2102 for (n = 0; n < h4 * 2; n++) {
2103 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2104 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
/* --- write refs + MVs into the frame-wide mv grid; read back by
 *     find_ref_mvs() of this and future frames --- */
2110 for (y = 0; y < h4; y++) {
2111 int x, o = (row + y) * s->sb_cols * 8 + col;
2112 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
2115 for (x = 0; x < w4; x++) {
2119 } else if (b->comp) {
2120 for (x = 0; x < w4; x++) {
2121 mv[x].ref[0] = b->ref[0];
2122 mv[x].ref[1] = b->ref[1];
2123 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2124 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2127 for (x = 0; x < w4; x++) {
2128 mv[x].ref[0] = b->ref[0];
2130 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2136 // FIXME merge cnt/eob arguments?
/* Generic coefficient-token decoder for one transform block.  For each
 * coefficient position it decodes the EOB / zero / one branches with
 * per-band, per-nnz-context probabilities, lazily fills the Pareto-model
 * tail probabilities (p[3..10]), reads explicit extra bits for large
 * values, then stores the dequantized coefficient (qmul[0] for the first
 * scan position, qmul[1] otherwise).  'cache' keeps decoded magnitudes so
 * the nnz context of later positions can be derived from neighbours 'nb'.
 * NOTE(review): loop heads, 'rc' (scan lookup) and return statements are
 * elided in this extraction. */
2137 static av_always_inline int
2138 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2139 int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
2140 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2141 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2142 const int16_t *band_counts, const int16_t *qmul)
2144 int i = 0, band = 0, band_left = band_counts[band];
2145 uint8_t *tp = p[0][nnz];
2146 uint8_t cache[1024];
2151 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2152 eob[band][nnz][val]++;
2157 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2158 cnt[band][nnz][0]++;
2160 band_left = band_counts[++band];
/* nnz context for the next position: rounded average of two neighbours */
2162 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2164 if (++i == n_coeffs)
2165 break; //invalid input; blocks should end with EOB
2170 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2171 cnt[band][nnz][1]++;
2175 // fill in p[3-10] (model fill) - only once per frame for each pos
2177 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2179 cnt[band][nnz][2]++;
2180 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2181 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2182 cache[rc] = val = 2;
2184 val = 3 + vp56_rac_get_prob(c, tp[5]);
2187 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2189 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2190 val = 5 + vp56_rac_get_prob(c, 159);
2192 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2193 val += vp56_rac_get_prob(c, 145);
/* larger value categories: explicit extra bits with fixed probabilities */
2197 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2198 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2199 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2200 val += (vp56_rac_get_prob(c, 148) << 1);
2201 val += vp56_rac_get_prob(c, 140);
2203 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2204 val += (vp56_rac_get_prob(c, 155) << 2);
2205 val += (vp56_rac_get_prob(c, 140) << 1);
2206 val += vp56_rac_get_prob(c, 135);
2208 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2209 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2210 val += (vp56_rac_get_prob(c, 157) << 3);
2211 val += (vp56_rac_get_prob(c, 141) << 2);
2212 val += (vp56_rac_get_prob(c, 134) << 1);
2213 val += vp56_rac_get_prob(c, 130);
/* high bit depth: the largest category carries two extra top bits */
2216 if (!is8bitsperpixel) {
2218 val += vp56_rac_get_prob(c, 255) << 17;
2219 val += vp56_rac_get_prob(c, 255) << 16;
2221 val += (vp56_rac_get_prob(c, 255) << 15);
2222 val += (vp56_rac_get_prob(c, 255) << 14);
2224 val += (vp56_rac_get_prob(c, 254) << 13);
2225 val += (vp56_rac_get_prob(c, 254) << 12);
2226 val += (vp56_rac_get_prob(c, 254) << 11);
2227 val += (vp56_rac_get_prob(c, 252) << 10);
2228 val += (vp56_rac_get_prob(c, 249) << 9);
2229 val += (vp56_rac_get_prob(c, 243) << 8);
2230 val += (vp56_rac_get_prob(c, 230) << 7);
2231 val += (vp56_rac_get_prob(c, 196) << 6);
2232 val += (vp56_rac_get_prob(c, 177) << 5);
2233 val += (vp56_rac_get_prob(c, 153) << 4);
2234 val += (vp56_rac_get_prob(c, 140) << 3);
2235 val += (vp56_rac_get_prob(c, 133) << 2);
2236 val += (vp56_rac_get_prob(c, 130) << 1);
2237 val += vp56_rac_get_prob(c, 129);
/* 8bpp stores int16 (branch body elided); high bit depth writes a
 * 32-bit value at doubled index -- TODO confirm against full source */
2241 #define STORE_COEF(c, i, v) do { \
2242 if (is8bitsperpixel) { \
2245 AV_WN32A(&c[i * 2], v); \
2249 band_left = band_counts[++band];
/* the /2 variant is presumably the is_tx32x32 path (selector elided);
 * sign bit is read last */
2251 STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
2253 STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
2254 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2256 } while (++i < n_coeffs);
/* 8 bpp, non-32x32 transforms: thin wrapper selecting the compile-time
 * specialization of decode_coeffs_b_generic (is_tx32x32=0, 8bpp=1). */
2261 static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2262 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2263 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2264 const int16_t (*nb)[2], const int16_t *band_counts,
2265 const int16_t *qmul)
2267 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
2268 nnz, scan, nb, band_counts, qmul);
/* 8 bpp, 32x32 transform: wrapper selecting is_tx32x32=1, 8bpp=1. */
2271 static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2272 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2273 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2274 const int16_t (*nb)[2], const int16_t *band_counts,
2275 const int16_t *qmul)
2277 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
2278 nnz, scan, nb, band_counts, qmul);
/* High bit depth (actual depth from s->bpp), non-32x32 transforms:
 * wrapper selecting is_tx32x32=0, is8bitsperpixel=0. */
2281 static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2282 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2283 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2284 const int16_t (*nb)[2], const int16_t *band_counts,
2285 const int16_t *qmul)
2287 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
2288 nnz, scan, nb, band_counts, qmul);
/* High bit depth, 32x32 transform (is_tx32x32 = 1). */
2291 static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2292 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2293 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2294 const int16_t (*nb)[2], const int16_t *band_counts,
2295 const int16_t *qmul)
2297 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
2298 nnz, scan, nb, band_counts, qmul);
/* Decode all residual coefficients for the current block (luma, then both
 * chroma planes), updating the above/left non-zero-coefficient contexts
 * (a[] / l[]) and recording per-sub-block EOBs in s->eob / s->uveob.
 * is8bitsperpixel is a compile-time template argument (see the
 * decode_coeffs_8bpp/16bpp wrappers below). */
2301 static av_always_inline void decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
2303 VP9Context *s = ctx->priv_data;
2305 int row = s->row, col = s->col;
2306 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2307 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2308 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2309 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2310 int end_x = FFMIN(2 * (s->cols - col), w4);
2311 int end_y = FFMIN(2 * (s->rows - row), h4);
2312 int n, pl, x, y, res;
2313 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
2314 int tx = 4 * s->lossless + b->tx;
2315 const int16_t * const *yscans = vp9_scans[tx];
2316 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2317 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2318 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2319 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2320 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
2321 static const int16_t band_counts[4][8] = {
2322 { 1, 2, 3, 4, 3, 16 - 13 },
2323 { 1, 2, 3, 4, 11, 64 - 21 },
2324 { 1, 2, 3, 4, 11, 256 - 21 },
2325 { 1, 2, 3, 4, 11, 1024 - 21 },
2327 const int16_t *y_band_counts = band_counts[b->tx];
2328 const int16_t *uv_band_counts = band_counts[b->uvtx];
2329 int bytesperpixel = is8bitsperpixel ? 1 : 2;
/* MERGE/MERGE_CTX fold the per-4x4 nnz context entries down to one entry
 * per transform block before decoding; SPLAT/SPLAT_CTX broadcast the
 * single result back out afterwards. The aligned multi-byte reads/writes
 * (AV_RN16A/32A/64A) process several context bytes at once. */
2331 #define MERGE(la, end, step, rd) \
2332 for (n = 0; n < end; n += step) \
2333 la[n] = !!rd(&la[n])
2334 #define MERGE_CTX(step, rd) \
2336 MERGE(l, end_y, step, rd); \
2337 MERGE(a, end_x, step, rd); \
2340 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2341 for (n = 0, y = 0; y < end_y; y += step) { \
2342 for (x = 0; x < end_x; x += step, n += step * step) { \
2343 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2344 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2345 (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
2346 c, e, p, a[x] + l[y], yscans[txtp], \
2347 ynbs[txtp], y_band_counts, qmul[0]); \
2348 a[x] = l[y] = !!res; \
2350 AV_WN16A(&s->eob[n], res); \
2357 #define SPLAT(la, end, step, cond) \
2359 for (n = 1; n < end; n += step) \
2360 la[n] = la[n - 1]; \
2361 } else if (step == 4) { \
2363 for (n = 0; n < end; n += step) \
2364 AV_WN32A(&la[n], la[n] * 0x01010101); \
2366 for (n = 0; n < end; n += step) \
2367 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2369 } else /* step == 8 */ { \
2371 if (HAVE_FAST_64BIT) { \
2372 for (n = 0; n < end; n += step) \
2373 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2375 for (n = 0; n < end; n += step) { \
2376 uint32_t v32 = la[n] * 0x01010101; \
2377 AV_WN32A(&la[n], v32); \
2378 AV_WN32A(&la[n + 4], v32); \
2382 for (n = 0; n < end; n += step) \
2383 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2386 #define SPLAT_CTX(step) \
2388 SPLAT(a, end_x, step, end_x == w4); \
2389 SPLAT(l, end_y, step, end_y == h4); \
2395 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2398 MERGE_CTX(2, AV_RN16A);
2399 DECODE_Y_COEF_LOOP(2, 0,);
2403 MERGE_CTX(4, AV_RN32A);
2404 DECODE_Y_COEF_LOOP(4, 0,);
2408 MERGE_CTX(8, AV_RN64A);
2409 DECODE_Y_COEF_LOOP(8, 0, 32);
/* Chroma variant of the luma loop above; chroma always uses the DCT_DCT
 * scan and the [1] (AC-ish) qmul entry. */
2414 #define DECODE_UV_COEF_LOOP(step, v) \
2415 for (n = 0, y = 0; y < end_y; y += step) { \
2416 for (x = 0; x < end_x; x += step, n += step * step) { \
2417 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2418 (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
2419 16 * step * step, c, e, p, a[x] + l[y], \
2420 uvscan, uvnb, uv_band_counts, qmul[1]); \
2421 a[x] = l[y] = !!res; \
2423 AV_WN16A(&s->uveob[pl][n], res); \
2425 s->uveob[pl][n] = res; \
2430 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2431 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2432 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
/* Second pass: both chroma planes, with per-plane nnz contexts. */
2437 for (pl = 0; pl < 2; pl++) {
2438 a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2439 l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2442 DECODE_UV_COEF_LOOP(1,);
2445 MERGE_CTX(2, AV_RN16A);
2446 DECODE_UV_COEF_LOOP(2,);
2450 MERGE_CTX(4, AV_RN32A);
2451 DECODE_UV_COEF_LOOP(4,);
2455 MERGE_CTX(8, AV_RN64A);
2456 DECODE_UV_COEF_LOOP(8, 32);
/* Non-inline entry point instantiating decode_coeffs() for 8 bpp. */
2463 static void decode_coeffs_8bpp(AVCodecContext *ctx)
2465 decode_coeffs(ctx, 1);
/* Non-inline entry point instantiating decode_coeffs() for high bit depth. */
2468 static void decode_coeffs_16bpp(AVCodecContext *ctx)
2470 decode_coeffs(ctx, 0);
/* Prepare the edge pixels for one intra-predicted transform block and
 * return the (possibly remapped) prediction mode.
 *
 * Fills *a (above edge, may be redirected into a_buf) and l (left edge)
 * from either the frame border defaults, the pre-loopfilter
 * intra_pred_data[] row buffer (at the top of an sb64 row), or the
 * already-reconstructed neighbour pixels. Modes that would reference
 * unavailable neighbours are remapped via mode_conv[] (e.g. VERT_PRED
 * without a top edge becomes DC_127_PRED).
 *
 * Coordinates: (row, col) in 8px block units; (y, x) sub-block position
 * inside the current block in tx-sized steps; p = plane; ss_h/ss_v =
 * chroma subsampling flags; bytesperpixel selects 8 vs. high bit depth. */
2473 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2474 uint8_t *dst_edge, ptrdiff_t stride_edge,
2475 uint8_t *dst_inner, ptrdiff_t stride_inner,
2476 uint8_t *l, int col, int x, int w,
2477 int row, int y, enum TxfmMode tx,
2478 int p, int ss_h, int ss_v, int bytesperpixel)
2480 int have_top = row > 0 || y > 0;
2481 int have_left = col > s->tiling.tile_col_start || x > 0;
2482 int have_right = x < w - 1;
2484 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2485 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2486 { DC_127_PRED, VERT_PRED } },
2487 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2488 { HOR_PRED, HOR_PRED } },
2489 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2490 { LEFT_DC_PRED, DC_PRED } },
2491 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2492 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2493 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2494 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2495 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2496 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2497 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2498 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2499 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2500 { DC_127_PRED, VERT_LEFT_PRED } },
2501 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2502 { HOR_UP_PRED, HOR_UP_PRED } },
2503 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2504 { HOR_PRED, TM_VP8_PRED } },
/* Which edges each (post-remap) mode actually reads. */
2506 static const struct {
2507 uint8_t needs_left:1;
2508 uint8_t needs_top:1;
2509 uint8_t needs_topleft:1;
2510 uint8_t needs_topright:1;
2511 uint8_t invert_left:1;
2512 } edges[N_INTRA_PRED_MODES] = {
2513 [VERT_PRED] = { .needs_top = 1 },
2514 [HOR_PRED] = { .needs_left = 1 },
2515 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2516 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2517 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2518 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2519 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2520 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2521 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2522 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2523 [LEFT_DC_PRED] = { .needs_left = 1 },
2524 [TOP_DC_PRED] = { .needs_top = 1 },
2525 [DC_128_PRED] = { 0 },
2526 [DC_127_PRED] = { 0 },
2527 [DC_129_PRED] = { 0 }
/* Remap the mode to one that only uses available edges. */
2530 av_assert2(mode >= 0 && mode < 10);
2531 mode = mode_conv[mode][have_left][have_top];
/* Build the "above" edge (and top-left / top-right where needed). */
2532 if (edges[mode].needs_top) {
2533 uint8_t *top, *topleft;
2534 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2535 int n_px_need_tr = 0;
2537 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2540 // if top of sb64-row, use s->intra_pred_data[] instead of
2541 // dst[-stride] for intra prediction (it contains pre- instead of
2542 // post-loopfilter data)
2544 top = !(row & 7) && !y ?
2545 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2546 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2548 topleft = !(row & 7) && !y ?
2549 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2550 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2551 &dst_inner[-stride_inner];
2555 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2556 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2557 n_px_need + n_px_need_tr <= n_px_have) {
2561 if (n_px_need <= n_px_have) {
2562 memcpy(*a, top, n_px_need * bytesperpixel);
/* memset_bpp: replicate one (1- or 2-byte) pixel value from v[i2] into
 * num positions of c starting at i1 — bit-depth-agnostic memset. */
2564 #define memset_bpp(c, i1, v, i2, num) do { \
2565 if (bytesperpixel == 1) { \
2566 memset(&(c)[(i1)], (v)[(i2)], (num)); \
2568 int n, val = AV_RN16A(&(v)[(i2) * 2]); \
2569 for (n = 0; n < (num); n++) { \
2570 AV_WN16A(&(c)[((i1) + n) * 2], val); \
2574 memcpy(*a, top, n_px_have * bytesperpixel);
2575 memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
2578 #define memset_val(c, val, num) do { \
2579 if (bytesperpixel == 1) { \
2580 memset((c), (val), (num)); \
2583 for (n = 0; n < (num); n++) { \
2584 AV_WN16A(&(c)[n * 2], (val)); \
2588 memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
2590 if (edges[mode].needs_topleft) {
2591 if (have_left && have_top) {
2592 #define assign_bpp(c, i1, v, i2) do { \
2593 if (bytesperpixel == 1) { \
2594 (c)[(i1)] = (v)[(i2)]; \
2596 AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
2599 assign_bpp(*a, -1, topleft, -1);
2601 #define assign_val(c, i, v) do { \
2602 if (bytesperpixel == 1) { \
2605 AV_WN16A(&(c)[(i) * 2], (v)); \
2608 assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
/* Top-right extension: only 4x4 modes ever read past the block width. */
2611 if (tx == TX_4X4 && edges[mode].needs_topright) {
2612 if (have_top && have_right &&
2613 n_px_need + n_px_need_tr <= n_px_have) {
2614 memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
2616 memset_bpp(*a, 4, *a, 3, 4);
/* Build the "left" edge; HOR_UP_PRED reads it bottom-up (invert_left). */
2621 if (edges[mode].needs_left) {
2623 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2624 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2625 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2627 if (edges[mode].invert_left) {
2628 if (n_px_need <= n_px_have) {
2629 for (i = 0; i < n_px_need; i++)
2630 assign_bpp(l, i, &dst[i * stride], -1);
2632 for (i = 0; i < n_px_have; i++)
2633 assign_bpp(l, i, &dst[i * stride], -1);
2634 memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
2637 if (n_px_need <= n_px_have) {
2638 for (i = 0; i < n_px_need; i++)
2639 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2641 for (i = 0; i < n_px_have; i++)
2642 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2643 memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
2647 memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
/* Intra reconstruction of the current block: for every transform-sized
 * sub-block, set up edge pixels (check_intra_mode), run the DSP intra
 * predictor, and add the inverse-transformed residual when not skipped.
 * Luma first, then both chroma planes. Writes go both to s->dst[] (the
 * possibly-temporary working surface) and are predicted from the real
 * frame pointers (dst_r) where the final pixels live. */
2654 static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
2655 ptrdiff_t uv_off, int bytesperpixel)
2657 VP9Context *s = ctx->priv_data;
2659 int row = s->row, col = s->col;
2660 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2661 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2662 int end_x = FFMIN(2 * (s->cols - col), w4);
2663 int end_y = FFMIN(2 * (s->rows - row), h4);
2664 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2665 int uvstep1d = 1 << b->uvtx, p;
2666 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2667 LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
2668 LOCAL_ALIGNED_32(uint8_t, l, [64]);
/* Luma pass. */
2670 for (n = 0, y = 0; y < end_y; y += step1d) {
2671 uint8_t *ptr = dst, *ptr_r = dst_r;
2672 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
2673 ptr_r += 4 * step1d * bytesperpixel, n += step) {
2674 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2676 uint8_t *a = &a_buf[32];
2677 enum TxfmType txtp = vp9_intra_txfm_type[mode];
2678 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2680 mode = check_intra_mode(s, mode, &a, ptr_r,
2681 s->frames[CUR_FRAME].tf.f->linesize[0],
2682 ptr, s->y_stride, l,
2683 col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
2684 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2686 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2687 s->block + 16 * n * bytesperpixel, eob);
2689 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2690 dst += 4 * step1d * s->y_stride;
/* Chroma pass: always uvmode / DCT_DCT. */
2697 step = 1 << (b->uvtx * 2);
2698 for (p = 0; p < 2; p++) {
2699 dst = s->dst[1 + p];
2700 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2701 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2702 uint8_t *ptr = dst, *ptr_r = dst_r;
2703 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
2704 ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
2705 int mode = b->uvmode;
2706 uint8_t *a = &a_buf[32];
2707 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2709 mode = check_intra_mode(s, mode, &a, ptr_r,
2710 s->frames[CUR_FRAME].tf.f->linesize[1],
2711 ptr, s->uv_stride, l, col, x, w4, row, y,
2712 b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
2713 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2715 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2716 s->uvblock[p] + 16 * n * bytesperpixel, eob);
2718 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2719 dst += 4 * uvstep1d * s->uv_stride;
/* Non-inline instantiation of intra_recon() for 8 bpp (1 byte/pixel). */
2724 static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2726 intra_recon(ctx, y_off, uv_off, 1);
/* Non-inline instantiation of intra_recon() for high bit depth (2 bytes/pixel). */
2729 static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2731 intra_recon(ctx, y_off, uv_off, 2);
/* Luma motion compensation from a reference frame of a different size
 * (scaled prediction). Maps the block position and motion vector into
 * reference coordinates via the Q14 scale[] factors, waits for the
 * reference row to be decoded (frame-threading), and falls back to
 * emulated_edge_mc when the needed reference area crosses the frame
 * border. */
2734 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2735 uint8_t *dst, ptrdiff_t dst_stride,
2736 const uint8_t *ref, ptrdiff_t ref_stride,
2737 ThreadFrame *ref_frame,
2738 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2739 int bw, int bh, int w, int h, int bytesperpixel,
2740 const uint16_t *scale, const uint8_t *step)
2742 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2743 // BUG libvpx seems to scale the two components separately. This introduces
2744 // rounding errors but we have to reproduce them to be exactly compatible
2745 // with the output from libvpx...
2746 int mx = scale_mv(mv->x * 2, 0) + scale_mv(x * 16, 0);
2747 int my = scale_mv(mv->y * 2, 1) + scale_mv(y * 16, 1);
2748 int refbw_m1, refbh_m1;
2753 ref += y * ref_stride + x * bytesperpixel;
/* Last reference sample touched, minus one, in each dimension. */
2756 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2757 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2758 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2759 // we use +7 because the last 7 pixels of each sbrow can be changed in
2760 // the longest loopfilter of the next sbrow
2761 th = (y + refbh_m1 + 4 + 7) >> 6;
2762 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2763 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2764 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2765 ref - 3 * ref_stride - 3 * bytesperpixel,
2767 refbw_m1 + 8, refbh_m1 + 8,
2768 x - 3, y - 3, w, h);
2769 ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2772 smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
/* Chroma counterpart of mc_luma_scaled(): handles both U and V planes in
 * one call (they share the motion vector and geometry). Reproduces the
 * libvpx coordinate-scaling bug (webm issue #820) for bit-exact output;
 * the unbugged formula is kept in the alternative branch. */
2775 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2776 uint8_t *dst_u, uint8_t *dst_v,
2777 ptrdiff_t dst_stride,
2778 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2779 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2780 ThreadFrame *ref_frame,
2781 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2782 int bw, int bh, int w, int h, int bytesperpixel,
2783 const uint16_t *scale, const uint8_t *step)
2786 int refbw_m1, refbh_m1;
2790 // BUG https://code.google.com/p/webm/issues/detail?id=820
2791 mx = scale_mv(mv->x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2793 mx = scale_mv(mv->x << 1, 0) + scale_mv(x * 16, 0);
2796 // BUG https://code.google.com/p/webm/issues/detail?id=820
2797 my = scale_mv(mv->y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2799 my = scale_mv(mv->y << 1, 1) + scale_mv(y * 16, 1);
2804 ref_u += y * src_stride_u + x * bytesperpixel;
2805 ref_v += y * src_stride_v + x * bytesperpixel;
/* Last reference sample touched, minus one, in each dimension. */
2808 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2809 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2810 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2811 // we use +7 because the last 7 pixels of each sbrow can be changed in
2812 // the longest loopfilter of the next sbrow
2813 th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2814 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2815 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2816 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2817 ref_u - 3 * src_stride_u - 3 * bytesperpixel,
2819 refbw_m1 + 8, refbh_m1 + 8,
2820 x - 3, y - 3, w, h);
2821 ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2822 smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
2824 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2825 ref_v - 3 * src_stride_v - 3 * bytesperpixel,
2827 refbw_m1 + 8, refbh_m1 + 8,
2828 x - 3, y - 3, w, h);
2829 ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2830 smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
2832 smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2833 smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
/* Instantiate the inter-prediction template (vp9_mc_template.c) for the
 * SCALED motion-compensation path, once per bit depth. The template
 * calls mc_luma_dir()/mc_chroma_dir(), which are bound here to the
 * *_scaled helpers (note the s##mc token paste selecting the scaled DSP
 * function table entry). */
2837 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2838 mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
2839 mv, bw, bh, w, h, bytesperpixel, \
2840 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2841 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2842 row, col, mv, bw, bh, w, h, i) \
2843 mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2844 row, col, mv, bw, bh, w, h, bytesperpixel, \
2845 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2847 #define FN(x) x##_scaled_8bpp
2848 #define BYTES_PER_PIXEL 1
2849 #include "vp9_mc_template.c"
2851 #undef BYTES_PER_PIXEL
2852 #define FN(x) x##_scaled_16bpp
2853 #define BYTES_PER_PIXEL 2
2854 #include "vp9_mc_template.c"
/* Tear down the scaled-path bindings before the unscaled instantiation. */
2856 #undef mc_chroma_dir
2858 #undef BYTES_PER_PIXEL
/* Luma motion compensation when the reference frame has the same size as
 * the current frame (no coordinate scaling). The !!mx / !!my factors add
 * the 3-pixel filter margin only along the dimensions that are actually
 * sub-pel filtered; full-pel MC needs no margin. */
2861 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2862 uint8_t *dst, ptrdiff_t dst_stride,
2863 const uint8_t *ref, ptrdiff_t ref_stride,
2864 ThreadFrame *ref_frame,
2865 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2866 int bw, int bh, int w, int h, int bytesperpixel)
2868 int mx = mv->x, my = mv->y, th;
2872 ref += y * ref_stride + x * bytesperpixel;
2875 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2876 // we use +7 because the last 7 pixels of each sbrow can be changed in
2877 // the longest loopfilter of the next sbrow
/* Wait until the reference thread has decoded up to the needed sb row. */
2878 th = (y + bh + 4 * !!my + 7) >> 6;
2879 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2880 if (x < !!mx * 3 || y < !!my * 3 ||
2881 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2882 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2883 ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
2885 bw + !!mx * 7, bh + !!my * 7,
2886 x - !!mx * 3, y - !!my * 3, w, h);
2887 ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2890 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
/* Chroma counterpart of mc_luma_unscaled(): both U and V planes in one
 * call. The motion vector is up-shifted by the inverse subsampling so it
 * is expressed in chroma-plane 1/16-pel units. */
2893 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2894 uint8_t *dst_u, uint8_t *dst_v,
2895 ptrdiff_t dst_stride,
2896 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2897 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2898 ThreadFrame *ref_frame,
2899 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2900 int bw, int bh, int w, int h, int bytesperpixel)
2902 int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
2906 ref_u += y * src_stride_u + x * bytesperpixel;
2907 ref_v += y * src_stride_v + x * bytesperpixel;
2910 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2911 // we use +7 because the last 7 pixels of each sbrow can be changed in
2912 // the longest loopfilter of the next sbrow
/* Progress is tracked in luma rows, hence the subsampling-aware shift. */
2913 th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2914 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2915 if (x < !!mx * 3 || y < !!my * 3 ||
2916 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2917 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2918 ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
2920 bw + !!mx * 7, bh + !!my * 7,
2921 x - !!mx * 3, y - !!my * 3, w, h);
2922 ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2923 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
2925 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2926 ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
2928 bw + !!mx * 7, bh + !!my * 7,
2929 x - !!mx * 3, y - !!my * 3, w, h);
2930 ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2931 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
2933 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2934 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
/* Instantiate the inter-prediction template for the UNSCALED
 * motion-compensation path, once per bit depth; mc_luma_dir() /
 * mc_chroma_dir() are bound to the unscaled helpers here. */
2938 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2939 mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2940 mv, bw, bh, w, h, bytesperpixel)
2941 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2942 row, col, mv, bw, bh, w, h, i) \
2943 mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2944 row, col, mv, bw, bh, w, h, bytesperpixel)
2946 #define FN(x) x##_8bpp
2947 #define BYTES_PER_PIXEL 1
2948 #include "vp9_mc_template.c"
2950 #undef BYTES_PER_PIXEL
2951 #define FN(x) x##_16bpp
2952 #define BYTES_PER_PIXEL 2
2953 #include "vp9_mc_template.c"
/* Tear down the unscaled MC helper macros after the final template
 * instantiation. The previous undefs were typo'd as "mc_luma_dir_dir" /
 * "mc_chroma_dir_dir" — names that were never defined — so they were
 * no-ops and left mc_luma_dir/mc_chroma_dir defined past this point;
 * undef the macros that actually exist. */
2954 #undef mc_luma_dir
2955 #undef mc_chroma_dir
2957 #undef BYTES_PER_PIXEL
/* Inter reconstruction: run motion-compensated prediction (scaled path if
 * either used reference has a non-zero scale factor, else unscaled), then
 * add the inverse-transformed residuals per transform-sized sub-block. */
2960 static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
2962 VP9Context *s = ctx->priv_data;
2964 int row = s->row, col = s->col;
/* Prediction: pick scaled vs. unscaled and 8 vs. 16 bpp instantiation. */
2966 if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
2967 if (bytesperpixel == 1) {
2968 inter_pred_scaled_8bpp(ctx);
2970 inter_pred_scaled_16bpp(ctx);
2973 if (bytesperpixel == 1) {
2974 inter_pred_8bpp(ctx);
2976 inter_pred_16bpp(ctx);
/* Residual add-back below mirrors intra_recon(), minus intra prediction. */
2980 /* mostly copied intra_recon() */
2982 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2983 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2984 int end_x = FFMIN(2 * (s->cols - col), w4);
2985 int end_y = FFMIN(2 * (s->rows - row), h4);
2986 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2987 int uvstep1d = 1 << b->uvtx, p;
2988 uint8_t *dst = s->dst[0];
2991 for (n = 0, y = 0; y < end_y; y += step1d) {
2993 for (x = 0; x < end_x; x += step1d,
2994 ptr += 4 * step1d * bytesperpixel, n += step) {
2995 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2998 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2999 s->block + 16 * n * bytesperpixel, eob);
3001 dst += 4 * s->y_stride * step1d;
/* Chroma residuals. */
3007 step = 1 << (b->uvtx * 2);
3008 for (p = 0; p < 2; p++) {
3009 dst = s->dst[p + 1];
3010 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
3012 for (x = 0; x < end_x; x += uvstep1d,
3013 ptr += 4 * uvstep1d * bytesperpixel, n += step) {
3014 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
3017 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
3018 s->uvblock[p] + 16 * n * bytesperpixel, eob);
3020 dst += 4 * uvstep1d * s->uv_stride;
/* Non-inline instantiation of inter_recon() for 8 bpp (1 byte/pixel). */
3026 static void inter_recon_8bpp(AVCodecContext *ctx)
3028 inter_recon(ctx, 1);
/* Non-inline instantiation of inter_recon() for high bit depth (2 bytes/pixel). */
3031 static void inter_recon_16bpp(AVCodecContext *ctx)
3033 inter_recon(ctx, 2);
/* Compute the loop-filter edge bitmasks for the current block and OR them
 * into the per-superblock VP9Filter mask[is_uv][is_row][row][filter_size]
 * tables. Each bit represents one 8-pixel column position; mask indices
 * 0..3 select 16-wide / 8-wide / 4-wide / inner-4 filtering. (row_and_7,
 * col_and_7) is the block position inside the sb64; w/h its size in 8px
 * units; col_end/row_end flag odd partial edges at the frame border. */
3036 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
3037 int row_and_7, int col_and_7,
3038 int w, int h, int col_end, int row_end,
3039 enum TxfmMode tx, int skip_inter)
3041 static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
3042 static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
3044 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
3045 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
3046 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
3047 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
3049 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
3050 // edges. This means that for UV, we work on two subsampled blocks at
3051 // a time, and we only use the topleft block's mode information to set
3052 // things like block strength. Thus, for any block size smaller than
3053 // 16x16, ignore the odd portion of the block.
3054 if (tx == TX_4X4 && (ss_v | ss_h)) {
/* Non-skipped 4x4: every internal 4px edge is filtered. */
3069 if (tx == TX_4X4 && !skip_inter) {
3070 int t = 1 << col_and_7, m_col = (t << w) - t, y;
3071 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
3072 int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
3074 for (y = row_and_7; y < h + row_and_7; y++) {
3075 int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
3077 mask[0][y][1] |= m_row_8;
3078 mask[0][y][2] |= m_row_4;
3079 // for odd lines, if the odd col is not being filtered,
3080 // skip odd row also:
3087 // if a/c are even row/col and b/d are odd, and d is skipped,
3088 // e.g. right edge of size-66x66.webm, then skip b also (bug)
3089 if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
3090 mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
3092 mask[1][y][col_mask_id] |= m_col;
3095 mask[0][y][3] |= m_col;
3097 mask[1][y][3] |= m_col;
/* Larger transforms / skipped blocks: only the block boundary edges. */
3100 int y, t = 1 << col_and_7, m_col = (t << w) - t;
3103 int mask_id = (tx == TX_8X8);
3104 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
3105 int l2 = tx + ss_h - 1, step1d;
3106 int m_row = m_col & masks[l2];
3108 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
3109 // 8wd loopfilter to prevent going off the visible edge.
3110 if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
3111 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
3112 int m_row_8 = m_row - m_row_16;
3114 for (y = row_and_7; y < h + row_and_7; y++) {
3115 mask[0][y][0] |= m_row_16;
3116 mask[0][y][1] |= m_row_8;
3119 for (y = row_and_7; y < h + row_and_7; y++)
3120 mask[0][y][mask_id] |= m_row;
3125 if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
3126 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
3127 mask[1][y][0] |= m_col;
3128 if (y - row_and_7 == h - 1)
3129 mask[1][y][1] |= m_col;
3131 for (y = row_and_7; y < h + row_and_7; y += step1d)
3132 mask[1][y][mask_id] |= m_col;
3134 } else if (tx != TX_4X4) {
3137 mask_id = (tx == TX_8X8) || (h == ss_v);
3138 mask[1][row_and_7][mask_id] |= m_col;
3139 mask_id = (tx == TX_8X8) || (w == ss_h);
3140 for (y = row_and_7; y < h + row_and_7; y++)
3141 mask[0][y][mask_id] |= t;
3143 int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
3145 for (y = row_and_7; y < h + row_and_7; y++) {
3146 mask[0][y][2] |= t4;
3147 mask[0][y][1] |= t8;
3149 mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
/* Decode one block: parse mode/coefficients, reconstruct (intra or
 * inter), copy any edge-emulated pixels back into the frame, and set up
 * loop-filter level/masks. Advances the per-tile coefficient buffer
 * pointers (s->block, s->uvblock, s->eob, s->uveob) past this block. */
3154 static void decode_b(AVCodecContext *ctx, int row, int col,
3155 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
3156 enum BlockLevel bl, enum BlockPartition bp)
3158 VP9Context *s = ctx->priv_data;
3160 enum BlockSize bs = bl * 3 + bp;
3161 int bytesperpixel = s->bytesperpixel;
3162 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
3164 AVFrame *f = s->frames[CUR_FRAME].tf.f;
/* Clamp motion-vector range to what can reference valid frame area. */
3170 s->min_mv.x = -(128 + col * 64);
3171 s->min_mv.y = -(128 + row * 64);
3172 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
3173 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
3179 b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
3180 (s->ss_v && h4 * 2 == (1 << b->tx)));
3183 if (bytesperpixel == 1) {
3184 decode_coeffs_8bpp(ctx);
3186 decode_coeffs_16bpp(ctx);
/* For skipped blocks, clear the above/left nnz contexts covered by this
 * block; SPLAT_ZERO_CTX picks the widest aligned zero-store for n bytes. */
3191 #define SPLAT_ZERO_CTX(v, n) \
3193 case 1: v = 0; break; \
3194 case 2: AV_ZERO16(&v); break; \
3195 case 4: AV_ZERO32(&v); break; \
3196 case 8: AV_ZERO64(&v); break; \
3197 case 16: AV_ZERO128(&v); break; \
3199 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3201 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3202 if (s->ss_##dir2) { \
3203 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3204 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3206 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3207 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3212 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
3213 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
3214 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
3215 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
3218 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
3219 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
3220 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
3221 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
/* Advance coefficient buffers past this (skipped) block. */
3226 s->block += w4 * h4 * 64 * bytesperpixel;
3227 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3228 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3229 s->eob += 4 * w4 * h4;
3230 s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3231 s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
/* Pick the reconstruction target: if the block overhangs the frame or
 * the stride cannot hold it, render into tmp_y/tmp_uv and copy back. */
3237 // emulated overhangs if the stride of the target buffer can't hold. This
3238 // allows to support emu-edge and so on even if we have large block
3240 emu[0] = (col + w4) * 8 > f->linesize[0] ||
3241 (row + h4) > s->rows;
3242 emu[1] = (col + w4) * 4 > f->linesize[1] ||
3243 (row + h4) > s->rows;
3245 s->dst[0] = s->tmp_y;
3248 s->dst[0] = f->data[0] + yoff;
3249 s->y_stride = f->linesize[0];
3252 s->dst[1] = s->tmp_uv[0];
3253 s->dst[2] = s->tmp_uv[1];
3256 s->dst[1] = f->data[1] + uvoff;
3257 s->dst[2] = f->data[2] + uvoff;
3258 s->uv_stride = f->linesize[1];
3262 intra_recon_16bpp(ctx, yoff, uvoff);
3264 intra_recon_8bpp(ctx, yoff, uvoff);
3268 inter_recon_16bpp(ctx);
3270 inter_recon_8bpp(ctx);
/* Copy the emulated temp buffers back to the frame, using the widest
 * copy DSP function (mc[n][0][0][0][0] at full-pel) that still fits. */
3274 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3276 for (n = 0; o < w; n++) {
3281 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3282 s->tmp_y + o, 128, h, 0, 0);
3283 o += bw * bytesperpixel;
3288 int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
3289 int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
3291 for (n = 1; o < w; n++) {
3296 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3297 s->tmp_uv[0] + o, 128, h, 0, 0);
3298 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3299 s->tmp_uv[1] + o, 128, h, 0, 0);
3300 o += bw * bytesperpixel;
3305 // pick filter level and find edges to apply filter to
3306 if (s->filter.level &&
3307 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3308 [b->mode[3] != ZEROMV]) > 0) {
3309 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3310 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3312 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3313 mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3314 if (s->ss_h || s->ss_v)
3315 mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
3316 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3317 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3318 b->uvtx, skip_inter);
/* Lazily fill the sharpness-adjusted limit LUTs for this filter level. */
3320 if (!s->filter.lim_lut[lvl]) {
3321 int sharp = s->filter.sharpness;
3325 limit >>= (sharp + 3) >> 2;
3326 limit = FFMIN(limit, 9 - sharp);
3328 limit = FFMAX(limit, 1);
3330 s->filter.lim_lut[lvl] = limit;
3331 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
/* Advance coefficient buffers past this (coded) block. */
3337 s->block += w4 * h4 * 64 * bytesperpixel;
3338 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3339 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3340 s->eob += 4 * w4 * h4;
3341 s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3342 s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
// Bitstream-driven recursive superblock decode (pass 1): read the partition
// type for the block at (row, col) at level bl from the range coder, then
// decode the leaf block(s) or recurse into the four sub-blocks.
// yoff/uvoff are byte offsets of this block into the current frame planes.
3346 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3347 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3349 VP9Context *s = ctx->priv_data;
// Partition probability context: bit 0 from the above-row context, bit 1
// from the left-column context; (3 - bl) selects the per-level bit.
3350 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3351 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
// Keyframes and intra-only frames use the fixed default partition probs;
// inter frames use the per-frame adapted table.
3352 const uint8_t *p = s->keyframe || s->intraonly ? vp9_default_kf_partition_probs[bl][c] :
3353 s->prob.p.partition[bl][c];
3354 enum BlockPartition bp;
3355 ptrdiff_t hbs = 4 >> bl; // half block size in 8-pixel units (4/2/1 by level)
3356 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3357 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3358 int bytesperpixel = s->bytesperpixel;
// Block fully inside the frame: read the full partition tree symbol.
3361 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3362 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3363 } else if (col + hbs < s->cols) { // FIXME why not <=?
3364 if (row + hbs < s->rows) { // FIXME why not <=?
3365 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3367 case PARTITION_NONE:
3368 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// Horizontal split: decode top half, advance offsets one half-block of
// rows, then decode the bottom half.
3371 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3372 yoff += hbs * 8 * y_stride;
3373 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3374 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
// Vertical split: decode left half, advance offsets one half-block of
// columns, then decode the right half.
3377 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3378 yoff += hbs * 8 * bytesperpixel;
3379 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3380 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3382 case PARTITION_SPLIT:
// Four-way split: recurse into the quadrants in raster order.
3383 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3384 decode_sb(ctx, row, col + hbs, lflvl,
3385 yoff + 8 * hbs * bytesperpixel,
3386 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3387 yoff += hbs * 8 * y_stride;
3388 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3389 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3390 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3391 yoff + 8 * hbs * bytesperpixel,
3392 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// Bottom edge clipped: only split vs. horizontal decisions are possible,
// so a single branchy bit decides.
3397 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3398 bp = PARTITION_SPLIT;
3399 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3400 decode_sb(ctx, row, col + hbs, lflvl,
3401 yoff + 8 * hbs * bytesperpixel,
3402 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3405 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// Right edge clipped: split vs. vertical decision.
3407 } else if (row + hbs < s->rows) { // FIXME why not <=?
3408 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3409 bp = PARTITION_SPLIT;
3410 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3411 yoff += hbs * 8 * y_stride;
3412 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3413 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3416 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// Both edges clipped: split is the only legal partition.
3419 bp = PARTITION_SPLIT;
3420 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
// Accumulate partition statistics for backward probability adaptation.
3422 s->counts.partition[bl][c][bp]++;
// Second-pass variant of decode_sb(): partition decisions were already
// parsed and stored in s->b during pass 1, so this walks the saved block
// structure instead of reading partition syntax from the bitstream.
3425 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3426 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3428 VP9Context *s = ctx->priv_data;
3430 ptrdiff_t hbs = 4 >> bl; // half block size in 8-pixel units
3431 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3432 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3433 int bytesperpixel = s->bytesperpixel;
// Smallest level: must be an 8x8 leaf.
3436 av_assert2(b->bl == BL_8X8);
3437 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3438 } else if (s->b->bl == bl) {
// Leaf at this level: reconstruct it, plus its sibling for H/V partitions
// when the second half lies inside the frame.
3439 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3440 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3441 yoff += hbs * 8 * y_stride;
3442 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3443 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3444 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3445 yoff += hbs * 8 * bytesperpixel;
3446 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3447 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// Split: recurse into quadrants, honouring frame-edge clipping the same
// way decode_sb() does.
3450 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3451 if (col + hbs < s->cols) { // FIXME why not <=?
3452 if (row + hbs < s->rows) {
3453 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
3454 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3455 yoff += hbs * 8 * y_stride;
3456 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3457 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3458 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3459 yoff + 8 * hbs * bytesperpixel,
3460 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3462 yoff += hbs * 8 * bytesperpixel;
3463 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3464 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3466 } else if (row + hbs < s->rows) {
3467 yoff += hbs * 8 * y_stride;
3468 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3469 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
// Loop-filter all vertical edges (edges between horizontally adjacent
// blocks) of one plane within a 64x64 superblock.
// lvl: per-8x8 filter levels for this superblock; mask: per-row bit masks
// of edge positions, indexed [0=16px, 1=8px, 2=4px, 3=inner-4px] per the
// VP9Filter mask layout; dst/ls: plane pointer and stride.
3474 static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
3475 uint8_t *lvl, uint8_t (*mask)[4],
3476 uint8_t *dst, ptrdiff_t ls)
3478 int y, x, bytesperpixel = s->bytesperpixel;
3480 // filter edges between columns (e.g. block1 | block2)
3481 for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
// hmask1/hmask2 are the edge masks of the two 8px rows handled per
// iteration; combining them lets a 16-tall filter call cover both.
3482 uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
3483 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3484 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3485 unsigned hm = hm1 | hm2 | hm13 | hm23;
// Walk edge positions left to right; stop once no mask bit at or beyond
// x remains set.
3487 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
// E = edge (mblim) limit, I = interior limit, H = hev threshold,
// all derived from the 8-bit level via the precomputed LUTs.
3490 int L = *l, H = L >> 4;
3491 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3493 if (hmask1[0] & x) {
3494 if (hmask2[0] & x) {
// Both rows need a 16px edge with the same level: one 16-tall call.
3495 av_assert2(l[8 << ss_v] == L);
3496 s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
3498 s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
3500 } else if (hm2 & x) {
// Different levels in the two rows: pack both E/I pairs into one
// mix2 call (second row's limits in the high byte).
3503 E |= s->filter.mblim_lut[L] << 8;
3504 I |= s->filter.lim_lut[L] << 8;
3505 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3507 [0](ptr, ls, E, I, H);
3509 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3510 [0](ptr, ls, E, I, H);
3512 } else if (hm2 & x) {
// Only the lower 8px row has an edge at this position.
3513 int L = l[8 << ss_v], H = L >> 4;
3514 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3516 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3517 [0](ptr + 8 * ls, ls, E, I, H);
// Inner 4px edges (mask index 3), offset half a block to the right.
3525 int L = *l, H = L >> 4;
3526 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3531 E |= s->filter.mblim_lut[L] << 8;
3532 I |= s->filter.lim_lut[L] << 8;
3533 s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3535 s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3537 } else if (hm23 & x) {
3538 int L = l[8 << ss_v], H = L >> 4;
3539 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3541 s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
// Loop-filter all horizontal edges (edges between vertically adjacent
// blocks) of one plane within a 64x64 superblock; counterpart of
// filter_plane_cols(). mask[y] holds per-row bit masks split by edge size
// [0=16px, 1=8px, 2=4px, 3=inner-4px] per the VP9Filter mask layout.
3549 static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
3550 uint8_t *lvl, uint8_t (*mask)[4],
3551 uint8_t *dst, ptrdiff_t ls)
3553 int y, x, bytesperpixel = s->bytesperpixel;
3556 // filter edges between rows (e.g. ------)
3558 for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
3559 uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
3560 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
// Two 8px columns are handled per iteration; x advances by 2 (or 4 with
// horizontal subsampling) bit positions accordingly.
3562 for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
// E = edge (mblim) limit, I = interior limit, H = hev threshold.
3565 int L = *l, H = L >> 4;
3566 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3569 if (vmask[0] & (x << (1 + ss_h))) {
// Neighbouring column needs the same 16px edge: one 16-wide call.
3570 av_assert2(l[1 + ss_h] == L);
3571 s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
3573 s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
3575 } else if (vm & (x << (1 + ss_h))) {
// Different levels in adjacent columns: pack both limit pairs into a
// single mix2 call (second column's limits in the high byte).
3578 E |= s->filter.mblim_lut[L] << 8;
3579 I |= s->filter.lim_lut[L] << 8;
3580 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3581 [!!(vmask[1] & (x << (1 + ss_h)))]
3582 [1](ptr, ls, E, I, H);
3584 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3585 [1](ptr, ls, E, I, H);
3587 } else if (vm & (x << (1 + ss_h))) {
// Only the right-hand 8px column has an edge at this position.
3588 int L = l[1 + ss_h], H = L >> 4;
3589 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3591 s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
3592 [1](ptr + 8 * bytesperpixel, ls, E, I, H);
// Inner 4px edges (mask index 3), offset half a block down (ls * 4).
3597 int L = *l, H = L >> 4;
3598 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3600 if (vm3 & (x << (1 + ss_h))) {
3603 E |= s->filter.mblim_lut[L] << 8;
3604 I |= s->filter.lim_lut[L] << 8;
3605 s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
3607 s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
3609 } else if (vm3 & (x << (1 + ss_h))) {
3610 int L = l[1 + ss_h], H = L >> 4;
3611 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3613 s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
// Run the in-loop deblocking filter over one 64x64 superblock: vertical
// edges (cols) before horizontal edges (rows), luma plane first, then both
// chroma planes which share one set of (possibly subsampled) masks.
3626 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3627 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3629 VP9Context *s = ctx->priv_data;
3630 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3631 uint8_t *dst = f->data[0] + yoff;
3632 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
// Chroma uses the subsampled mask set when either dimension is subsampled.
3633 uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
3636 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3637 // if you think of them as acting on a 8x8 block max, we can interleave
3638 // each v/h within the single x loop, but that only works if we work on
3639 // 8 pixel blocks, and we won't always do that (we want at least 16px
3640 // to use SSE2 optimizations, perhaps 32 for AVX2)
3642 filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
3643 filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);
3645 for (p = 0; p < 2; p++) {
3646 dst = f->data[1 + p] + uvoff;
3647 filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
3648 filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
// Compute the start/end position of tile 'idx' out of 2^log2_n tiles
// covering n superblock units; results are clamped to n and converted to
// 8-pixel block units (<< 3). Used for both tile rows and tile columns.
3652 static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
3654 int sb_start = ( idx * n) >> log2_n;
3655 int sb_end = ((idx + 1) * n) >> log2_n;
3656 *start = FFMIN(sb_start, n) << 3;
3657 *end = FFMIN(sb_end, n) << 3;
// Backward-adapt one binary probability *p toward the observed symbol
// counts (ct0 = times symbol 0 occurred, ct1 = symbol 1). The blend weight
// is update_factor scaled by the total count, saturated at max_count.
// NOTE(review): p1 appears to hold the prior probability, presumably
// loaded from *p before the blend — confirm against the full function.
3660 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3661 int max_count, int update_factor)
3663 unsigned ct = ct0 + ct1, p2, p1;
// Empirical probability of symbol 0 in 8-bit fixed point, rounded and
// clipped to the legal range [1, 255].
3669 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3670 p2 = av_clip(p2, 1, 255);
3671 ct = FFMIN(ct, max_count);
3672 update_factor = FASTDIV(update_factor * ct, max_count);
3674 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3675 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
// Backward adaptation of all entropy-coding probability tables in the
// current frame context, using the symbol counts (s->counts.*) gathered
// while decoding the frame. Run after a frame unless parallel mode
// disabled context refresh.
3678 static void adapt_probs(VP9Context *s)
3681 prob_context *p = &s->prob_ctx[s->framectxid].p;
// Coefficient update factor: 112 after key/intra-only frames or when the
// previous frame was not a keyframe, else 128.
3682 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
// Coefficient/EOB probabilities, per tx size / plane type / band / context.
3685 for (i = 0; i < 4; i++)
3686 for (j = 0; j < 2; j++)
3687 for (k = 0; k < 2; k++)
3688 for (l = 0; l < 6; l++)
3689 for (m = 0; m < 6; m++) {
3690 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3691 unsigned *e = s->counts.eob[i][j][k][l][m];
3692 unsigned *c = s->counts.coef[i][j][k][l][m];
3694 if (l == 0 && m >= 3) // dc only has 3 pt
3697 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3698 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3699 adapt_prob(&pp[2], c[1], c[2], 24, uf);
// Key/intra-only frames adapt only coefficients; mode-related tables are
// copied through unchanged and adaptation stops here.
3702 if (s->keyframe || s->intraonly) {
3703 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3704 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3705 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3706 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
// Skip flag.
3711 for (i = 0; i < 3; i++)
3712 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
// Intra/inter flag.
3715 for (i = 0; i < 4; i++)
3716 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
// Compound prediction flag (only meaningful when switchable).
3719 if (s->comppredmode == PRED_SWITCHABLE) {
3720 for (i = 0; i < 5; i++)
3721 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
// Compound reference selection.
3725 if (s->comppredmode != PRED_SINGLEREF) {
3726 for (i = 0; i < 5; i++)
3727 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3728 s->counts.comp_ref[i][1], 20, 128);
// Single reference selection (two-bit tree).
3731 if (s->comppredmode != PRED_COMPREF) {
3732 for (i = 0; i < 5; i++) {
3733 uint8_t *pp = p->single_ref[i];
3734 unsigned (*c)[2] = s->counts.single_ref[i];
3736 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3737 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3741 // block partitioning
3742 for (i = 0; i < 4; i++)
3743 for (j = 0; j < 4; j++) {
3744 uint8_t *pp = p->partition[i][j];
3745 unsigned *c = s->counts.partition[i][j];
3747 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3748 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3749 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// Transform size trees (8/16/32), only when tx mode is switchable.
3753 if (s->txfmmode == TX_SWITCHABLE) {
3754 for (i = 0; i < 2; i++) {
3755 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3757 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3758 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3759 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3760 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3761 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3762 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3766 // interpolation filter
3767 if (s->filtermode == FILTER_SWITCHABLE) {
3768 for (i = 0; i < 4; i++) {
3769 uint8_t *pp = p->filter[i];
3770 unsigned *c = s->counts.filter[i];
3772 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3773 adapt_prob(&pp[1], c[1], c[2], 20, 128);
// Inter mode tree (ZEROMV/NEARESTMV/NEARMV/NEWMV).
3778 for (i = 0; i < 7; i++) {
3779 uint8_t *pp = p->mv_mode[i];
3780 unsigned *c = s->counts.mv_mode[i];
3782 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3783 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3784 adapt_prob(&pp[2], c[1], c[3], 20, 128);
// Motion vector joint (which of x/y components are nonzero).
3789 uint8_t *pp = p->mv_joint;
3790 unsigned *c = s->counts.mv_joint;
3792 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3793 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3794 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// Per-component (x/y) motion vector probabilities.
3798 for (i = 0; i < 2; i++) {
3800 unsigned *c, (*c2)[2], sum;
3802 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3803 s->counts.mv_comp[i].sign[1], 20, 128);
// Magnitude class tree; 'sum' is progressively reduced as the tree is
// walked from the root downwards.
3805 pp = p->mv_comp[i].classes;
3806 c = s->counts.mv_comp[i].classes;
3807 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3808 adapt_prob(&pp[0], c[0], sum, 20, 128);
3810 adapt_prob(&pp[1], c[1], sum, 20, 128);
3812 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3813 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3815 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3816 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3818 adapt_prob(&pp[6], c[6], sum, 20, 128);
3819 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3820 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3821 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3823 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3824 s->counts.mv_comp[i].class0[1], 20, 128);
3825 pp = p->mv_comp[i].bits;
3826 c2 = s->counts.mv_comp[i].bits;
3827 for (j = 0; j < 10; j++)
3828 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
// Fractional-pel trees (class0 and generic).
3830 for (j = 0; j < 2; j++) {
3831 pp = p->mv_comp[i].class0_fp[j];
3832 c = s->counts.mv_comp[i].class0_fp[j];
3833 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3834 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3835 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3837 pp = p->mv_comp[i].fp;
3838 c = s->counts.mv_comp[i].fp;
3839 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3840 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3841 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// High-precision (1/8-pel) bits, only when enabled for this frame.
3843 if (s->highprecisionmvs) {
3844 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3845 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3846 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3847 s->counts.mv_comp[i].hp[1], 20, 128);
// Luma intra mode tree; 'sum' shrinks as modes are peeled off the tree.
3852 for (i = 0; i < 4; i++) {
3853 uint8_t *pp = p->y_mode[i];
3854 unsigned *c = s->counts.y_mode[i], sum, s2;
3856 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3857 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3858 sum -= c[TM_VP8_PRED];
3859 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3860 sum -= c[VERT_PRED];
3861 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3862 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3864 adapt_prob(&pp[3], s2, sum, 20, 128);
3866 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3867 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3868 sum -= c[DIAG_DOWN_LEFT_PRED];
3869 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3870 sum -= c[VERT_LEFT_PRED];
3871 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3872 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// Chroma intra mode tree, conditioned on the luma mode (same tree shape).
3876 for (i = 0; i < 10; i++) {
3877 uint8_t *pp = p->uv_mode[i];
3878 unsigned *c = s->counts.uv_mode[i], sum, s2;
3880 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3881 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3882 sum -= c[TM_VP8_PRED];
3883 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3884 sum -= c[VERT_PRED];
3885 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3886 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3888 adapt_prob(&pp[3], s2, sum, 20, 128);
3890 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3891 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3892 sum -= c[DIAG_DOWN_LEFT_PRED];
3893 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3894 sum -= c[VERT_LEFT_PRED];
3895 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3896 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// Release the per-resolution scratch buffers (intra prediction rows,
// block structs, coefficient blocks); safe to call repeatedly (av_freep).
3900 static void free_buffers(VP9Context *s)
3902 av_freep(&s->intra_pred_data[0]);
3903 av_freep(&s->b_base);
3904 av_freep(&s->block_base);
// Decoder close: release the three internal frames (current frame plus
// the MV-pair and segmentation-map reference copies) and all 8 reference
// slots in both the active and next-reference arrays.
3907 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3909 VP9Context *s = ctx->priv_data;
3912 for (i = 0; i < 3; i++) {
3913 if (s->frames[i].tf.f->data[0])
3914 vp9_unref_frame(ctx, &s->frames[i]);
3915 av_frame_free(&s->frames[i].tf.f);
3917 for (i = 0; i < 8; i++) {
3918 if (s->refs[i].f->data[0])
3919 ff_thread_release_buffer(ctx, &s->refs[i]);
3920 av_frame_free(&s->refs[i].f);
3921 if (s->next_refs[i].f->data[0])
3922 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3923 av_frame_free(&s->next_refs[i].f);
// Decode one VP9 frame from the packet: parse the frame header, set up
// the frame buffers and reference rotation, run the tile/superblock decode
// loop (optionally in two passes for frame threading), apply the loop
// filter per superblock row, and output the frame if it is visible.
3933 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3934 int *got_frame, AVPacket *pkt)
3936 const uint8_t *data = pkt->data;
3937 int size = pkt->size;
3938 VP9Context *s = ctx->priv_data;
3939 int res, tile_row, tile_col, i, ref, row, col;
// When the segmentation map is not updated this frame, the previous map
// must be kept alive across the buffer rotation below.
3940 int retain_segmap_ref = s->segmentation.enabled && !s->segmentation.update_map;
3941 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
3945 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3947 } else if (res == 0) {
// "show existing frame": no coded data, just re-output reference 'ref'.
3948 if (!s->refs[ref].f->data[0]) {
3949 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3950 return AVERROR_INVALIDDATA;
3952 if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
3954 ((AVFrame *)frame)->pkt_pts = pkt->pts;
3955 ((AVFrame *)frame)->pkt_dts = pkt->dts;
// References are carried over unchanged for the next frame.
3956 for (i = 0; i < 8; i++) {
3957 if (s->next_refs[i].f->data[0])
3958 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3959 if (s->refs[i].f->data[0] &&
3960 (res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i])) < 0)
// Rotate the internal frames: the previous CUR_FRAME becomes the source
// of last-frame MVs (and, unless retained, the segmentation map).
3969 if (!retain_segmap_ref) {
3970 if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
3971 vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
3972 if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
3973 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
3976 if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
3977 vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);
3978 if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
3979 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
3981 if (s->frames[CUR_FRAME].tf.f->data[0])
3982 vp9_unref_frame(ctx, &s->frames[CUR_FRAME])
3983 if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
3985 f = s->frames[CUR_FRAME].tf.f;
3986 f->key_frame = s->keyframe;
3987 f->pict_type = (s->keyframe || s->intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3988 ls_y = f->linesize[0];
3989 ls_uv =f->linesize[1];
// Populate the next-reference slots: refreshed slots point at the new
// frame, others carry over the existing reference.
3992 for (i = 0; i < 8; i++) {
3993 if (s->next_refs[i].f->data[0])
3994 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3995 if (s->refreshrefmask & (1 << i)) {
3996 res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
3997 } else if (s->refs[i].f->data[0]) {
3998 res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
4004 // main tile decode loop
// Reset the above-row contexts before the first superblock row.
4005 bytesperpixel = s->bytesperpixel;
4006 memset(s->above_partition_ctx, 0, s->cols);
4007 memset(s->above_skip_ctx, 0, s->cols);
4008 if (s->keyframe || s->intraonly) {
4009 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
4011 memset(s->above_mode_ctx, NEARESTMV, s->cols);
4013 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
4014 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
4015 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
4016 memset(s->above_segpred_ctx, 0, s->cols);
// Two-pass decoding is used with frame threading when this frame updates
// the probability context non-parallel: pass 1 parses, pass 2 reconstructs.
4017 s->pass = s->frames[CUR_FRAME].uses_2pass =
4018 ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
4019 if ((res = update_block_buffers(ctx)) < 0) {
4020 av_log(ctx, AV_LOG_ERROR,
4021 "Failed to allocate block buffers\n");
// Parallel mode: commit the forward-updated probabilities immediately so
// dependent frame threads can start.
4024 if (s->refreshctx && s->parallelmode) {
4027 for (i = 0; i < 4; i++) {
4028 for (j = 0; j < 2; j++)
4029 for (k = 0; k < 2; k++)
4030 for (l = 0; l < 6; l++)
4031 for (m = 0; m < 6; m++)
4032 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
4033 s->prob.coef[i][j][k][l][m], 3);
4034 if (s->txfmmode == i)
4037 s->prob_ctx[s->framectxid].p = s->prob.p;
4038 ff_thread_finish_setup(ctx);
4039 } else if (!s->refreshctx) {
4040 ff_thread_finish_setup(ctx);
// Reset the coefficient/EOB write pointers at the start of each pass.
4046 s->block = s->block_base;
4047 s->uvblock[0] = s->uvblock_base[0];
4048 s->uvblock[1] = s->uvblock_base[1];
4049 s->eob = s->eob_base;
4050 s->uveob[0] = s->uveob_base[0];
4051 s->uveob[1] = s->uveob_base[1];
4053 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
4054 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
4055 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
// Initialise one range decoder per tile column; all but the last tile are
// preceded by a 32-bit size field.
4057 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4060 if (tile_col == s->tiling.tile_cols - 1 &&
4061 tile_row == s->tiling.tile_rows - 1) {
4064 tile_size = AV_RB32(data);
4068 if (tile_size > size) {
// On bitstream errors, release waiters before bailing out.
4069 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4070 return AVERROR_INVALIDDATA;
4072 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
4073 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
4074 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4075 return AVERROR_INVALIDDATA;
// Superblock rows (64px each) within this tile row.
4082 for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
4083 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
4084 struct VP9Filter *lflvl_ptr = s->lflvl;
4085 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
4087 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4088 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
4089 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
// Reset left-edge contexts at the start of every tile column.
4092 memset(s->left_partition_ctx, 0, 8);
4093 memset(s->left_skip_ctx, 0, 8);
4094 if (s->keyframe || s->intraonly) {
4095 memset(s->left_mode_ctx, DC_PRED, 16);
4097 memset(s->left_mode_ctx, NEARESTMV, 8);
4099 memset(s->left_y_nnz_ctx, 0, 16);
4100 memset(s->left_uv_nnz_ctx, 0, 32);
4101 memset(s->left_segpred_ctx, 0, 8);
// Swap in this tile's range coder state for the duration of the row.
4103 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
4106 for (col = s->tiling.tile_col_start;
4107 col < s->tiling.tile_col_end;
4108 col += 8, yoff2 += 64 * bytesperpixel,
4109 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4110 // FIXME integrate with lf code (i.e. zero after each
4111 // use, similar to invtxfm coefficients, or similar)
4113 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
// Pass 2 replays stored decisions; pass 0/1 parse the bitstream.
4117 decode_sb_mem(ctx, row, col, lflvl_ptr,
4118 yoff2, uvoff2, BL_64X64);
4120 decode_sb(ctx, row, col, lflvl_ptr,
4121 yoff2, uvoff2, BL_64X64);
// Save the range coder state back for the next superblock row.
4125 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
4133 // backup pre-loopfilter reconstruction data for intra
4134 // prediction of next row of sb64s
4135 if (row + 8 < s->rows) {
4136 memcpy(s->intra_pred_data[0],
4137 f->data[0] + yoff + 63 * ls_y,
4138 8 * s->cols * bytesperpixel);
4139 memcpy(s->intra_pred_data[1],
4140 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4141 8 * s->cols * bytesperpixel >> s->ss_h);
4142 memcpy(s->intra_pred_data[2],
4143 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4144 8 * s->cols * bytesperpixel >> s->ss_h);
4147 // loopfilter one row
4148 if (s->filter.level) {
4151 lflvl_ptr = s->lflvl;
4152 for (col = 0; col < s->cols;
4153 col += 8, yoff2 += 64 * bytesperpixel,
4154 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4155 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
4159 // FIXME maybe we can make this more finegrained by running the
4160 // loopfilter per-block instead of after each sbrow
4161 // In fact that would also make intra pred left preparation easier?
4162 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
// After pass 1 of a two-pass decode, adapt probabilities and unblock
// dependent threads before starting pass 2.
4166 if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
4168 ff_thread_finish_setup(ctx);
4170 } while (s->pass++ == 1);
4171 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
// Commit the reference rotation prepared in next_refs.
4174 for (i = 0; i < 8; i++) {
4175 if (s->refs[i].f->data[0])
4176 ff_thread_release_buffer(ctx, &s->refs[i]);
4177 ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
4180 if (!s->invisible) {
4181 if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
// Flush callback: drop all internal frames and reference slots so the
// decoder restarts cleanly (e.g. after a seek).
4189 static void vp9_decode_flush(AVCodecContext *ctx)
4191 VP9Context *s = ctx->priv_data;
4194 for (i = 0; i < 3; i++)
4195 vp9_unref_frame(ctx, &s->frames[i]);
4196 for (i = 0; i < 8; i++)
4197 ff_thread_release_buffer(ctx, &s->refs[i]);
// Allocate the AVFrame shells for the 3 internal frames and the 8 active
// plus 8 next reference slots. On any allocation failure, everything
// allocated so far is released via vp9_decode_free() before ENOMEM.
4200 static int init_frames(AVCodecContext *ctx)
4202 VP9Context *s = ctx->priv_data;
4205 for (i = 0; i < 3; i++) {
4206 s->frames[i].tf.f = av_frame_alloc();
4207 if (!s->frames[i].tf.f) {
4208 vp9_decode_free(ctx);
4209 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4210 return AVERROR(ENOMEM);
4213 for (i = 0; i < 8; i++) {
4214 s->refs[i].f = av_frame_alloc();
4215 s->next_refs[i].f = av_frame_alloc();
4216 if (!s->refs[i].f || !s->next_refs[i].f) {
4217 vp9_decode_free(ctx);
4218 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4219 return AVERROR(ENOMEM);
// Decoder init: enable per-frame progress allocation for frame threading
// and mark the sharpness as unset (-1) so the filter limit LUTs are
// rebuilt on the first frame header, then allocate the frame shells.
4226 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4228 VP9Context *s = ctx->priv_data;
4230 ctx->internal->allocate_progress = 1;
4232 s->filter.sharpness = -1; // sentinel: no sharpness seen yet
4234 return init_frames(ctx);
// Frame-thread copy init: each worker thread only needs its own frame
// shells; all other state is copied in update_thread_context().
4237 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4239 return init_frames(avctx);
// Frame-threading sync: copy decoding state from the source thread (ssrc)
// into this thread (s) — frames, references, and the header-derived fields
// the next frame's decode depends on.
4242 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4245 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4247 // detect size changes in other threads
4248 if (s->intra_pred_data[0] &&
4249 (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
// Re-reference the source thread's internal frames.
4253 for (i = 0; i < 3; i++) {
4254 if (s->frames[i].tf.f->data[0])
4255 vp9_unref_frame(dst, &s->frames[i]);
4256 if (ssrc->frames[i].tf.f->data[0]) {
4257 if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
// This thread's refs become the source thread's next_refs (the state
// after the source frame finished its reference rotation).
4261 for (i = 0; i < 8; i++) {
4262 if (s->refs[i].f->data[0])
4263 ff_thread_release_buffer(dst, &s->refs[i]);
4264 if (ssrc->next_refs[i].f->data[0]) {
4265 if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
// Scalar header state needed to decode the next frame.
4270 s->invisible = ssrc->invisible;
4271 s->keyframe = ssrc->keyframe;
4272 s->ss_v = ssrc->ss_v;
4273 s->ss_h = ssrc->ss_h;
4274 s->segmentation.enabled = ssrc->segmentation.enabled;
4275 s->segmentation.update_map = ssrc->segmentation.update_map;
4276 s->bytesperpixel = ssrc->bytesperpixel;
4278 s->bpp_index = ssrc->bpp_index;
4279 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4280 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
4281 if (ssrc->segmentation.enabled) {
4282 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4283 sizeof(s->segmentation.feat));
// Supported VP9 profiles (0: 8-bit 4:2:0 through 3), terminated by the
// FF_PROFILE_UNKNOWN sentinel as required by AVCodec.profiles.
4289 static const AVProfile profiles[] = {
4290 { FF_PROFILE_VP9_0, "Profile 0" },
4291 { FF_PROFILE_VP9_1, "Profile 1" },
4292 { FF_PROFILE_VP9_2, "Profile 2" },
4293 { FF_PROFILE_VP9_3, "Profile 3" },
4294 { FF_PROFILE_UNKNOWN },
// Public codec registration: native VP9 decoder with direct rendering and
// frame-threading support.
4297 AVCodec ff_vp9_decoder = {
4299 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4300 .type = AVMEDIA_TYPE_VIDEO,
4301 .id = AV_CODEC_ID_VP9,
4302 .priv_data_size = sizeof(VP9Context),
4303 .init = vp9_decode_init,
4304 .close = vp9_decode_free,
4305 .decode = vp9_decode_frame,
4306 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4307 .flush = vp9_decode_flush,
4308 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4309 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
4310 .profiles = NULL_IF_CONFIG_SMALL(profiles),