2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
34 #include "libavutil/pixdesc.h"
36 #define VP9_SYNCCODE 0x498342
// NOTE(review): this is a numbered excerpt with lines elided; the struct
// bodies below are incomplete fragments and are left byte-identical.
// VP9Frame: one decoded frame plus its per-block side data, shared via
// a single refcounted buffer (extradata) so frame-threading can ref it.
73 typedef struct VP9Frame {
75 AVBufferRef *extradata;
// segmentation_map and mv both point into extradata->data (see
// vp9_alloc_frame below: map first, then the mv pairs).
76 uint8_t *segmentation_map;
77 struct VP9mvrefPair *mv;
// Fragment of struct VP9Filter: per-superblock loopfilter application
// mask, indexed [plane y/uv][col/row][row][txfm size] per the inline tags.
83 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
84 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
// VP9Block: per-block mode decision state filled in during decoding.
87 typedef struct VP9Block {
88 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
89 enum FilterMode filter;
90 VP56mv mv[4 /* b_idx */][2 /* ref */];
92 enum TxfmMode tx, uvtx;
94 enum BlockPartition bp;
// NOTE(review): numbered excerpt with elided lines; fields left byte-identical.
// VP9Context: the decoder's private state — frame header fields, probability
// models, per-tile range coders, and the left/above contextual caches.
97 typedef struct VP9Context {
104 VP9Block *b_base, *b;
// current block position: row/col in 8x8 units, row7/col7 = low 3 bits
// (position within the 64x64 superblock) — see their use in find_ref_mvs.
106 int row, row7, col, col7;
108 ptrdiff_t y_stride, uv_stride;
// frame header flags/state parsed in decode_frame_header
111 uint8_t keyframe, last_keyframe;
112 uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
114 uint8_t use_last_frame_mvs;
119 uint8_t refreshrefmask;
120 uint8_t highprecisionmvs;
121 enum FilterMode filtermode;
122 uint8_t allowcompinter;
125 uint8_t parallelmode;
129 uint8_t varcompref[2];
130 ThreadFrame refs[8], next_refs[8];
132 #define REF_FRAME_MVPAIR 1
133 #define REF_FRAME_SEGMAP 2
// loopfilter limit LUT cache (invalidated when sharpness changes)
140 uint8_t mblim_lut[64];
148 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
150 #define MAX_SEGMENT 8
154 uint8_t absolute_vals;
160 uint8_t skip_enabled;
// tiling layout derived from the frame header
169 unsigned log2_tile_cols, log2_tile_rows;
170 unsigned tile_cols, tile_rows;
171 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
173 unsigned sb_cols, sb_rows, rows, cols;
// probability model fragments: 3-entry coef probs in one context flavor,
// 11-entry (full token set) in the other
176 uint8_t coef[4][2][2][6][6][3];
180 uint8_t coef[4][2][2][6][6][11];
// symbol counts gathered during decoding (used for backward adaptation)
185 unsigned y_mode[4][10];
186 unsigned uv_mode[10][10];
187 unsigned filter[4][3];
188 unsigned mv_mode[7][4];
189 unsigned intra[4][2];
191 unsigned single_ref[5][2][2];
192 unsigned comp_ref[5][2];
193 unsigned tx32p[2][4];
194 unsigned tx16p[2][3];
197 unsigned mv_joint[4];
200 unsigned classes[11];
202 unsigned bits[10][2];
203 unsigned class0_fp[2][4];
205 unsigned class0_hp[2];
208 unsigned partition[4][4][4];
209 unsigned coef[4][2][2][6][6][3];
210 unsigned eob[4][2][2][6][6][2];
212 enum TxfmMode txfmmode;
213 enum CompPredMode comppredmode;
215 // contextual (left/above) cache
216 DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
217 DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
218 DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
219 DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
220 DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
221 DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
222 DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
223 DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
224 DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
225 DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
226 DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
227 DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
// "above" context rows: all carved out of one allocation in update_size()
228 uint8_t *above_partition_ctx;
229 uint8_t *above_mode_ctx;
230 // FIXME maybe merge some of the below in a flags field?
231 uint8_t *above_y_nnz_ctx;
232 uint8_t *above_uv_nnz_ctx[2];
233 uint8_t *above_skip_ctx; // 1bit
234 uint8_t *above_txfm_ctx; // 2bit
235 uint8_t *above_segpred_ctx; // 1bit
236 uint8_t *above_intra_ctx; // 1bit
237 uint8_t *above_comp_ctx; // 1bit
238 uint8_t *above_ref_ctx; // 2bit
239 uint8_t *above_filter_ctx;
240 VP56mv (*above_mv_ctx)[2];
243 uint8_t *intra_pred_data[3];
244 struct VP9Filter *lflvl;
245 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2];
247 // block reconstruction intermediates
248 int block_alloc_using_2pass;
249 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
250 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
251 struct { int x, y; } min_mv, max_mv;
252 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
253 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
// per-ref MV scaling factors (14-bit fixed point) and step sizes for
// scaled-reference inter prediction — filled in decode_frame_header
254 uint16_t mvscale[3][2];
255 uint8_t mvstep[3][2];
// Block width/height lookup, per block size: [0] in 4x4 units, [1] in
// 8x8 units, as {width, height} pairs ordered by enum BlockSize.
// NOTE(review): numbered excerpt; the grouping braces between the two
// sub-tables are elided in this listing.
258 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
260 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
261 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
263 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
264 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
268 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
270 VP9Context *s = ctx->priv_data;
273 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
275 sz = 64 * s->sb_cols * s->sb_rows;
276 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
277 ff_thread_release_buffer(ctx, &f->tf);
278 return AVERROR(ENOMEM);
281 f->segmentation_map = f->extradata->data;
282 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
287 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
289 ff_thread_release_buffer(ctx, &f->tf);
290 av_buffer_unref(&f->extradata);
291 f->segmentation_map = NULL;
294 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
298 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
300 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
301 vp9_unref_frame(ctx, dst);
302 return AVERROR(ENOMEM);
305 dst->segmentation_map = src->segmentation_map;
307 dst->uses_2pass = src->uses_2pass;
// NOTE(review): numbered excerpt with elided lines (the early return, the
// ctx width/height/pix_fmt assignment and the alloc-failure branch are not
// visible here); code left byte-identical.
//
// update_size: (re)initialize all per-frame-size decoder state — superblock
// and 8x8-block grid dimensions, the single allocation that backs the
// intra-pred rows, all "above" context rows and the lflvl array, and the
// DSP function tables when the bit depth changed.
312 static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
314 VP9Context *s = ctx->priv_data;
316 int bytesperpixel = s->bytesperpixel;
318 av_assert0(w > 0 && h > 0);
// fast path: nothing changed since last call (elided return follows)
320 if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
326 s->sb_cols = (w + 63) >> 6;
327 s->sb_rows = (h + 63) >> 6;
328 s->cols = (w + 7) >> 3;
329 s->rows = (h + 7) >> 3;
// carve successive arrays out of the single buffer p, each sized per
// superblock column
331 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
332 av_freep(&s->intra_pred_data[0]);
333 // FIXME we slightly over-allocate here for subsampled chroma, but a little
334 // bit of padding shouldn't affect performance...
335 p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
336 sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
338 return AVERROR(ENOMEM);
339 assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
340 assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
341 assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
342 assign(s->above_y_nnz_ctx, uint8_t *, 16);
343 assign(s->above_mode_ctx, uint8_t *, 16);
344 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
345 assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
346 assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
347 assign(s->above_partition_ctx, uint8_t *, 8);
348 assign(s->above_skip_ctx, uint8_t *, 8);
349 assign(s->above_txfm_ctx, uint8_t *, 8);
350 assign(s->above_segpred_ctx, uint8_t *, 8);
351 assign(s->above_intra_ctx, uint8_t *, 8);
352 assign(s->above_comp_ctx, uint8_t *, 8);
353 assign(s->above_ref_ctx, uint8_t *, 8);
354 assign(s->above_filter_ctx, uint8_t *, 8);
355 assign(s->lflvl, struct VP9Filter *, 1);
358 // these will be re-allocated a little later
359 av_freep(&s->b_base);
360 av_freep(&s->block_base);
// re-init DSP tables only on bit-depth change (they are bpp-specific)
362 if (s->bpp != s->last_bpp) {
363 ff_vp9dsp_init(&s->dsp, s->bpp, ctx->flags & AV_CODEC_FLAG_BITEXACT);
364 ff_videodsp_init(&s->vdsp, s->bpp);
365 s->last_bpp = s->bpp;
// NOTE(review): numbered excerpt with elided lines (early `return 0;`, the
// `} else {` between the two allocation strategies, and the trailing return
// are not visible); code left byte-identical.
//
// update_block_buffers: allocate the block-mode array (b_base) and the
// coefficient/EOB scratch buffers. In 2-pass (frame-threaded) mode these
// are sized for the whole frame (one entry per superblock); otherwise a
// single superblock's worth is enough and is reused per block.
371 static int update_block_buffers(AVCodecContext *ctx)
373 VP9Context *s = ctx->priv_data;
374 int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
// fast path: already allocated with the same 2-pass mode
376 if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
380 av_free(s->block_base);
// chroma sizes shrink with each subsampling direction (ss_h/ss_v)
381 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
382 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
383 if (s->frames[CUR_FRAME].uses_2pass) {
384 int sbs = s->sb_cols * s->sb_rows;
// whole-frame allocation: luma coeffs, 2 chroma planes, then eob bytes
386 s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
387 s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
388 16 * 16 + 2 * chroma_eobs) * sbs);
389 if (!s->b_base || !s->block_base)
390 return AVERROR(ENOMEM);
391 s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
392 s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
393 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
394 s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
395 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
// single-superblock variant of the same layout (elided else branch)
397 s->b_base = av_malloc(sizeof(VP9Block));
398 s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
399 16 * 16 + 2 * chroma_eobs);
400 if (!s->b_base || !s->block_base)
401 return AVERROR(ENOMEM);
402 s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
403 s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
404 s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
405 s->uveob_base[0] = s->eob_base + 16 * 16;
406 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
408 s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
413 // for some reason the sign bit is at the end, not the start, of a bit sequence
414 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
416 int v = get_bits(gb, n);
417 return get_bits1(gb) ? -v : v;
/**
 * Inverse of the "recenter_nonneg" mapping used by VP9's subexponential
 * probability-update code: given a coded value v and a center m, recover
 * the original non-negative value. Values v > 2*m are too far from the
 * center to be folded and pass through unchanged; otherwise odd v maps
 * below the center and even v maps above it.
 */
static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
}
425 // differential forward probability updates
426 static int update_prob(VP56RangeCoder *c, int p)
428 static const int inv_map_table[255] = {
429 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
430 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
431 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
432 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
433 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
434 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
435 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
436 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
437 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
438 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
439 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
440 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
441 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
442 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
443 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
444 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
445 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
446 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
451 /* This code is trying to do a differential probability update. For a
452 * current probability A in the range [1, 255], the difference to a new
453 * probability of any value can be expressed differentially as 1-A,255-A
454 * where some part of this (absolute range) exists both in positive as
455 * well as the negative part, whereas another part only exists in one
456 * half. We're trying to code this shared part differentially, i.e.
457 * times two where the value of the lowest bit specifies the sign, and
458 * the single part is then coded on top of this. This absolute difference
459 * then again has a value of [0,254], but a bigger value in this range
460 * indicates that we're further away from the original value A, so we
461 * can code this as a VLC code, since higher values are increasingly
462 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
463 * updates vs. the 'fine, exact' updates further down the range, which
464 * adds one extra dimension to this differential update model. */
466 if (!vp8_rac_get(c)) {
467 d = vp8_rac_get_uint(c, 4) + 0;
468 } else if (!vp8_rac_get(c)) {
469 d = vp8_rac_get_uint(c, 4) + 16;
470 } else if (!vp8_rac_get(c)) {
471 d = vp8_rac_get_uint(c, 5) + 32;
473 d = vp8_rac_get_uint(c, 7);
475 d = (d << 1) - 65 + vp8_rac_get(c);
477 av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
480 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
481 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
// NOTE(review): numbered excerpt with elided lines (closing braces, the
// profile number in some error logs, and the final `return res;` are not
// visible); code left byte-identical.
//
// read_colorspace_details: parse bit depth, colorspace, color range and
// chroma subsampling from the frame header and return the matching
// AVPixelFormat (negative AVERROR on invalid combinations).
484 static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
486 static const enum AVColorSpace colorspaces[8] = {
487 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
488 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
490 VP9Context *s = ctx->priv_data;
491 enum AVPixelFormat res;
// profiles 0/1 are 8-bit only; profiles 2/3 code one extra bit for 10/12
492 int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
495 s->bpp = 8 + bits * 2;
496 s->bytesperpixel = (7 + s->bpp) >> 3;
497 ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
498 if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
499 static const enum AVPixelFormat pix_fmt_rgb[3] = {
500 AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
// RGB is only valid in odd profiles (1 and 3), always full range,
// never subsampled
502 if (ctx->profile & 1) {
503 s->ss_h = s->ss_v = 0;
504 res = pix_fmt_rgb[bits];
505 ctx->color_range = AVCOL_RANGE_JPEG;
506 if (get_bits1(&s->gb)) {
507 av_log(ctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
508 return AVERROR_INVALIDDATA;
511 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
513 return AVERROR_INVALIDDATA;
// YUV path: pick the pixel format from bit depth and the two
// subsampling flags
516 static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
517 { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
518 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
519 { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
520 { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
521 { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
522 { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
524 ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
// odd profiles code explicit subsampling; even profiles imply 4:2:0
525 if (ctx->profile & 1) {
526 s->ss_h = get_bits1(&s->gb);
527 s->ss_v = get_bits1(&s->gb);
528 if ((res = pix_fmt_for_ss[bits][s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
529 av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
531 return AVERROR_INVALIDDATA;
532 } else if (get_bits1(&s->gb)) {
533 av_log(ctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
535 return AVERROR_INVALIDDATA;
538 s->ss_h = s->ss_v = 1;
539 res = pix_fmt_for_ss[bits][1][1];
// NOTE(review): numbered excerpt with many lines elided (closing braces,
// some `return` statements and several `} else {` lines are not visible);
// code left byte-identical. Comments added for orientation only.
//
// decode_frame_header: parse the uncompressed VP9 frame header from the
// GetBitContext, then the compressed (range-coded) header with all forward
// probability updates. On a show-existing-frame packet, *ref is set to the
// reference slot to output. Returns the total header size consumed, or a
// negative AVERROR.
546 static int decode_frame_header(AVCodecContext *ctx,
547 const uint8_t *data, int size, int *ref)
549 VP9Context *s = ctx->priv_data;
550 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
551 enum AVPixelFormat fmt = ctx->pix_fmt;
553 const uint8_t *data2;
// --- uncompressed ("frame") header ---
556 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
557 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
560 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
561 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
562 return AVERROR_INVALIDDATA;
// profile is 2 bits (low bit first), plus a third bit reserved for
// profile 3's extension
564 ctx->profile = get_bits1(&s->gb);
565 ctx->profile |= get_bits1(&s->gb) << 1;
566 if (ctx->profile == 3) ctx->profile += get_bits1(&s->gb);
567 if (ctx->profile > 3) {
568 av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
569 return AVERROR_INVALIDDATA;
// show-existing-frame: output reference slot *ref directly
571 if (get_bits1(&s->gb)) {
572 *ref = get_bits(&s->gb, 3);
575 s->last_keyframe = s->keyframe;
576 s->keyframe = !get_bits1(&s->gb);
577 last_invisible = s->invisible;
578 s->invisible = !get_bits1(&s->gb);
579 s->errorres = get_bits1(&s->gb);
580 s->use_last_frame_mvs = !s->errorres && !last_invisible;
// keyframe path: sync code, colorspace, frame size
582 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
583 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
584 return AVERROR_INVALIDDATA;
586 if ((fmt = read_colorspace_details(ctx)) < 0)
588 // for profile 1, here follows the subsampling bits
589 s->refreshrefmask = 0xff;
590 w = get_bits(&s->gb, 16) + 1;
591 h = get_bits(&s->gb, 16) + 1;
592 if (get_bits1(&s->gb)) // display size
593 skip_bits(&s->gb, 32);
// non-keyframe path: intraonly/resetctx, then either the intra-only
// sub-header or the inter reference setup
595 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
596 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
598 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
599 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
600 return AVERROR_INVALIDDATA;
602 if (ctx->profile >= 1) {
603 if ((fmt = read_colorspace_details(ctx)) < 0)
606 s->ss_h = s->ss_v = 1;
// profile-0 intra-only frames are hardcoded 8-bit 4:2:0 BT.470BG
609 s->bytesperpixel = 1;
610 fmt = AV_PIX_FMT_YUV420P;
611 ctx->colorspace = AVCOL_SPC_BT470BG;
612 ctx->color_range = AVCOL_RANGE_JPEG;
614 s->refreshrefmask = get_bits(&s->gb, 8);
615 w = get_bits(&s->gb, 16) + 1;
616 h = get_bits(&s->gb, 16) + 1;
617 if (get_bits1(&s->gb)) // display size
618 skip_bits(&s->gb, 32);
// inter frame: 3 active references with per-ref sign bias
620 s->refreshrefmask = get_bits(&s->gb, 8);
621 s->refidx[0] = get_bits(&s->gb, 3);
622 s->signbias[0] = get_bits1(&s->gb) && !s->errorres;
623 s->refidx[1] = get_bits(&s->gb, 3);
624 s->signbias[1] = get_bits1(&s->gb) && !s->errorres;
625 s->refidx[2] = get_bits(&s->gb, 3);
626 s->signbias[2] = get_bits1(&s->gb) && !s->errorres;
627 if (!s->refs[s->refidx[0]].f->data[0] ||
628 !s->refs[s->refidx[1]].f->data[0] ||
629 !s->refs[s->refidx[2]].f->data[0]) {
630 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
631 return AVERROR_INVALIDDATA;
// frame size: either inherited from one of the refs or coded explicitly
633 if (get_bits1(&s->gb)) {
634 w = s->refs[s->refidx[0]].f->width;
635 h = s->refs[s->refidx[0]].f->height;
636 } else if (get_bits1(&s->gb)) {
637 w = s->refs[s->refidx[1]].f->width;
638 h = s->refs[s->refidx[1]].f->height;
639 } else if (get_bits1(&s->gb)) {
640 w = s->refs[s->refidx[2]].f->width;
641 h = s->refs[s->refidx[2]].f->height;
643 w = get_bits(&s->gb, 16) + 1;
644 h = get_bits(&s->gb, 16) + 1;
646 // Note that in this code, "CUR_FRAME" is actually before we
647 // have formally allocated a frame, and thus actually represents
649 s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
650 s->frames[CUR_FRAME].tf.f->height == h;
651 if (get_bits1(&s->gb)) // display size
652 skip_bits(&s->gb, 32);
653 s->highprecisionmvs = get_bits1(&s->gb);
654 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
// compound prediction is only allowed when the refs disagree in sign
// bias; varcompref[] picks the two variable-ref candidates
656 s->allowcompinter = (s->signbias[0] != s->signbias[1] ||
657 s->signbias[0] != s->signbias[2]);
658 if (s->allowcompinter) {
659 if (s->signbias[0] == s->signbias[1]) {
661 s->varcompref[0] = 0;
662 s->varcompref[1] = 1;
663 } else if (s->signbias[0] == s->signbias[2]) {
665 s->varcompref[0] = 0;
666 s->varcompref[1] = 2;
669 s->varcompref[0] = 1;
670 s->varcompref[1] = 2;
// per-ref scaling factors for refs whose size differs from the frame
674 for (i = 0; i < 3; i++) {
675 AVFrame *ref = s->refs[s->refidx[i]].f;
676 int refw = ref->width, refh = ref->height;
678 if (ref->format != fmt) {
679 av_log(ctx, AV_LOG_ERROR,
680 "Ref pixfmt (%s) did not match current frame (%s)",
681 av_get_pix_fmt_name(ref->format),
682 av_get_pix_fmt_name(fmt));
683 return AVERROR_INVALIDDATA;
684 } else if (refw == w && refh == h) {
685 s->mvscale[i][0] = s->mvscale[i][1] = 0;
// VP9 spec: ref may be at most 2x larger and at most 16x smaller
687 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
688 av_log(ctx, AV_LOG_ERROR,
689 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
691 return AVERROR_INVALIDDATA;
// 14-bit fixed-point scale, and per-16px step derived from it
693 s->mvscale[i][0] = (refw << 14) / w;
694 s->mvscale[i][1] = (refh << 14) / h;
695 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
696 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
701 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
702 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
703 s->framectxid = c = get_bits(&s->gb, 2);
705 /* loopfilter header data */
706 if (s->keyframe || s->errorres || s->intraonly) {
707 // reset loopfilter defaults
708 s->lf_delta.ref[0] = 1;
709 s->lf_delta.ref[1] = 0;
710 s->lf_delta.ref[2] = -1;
711 s->lf_delta.ref[3] = -1;
712 s->lf_delta.mode[0] = 0;
713 s->lf_delta.mode[1] = 0;
714 memset(s->segmentation.feat, 0, sizeof(s->segmentation.feat));
716 s->filter.level = get_bits(&s->gb, 6);
717 sharp = get_bits(&s->gb, 3);
718 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
719 // the old cache values since they are still valid
720 if (s->filter.sharpness != sharp)
721 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
722 s->filter.sharpness = sharp;
723 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
724 if (get_bits1(&s->gb)) {
725 for (i = 0; i < 4; i++)
726 if (get_bits1(&s->gb))
727 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
728 for (i = 0; i < 2; i++)
729 if (get_bits1(&s->gb))
730 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
734 /* quantization header data */
735 s->yac_qi = get_bits(&s->gb, 8);
736 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
737 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
738 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
739 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
740 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
742 ctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
744 /* segmentation header info */
745 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
746 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
747 for (i = 0; i < 7; i++)
748 s->prob.seg[i] = get_bits1(&s->gb) ?
749 get_bits(&s->gb, 8) : 255;
750 if ((s->segmentation.temporal = get_bits1(&s->gb))) {
751 for (i = 0; i < 3; i++)
752 s->prob.segpred[i] = get_bits1(&s->gb) ?
753 get_bits(&s->gb, 8) : 255;
// per-segment feature data (quant/loopfilter/ref/skip overrides)
757 if (get_bits1(&s->gb)) {
758 s->segmentation.absolute_vals = get_bits1(&s->gb);
759 for (i = 0; i < 8; i++) {
760 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
761 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
762 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
763 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
764 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
765 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
766 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
771 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
772 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
773 int qyac, qydc, quvac, quvdc, lflvl, sh;
775 if (s->segmentation.enabled && s->segmentation.feat[i].q_enabled) {
776 if (s->segmentation.absolute_vals)
777 qyac = av_clip_uintp2(s->segmentation.feat[i].q_val, 8);
779 qyac = av_clip_uintp2(s->yac_qi + s->segmentation.feat[i].q_val, 8);
783 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
784 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
785 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
786 qyac = av_clip_uintp2(qyac, 8);
788 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
789 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
790 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
791 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];
// per-segment loopfilter levels, with ref/mode deltas scaled by sh
793 sh = s->filter.level >= 32;
794 if (s->segmentation.enabled && s->segmentation.feat[i].lf_enabled) {
795 if (s->segmentation.absolute_vals)
796 lflvl = av_clip_uintp2(s->segmentation.feat[i].lf_val, 6);
798 lflvl = av_clip_uintp2(s->filter.level + s->segmentation.feat[i].lf_val, 6);
800 lflvl = s->filter.level;
802 if (s->lf_delta.enabled) {
803 s->segmentation.feat[i].lflvl[0][0] =
804 s->segmentation.feat[i].lflvl[0][1] =
805 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
806 for (j = 1; j < 4; j++) {
807 s->segmentation.feat[i].lflvl[j][0] =
808 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
809 s->lf_delta.mode[0]) * (1 << sh)), 6);
810 s->segmentation.feat[i].lflvl[j][1] =
811 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
812 s->lf_delta.mode[1]) * (1 << sh)), 6);
815 memset(s->segmentation.feat[i].lflvl, lflvl,
816 sizeof(s->segmentation.feat[i].lflvl));
// resize decoder state to the new frame dimensions
821 if ((res = update_size(ctx, w, h, fmt)) < 0) {
822 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
// tiling: minimum columns so each tile is <= 64 SBs wide, then extra
// column-split bits up to the spec maximum
825 for (s->tiling.log2_tile_cols = 0;
826 s->sb_cols > (64 << s->tiling.log2_tile_cols);
827 s->tiling.log2_tile_cols++) ;
828 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
829 max = FFMAX(0, max - 1);
830 while (max > s->tiling.log2_tile_cols) {
831 if (get_bits1(&s->gb))
832 s->tiling.log2_tile_cols++;
836 s->tiling.log2_tile_rows = decode012(&s->gb);
837 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
838 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
839 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
840 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
841 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
843 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
844 return AVERROR(ENOMEM);
// reset probability contexts to the spec defaults where required
848 if (s->keyframe || s->errorres || (s->intraonly && s->resetctx == 3)) {
849 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
850 s->prob_ctx[3].p = vp9_default_probs;
851 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
852 sizeof(vp9_default_coef_probs));
853 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
854 sizeof(vp9_default_coef_probs));
855 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
856 sizeof(vp9_default_coef_probs));
857 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
858 sizeof(vp9_default_coef_probs));
859 } else if (s->intraonly && s->resetctx == 2) {
860 s->prob_ctx[c].p = vp9_default_probs;
861 memcpy(s->prob_ctx[c].coef, vp9_default_coef_probs,
862 sizeof(vp9_default_coef_probs));
865 // next 16 bits is size of the rest of the header (arith-coded)
866 size2 = get_bits(&s->gb, 16);
867 data2 = align_get_bits(&s->gb);
868 if (size2 > size - (data2 - data)) {
869 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
870 return AVERROR_INVALIDDATA;
// --- compressed header (range-coded) ---
872 ff_vp56_init_range_decoder(&s->c, data2, size2);
873 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
874 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
875 return AVERROR_INVALIDDATA;
878 if (s->keyframe || s->intraonly) {
879 memset(s->counts.coef, 0, sizeof(s->counts.coef));
880 memset(s->counts.eob, 0, sizeof(s->counts.eob));
882 memset(&s->counts, 0, sizeof(s->counts));
884 // FIXME is it faster to not copy here, but do it down in the fw updates
885 // as explicit copies if the fw update is missing (and skip the copy upon
887 s->prob.p = s->prob_ctx[c].p;
// transform mode + tx probability updates
891 s->txfmmode = TX_4X4;
893 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
894 if (s->txfmmode == 3)
895 s->txfmmode += vp8_rac_get(&s->c);
897 if (s->txfmmode == TX_SWITCHABLE) {
898 for (i = 0; i < 2; i++)
899 if (vp56_rac_get_prob_branchy(&s->c, 252))
900 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
901 for (i = 0; i < 2; i++)
902 for (j = 0; j < 2; j++)
903 if (vp56_rac_get_prob_branchy(&s->c, 252))
904 s->prob.p.tx16p[i][j] =
905 update_prob(&s->c, s->prob.p.tx16p[i][j]);
906 for (i = 0; i < 2; i++)
907 for (j = 0; j < 3; j++)
908 if (vp56_rac_get_prob_branchy(&s->c, 252))
909 s->prob.p.tx32p[i][j] =
910 update_prob(&s->c, s->prob.p.tx32p[i][j]);
// coefficient probability updates, per tx size
915 for (i = 0; i < 4; i++) {
916 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
917 if (vp8_rac_get(&s->c)) {
918 for (j = 0; j < 2; j++)
919 for (k = 0; k < 2; k++)
920 for (l = 0; l < 6; l++)
921 for (m = 0; m < 6; m++) {
922 uint8_t *p = s->prob.coef[i][j][k][l][m];
923 uint8_t *r = ref[j][k][l][m];
924 if (m >= 3 && l == 0) // dc only has 3 pt
926 for (n = 0; n < 3; n++) {
927 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
928 p[n] = update_prob(&s->c, r[n]);
// no update coded: copy the context probs as-is
936 for (j = 0; j < 2; j++)
937 for (k = 0; k < 2; k++)
938 for (l = 0; l < 6; l++)
939 for (m = 0; m < 6; m++) {
940 uint8_t *p = s->prob.coef[i][j][k][l][m];
941 uint8_t *r = ref[j][k][l][m];
942 if (m > 3 && l == 0) // dc only has 3 pt
948 if (s->txfmmode == i)
// mode/filter/ref probability updates (inter frames only below)
953 for (i = 0; i < 3; i++)
954 if (vp56_rac_get_prob_branchy(&s->c, 252))
955 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
956 if (!s->keyframe && !s->intraonly) {
957 for (i = 0; i < 7; i++)
958 for (j = 0; j < 3; j++)
959 if (vp56_rac_get_prob_branchy(&s->c, 252))
960 s->prob.p.mv_mode[i][j] =
961 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
963 if (s->filtermode == FILTER_SWITCHABLE)
964 for (i = 0; i < 4; i++)
965 for (j = 0; j < 2; j++)
966 if (vp56_rac_get_prob_branchy(&s->c, 252))
967 s->prob.p.filter[i][j] =
968 update_prob(&s->c, s->prob.p.filter[i][j]);
970 for (i = 0; i < 4; i++)
971 if (vp56_rac_get_prob_branchy(&s->c, 252))
972 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
974 if (s->allowcompinter) {
975 s->comppredmode = vp8_rac_get(&s->c);
977 s->comppredmode += vp8_rac_get(&s->c);
978 if (s->comppredmode == PRED_SWITCHABLE)
979 for (i = 0; i < 5; i++)
980 if (vp56_rac_get_prob_branchy(&s->c, 252))
982 update_prob(&s->c, s->prob.p.comp[i]);
984 s->comppredmode = PRED_SINGLEREF;
987 if (s->comppredmode != PRED_COMPREF) {
988 for (i = 0; i < 5; i++) {
989 if (vp56_rac_get_prob_branchy(&s->c, 252))
990 s->prob.p.single_ref[i][0] =
991 update_prob(&s->c, s->prob.p.single_ref[i][0]);
992 if (vp56_rac_get_prob_branchy(&s->c, 252))
993 s->prob.p.single_ref[i][1] =
994 update_prob(&s->c, s->prob.p.single_ref[i][1]);
998 if (s->comppredmode != PRED_SINGLEREF) {
999 for (i = 0; i < 5; i++)
1000 if (vp56_rac_get_prob_branchy(&s->c, 252))
1001 s->prob.p.comp_ref[i] =
1002 update_prob(&s->c, s->prob.p.comp_ref[i]);
1005 for (i = 0; i < 4; i++)
1006 for (j = 0; j < 9; j++)
1007 if (vp56_rac_get_prob_branchy(&s->c, 252))
1008 s->prob.p.y_mode[i][j] =
1009 update_prob(&s->c, s->prob.p.y_mode[i][j]);
1011 for (i = 0; i < 4; i++)
1012 for (j = 0; j < 4; j++)
1013 for (k = 0; k < 3; k++)
1014 if (vp56_rac_get_prob_branchy(&s->c, 252))
1015 s->prob.p.partition[3 - i][j][k] =
1016 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
1018 // mv fields don't use the update_prob subexp model for some reason
1019 for (i = 0; i < 3; i++)
1020 if (vp56_rac_get_prob_branchy(&s->c, 252))
1021 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1023 for (i = 0; i < 2; i++) {
1024 if (vp56_rac_get_prob_branchy(&s->c, 252))
1025 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1027 for (j = 0; j < 10; j++)
1028 if (vp56_rac_get_prob_branchy(&s->c, 252))
1029 s->prob.p.mv_comp[i].classes[j] =
1030 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1032 if (vp56_rac_get_prob_branchy(&s->c, 252))
1033 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1035 for (j = 0; j < 10; j++)
1036 if (vp56_rac_get_prob_branchy(&s->c, 252))
1037 s->prob.p.mv_comp[i].bits[j] =
1038 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1041 for (i = 0; i < 2; i++) {
1042 for (j = 0; j < 2; j++)
1043 for (k = 0; k < 3; k++)
1044 if (vp56_rac_get_prob_branchy(&s->c, 252))
1045 s->prob.p.mv_comp[i].class0_fp[j][k] =
1046 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1048 for (j = 0; j < 3; j++)
1049 if (vp56_rac_get_prob_branchy(&s->c, 252))
1050 s->prob.p.mv_comp[i].fp[j] =
1051 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1054 if (s->highprecisionmvs) {
1055 for (i = 0; i < 2; i++) {
1056 if (vp56_rac_get_prob_branchy(&s->c, 252))
1057 s->prob.p.mv_comp[i].class0_hp =
1058 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1060 if (vp56_rac_get_prob_branchy(&s->c, 252))
1061 s->prob.p.mv_comp[i].hp =
1062 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// total header size = uncompressed part + compressed part
1067 return (data2 - data) + size2;
1070 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
1073 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
1074 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
// Derive a predicted motion vector (*pmv) for reference index `ref` by
// scanning, in priority order: sub-block MVs already decoded in this block
// (sb), the above/left neighbour contexts, spatial neighbours from
// mv_ref_blk_off[], and finally the co-located MV of the previous frame
// (REF_FRAME_MVPAIR), with sign-flip scaling for mismatched reference
// sign biases. NOTE(review): this numbered listing has interior lines
// elided (macro bodies and loop/if closers are incomplete) -- confirm
// against the full file before relying on the exact control flow.
1077 static void find_ref_mvs(VP9Context *s,
1078                          VP56mv *pmv, int ref, int z, int idx, int sb)
// Per-block-size list of up to 8 candidate neighbour offsets {col,row}
// relative to the current 8x8 position.
1080     static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
1081         [BS_64x64] = {{  3, -1 }, { -1,  3 }, {  4, -1 }, { -1,  4 },
1082                       { -1, -1 }, {  0, -1 }, { -1,  0 }, {  6, -1 }},
1083         [BS_64x32] = {{  0, -1 }, { -1,  0 }, {  4, -1 }, { -1,  2 },
1084                       { -1, -1 }, {  0, -3 }, { -3,  0 }, {  2, -1 }},
1085         [BS_32x64] = {{ -1,  0 }, {  0, -1 }, { -1,  4 }, {  2, -1 },
1086                       { -1, -1 }, { -3,  0 }, {  0, -3 }, { -1,  2 }},
1087         [BS_32x32] = {{  1, -1 }, { -1,  1 }, {  2, -1 }, { -1,  2 },
1088                       { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
1089         [BS_32x16] = {{  0, -1 }, { -1,  0 }, {  2, -1 }, { -1, -1 },
1090                       { -1,  1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
1091         [BS_16x32] = {{ -1,  0 }, {  0, -1 }, { -1,  2 }, { -1, -1 },
1092                       {  1, -1 }, { -3,  0 }, {  0, -3 }, { -3, -3 }},
1093         [BS_16x16] = {{  0, -1 }, { -1,  0 }, {  1, -1 }, { -1,  1 },
1094                       { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 }},
1095         [BS_16x8]  = {{  0, -1 }, { -1,  0 }, {  1, -1 }, { -1, -1 },
1096                       {  0, -2 }, { -2,  0 }, { -2, -1 }, { -1, -2 }},
1097         [BS_8x16]  = {{ -1,  0 }, {  0, -1 }, { -1,  1 }, { -1, -1 },
1098                       { -2,  0 }, {  0, -2 }, { -1, -2 }, { -2, -1 }},
1099         [BS_8x8]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
1100                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1101         [BS_8x4]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
1102                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1103         [BS_4x8]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
1104                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1105         [BS_4x4]   = {{  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
1106                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1109     int row = s->row, col = s->col, row7 = s->row7;
1110     const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
// Sentinel packed-MV value used to mark "no candidate remembered yet".
1111 #define INVALID_MV 0x80008000U
1112     uint32_t mem = INVALID_MV, mem_sub8x8 = INVALID_MV;
// RETURN_DIRECT_MV: accept an already-decoded sub-block MV verbatim
// (no clamping) if it differs from the first remembered candidate.
1115 #define RETURN_DIRECT_MV(mv) \
1117         uint32_t m = AV_RN32A(&mv); \
1121         } else if (mem == INVALID_MV) { \
1123         } else if (m != mem) { \
// For sub-blocks 1..3, earlier sub-block MVs of this block take priority.
1130     if (sb == 2 || sb == 1) {
1131         RETURN_DIRECT_MV(b->mv[0][z]);
1132     } else if (sb == 3) {
1133         RETURN_DIRECT_MV(b->mv[2][z]);
1134         RETURN_DIRECT_MV(b->mv[1][z]);
1135         RETURN_DIRECT_MV(b->mv[0][z]);
// RETURN_MV: clamp a neighbour MV into range and accept it if distinct
// from candidates remembered so far; the sub8x8 path (idx == 1) compares
// against the unclamped value too.
1138 #define RETURN_MV(mv) \
1143             av_assert2(idx == 1); \
1144             av_assert2(mem != INVALID_MV); \
1145             if (mem_sub8x8 == INVALID_MV) { \
1146                 clamp_mv(&tmp, &mv, s); \
1147                 m = AV_RN32A(&tmp); \
1152                 mem_sub8x8 = AV_RN32A(&mv); \
1153             } else if (mem_sub8x8 != AV_RN32A(&mv)) { \
1154                 clamp_mv(&tmp, &mv, s); \
1155                 m = AV_RN32A(&tmp); \
1159                     /* BUG I'm pretty sure this isn't the intention */ \
1165             uint32_t m = AV_RN32A(&mv); \
1167                 clamp_mv(pmv, &mv, s); \
1169             } else if (mem == INVALID_MV) { \
1171             } else if (m != mem) { \
1172                 clamp_mv(pmv, &mv, s); \
// Above neighbour (row - 1): use its stored MV if it used the same ref.
1179         struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1180         if (mv->ref[0] == ref) {
1181             RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1182         } else if (mv->ref[1] == ref) {
1183             RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
// Left neighbour (col - 1), only within the current tile.
1186     if (col > s->tiling.tile_col_start) {
1187         struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1188         if (mv->ref[0] == ref) {
1189             RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1190         } else if (mv->ref[1] == ref) {
1191             RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1199     // previously coded MVs in this neighbourhood, using same reference frame
1200     for (; i < 8; i++) {
1201         int c = p[i][0] + col, r = p[i][1] + row;
1203         if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1204             struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1206             if (mv->ref[0] == ref) {
1207                 RETURN_MV(mv->mv[0]);
1208             } else if (mv->ref[1] == ref) {
1209                 RETURN_MV(mv->mv[1]);
1214     // MV at this position in previous frame, using same reference frame
1215     if (s->use_last_frame_mvs) {
1216         struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
// Frame-threading: wait until the previous frame has decoded this row
// before reading its MVs (skipped when that frame used 2-pass decoding).
1218         if (!s->frames[REF_FRAME_MVPAIR].uses_2pass)
1219             ff_thread_await_progress(&s->frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
1220         if (mv->ref[0] == ref) {
1221             RETURN_MV(mv->mv[0]);
1222         } else if (mv->ref[1] == ref) {
1223             RETURN_MV(mv->mv[1]);
// RETURN_SCALE_MV: like RETURN_MV, but negates the MV first when the
// candidate's reference has the opposite sign bias to `ref`.
1227 #define RETURN_SCALE_MV(mv, scale) \
1230         VP56mv mv_temp = { -mv.x, -mv.y }; \
1231         RETURN_MV(mv_temp); \
1237     // previously coded MVs in this neighbourhood, using different reference frame
1238     for (i = 0; i < 8; i++) {
1239         int c = p[i][0] + col, r = p[i][1] + row;
1241         if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1242             struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1244             if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1245                 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1247             if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1248                 // BUG - libvpx has this condition regardless of whether
1249                 // we used the first ref MV and pre-scaling
1250                 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1251                 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1256     // MV at this position in previous frame, using different reference frame
1257     if (s->use_last_frame_mvs) {
1258         struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1260         // no need to await_progress, because we already did that above
1261         if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1262             RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1264         if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1265             // BUG - libvpx has this condition regardless of whether
1266             // we used the first ref MV and pre-scaling
1267             AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1268             RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
// Fallback: no candidate found above; clamp whatever pmv already holds.
1273     clamp_mv(pmv, pmv, s);
1276 #undef RETURN_SCALE_MV
// Decode one motion-vector component (x or y, selected by idx) from the
// range coder: sign, magnitude class, then either the class0 path
// (magnitude < 8: integer bit, fractional tree, optional high-precision
// bit) or the general path (per-class integer bits, fractional tree,
// optional hp bit). Updates s->counts.mv_comp[] for backward adaptation.
// Returns the signed component value. NOTE(review): this listing has
// lines elided (braces/else branches not visible) -- confirm the exact
// branch structure against the full file.
1279 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1281     int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1282     int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1283                                 s->prob.p.mv_comp[idx].classes);
1285     s->counts.mv_comp[idx].sign[sign]++;
1286     s->counts.mv_comp[idx].classes[c]++;
// General path: read c integer magnitude bits, MSB-first accumulation.
1290         for (n = 0, m = 0; m < c; m++) {
1291             bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1293             s->counts.mv_comp[idx].bits[m][bit]++;
1296         bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1298         s->counts.mv_comp[idx].fp[bit]++;
// High-precision (1/8-pel) bit, only read when hp is enabled.
1300             bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1301             s->counts.mv_comp[idx].hp[bit]++;
1305             // bug in libvpx - we count for bw entropy purposes even if the
1307             s->counts.mv_comp[idx].hp[1]++;
// class0 path: magnitude fits in 3+1 bits.
1311         n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1312         s->counts.mv_comp[idx].class0[n]++;
1313         bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1314                                s->prob.p.mv_comp[idx].class0_fp[n]);
1315         s->counts.mv_comp[idx].class0_fp[n][bit]++;
1316         n = (n << 3) | (bit << 1);
1318             bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1319             s->counts.mv_comp[idx].class0_hp[bit]++;
1323             // bug in libvpx - we count for bw entropy purposes even if the
1325             s->counts.mv_comp[idx].class0_hp[1]++;
// Magnitude is stored biased by 1; apply the decoded sign.
1329     return sign ? -(n + 1) : (n + 1);
// Fill mv[0] (and mv[1] for compound prediction) for the current
// (sub-)block: ZEROMV short-circuits; otherwise predict via
// find_ref_mvs() and, for NEWMV, add a coded MV residual whose joint
// (H/V presence) is decoded first. `hp` disables 1/8-pel precision for
// large vectors. NOTE(review): this listing has lines elided (the hp
// rounding bodies and several closers are not visible) -- confirm
// against the full file.
1332 static void fill_mv(VP9Context *s,
1333                     VP56mv *mv, int mode, int sb)
1337     if (mode == ZEROMV) {
// First reference (ref[0]):
1342         // FIXME cache this value and reuse for other subblocks
1343         find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1344                      mode == NEWMV ? -1 : sb);
1345         // FIXME maybe move this code into find_ref_mvs()
// hp is cleared when high-precision MVs are off or the predictor is
// large (|x| or |y| >= 64); the elided body presumably rounds the
// predictor -- TODO confirm.
1346         if ((mode == NEWMV || sb == -1) &&
1347             !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1361         if (mode == NEWMV) {
1362             enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1363                                               s->prob.p.mv_joint);
1365             s->counts.mv_joint[j]++;
1366             if (j >= MV_JOINT_V)
1367                 mv[0].y += read_mv_component(s, 0, hp);
1369                 mv[0].x += read_mv_component(s, 1, hp);
// Second reference (ref[1]), compound prediction only (elided guard).
1373         // FIXME cache this value and reuse for other subblocks
1374         find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1375                      mode == NEWMV ? -1 : sb);
1376         if ((mode == NEWMV || sb == -1) &&
1377             !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1391         if (mode == NEWMV) {
1392             enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1393                                               s->prob.p.mv_joint);
1395             s->counts.mv_joint[j]++;
1396             if (j >= MV_JOINT_V)
1397                 mv[1].y += read_mv_component(s, 0, hp);
1399                 mv[1].x += read_mv_component(s, 1, hp);
// Fill a w x h byte rectangle at ptr (row pitch = stride) with value v,
// using widening stores (16/32/64-bit replicated patterns) chosen by
// width. NOTE(review): this listing is heavily elided (the per-width
// switch/loops are not visible) -- confirm against the full file.
1405 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1406                                        ptrdiff_t stride, int v)
1416         int v16 = v * 0x0101;
1424         uint32_t v32 = v * 0x01010101;
1433         uint64_t v64 = v * 0x0101010101010101ULL;
1439         uint32_t v32 = v * 0x01010101;
1442             AV_WN32A(ptr + 4, v32);
// Decode all per-block mode information for the current block: segment
// id (with optional temporal prediction from the previous frame's
// segmentation map), skip flag, intra/inter flag, transform size, then
// either intra prediction modes (keyframe or intra block) or reference
// frame(s) + inter modes + motion vectors + interpolation filter.
// Finally propagates everything into the above/left context arrays and
// the per-frame MV/ref storage used by later blocks and the next frame.
// NOTE(review): this numbered listing has many interior lines elided
// (else branches, braces, some declarations) -- the comments below
// describe only what the visible lines establish; confirm details
// against the full file.
1451 static void decode_mode(AVCodecContext *ctx)
// Partition context nibbles per block size, for left/above ctx updates.
1453     static const uint8_t left_ctx[N_BS_SIZES] = {
1454         0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1456     static const uint8_t above_ctx[N_BS_SIZES] = {
1457         0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
// Largest transform size usable for each block size.
1459     static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1460         TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1461         TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1463     VP9Context *s = ctx->priv_data;
1465     int row = s->row, col = s->col, row7 = s->row7;
1466     enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
// w4/h4 are the block dimensions in 8x8 units, clipped to frame edges.
1467     int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1468     int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
1469     int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1470     int vref, filter_id;
// --- segment id ---------------------------------------------------------
1472     if (!s->segmentation.enabled) {
1474     } else if (s->keyframe || s->intraonly) {
1475         b->seg_id = !s->segmentation.update_map ? 0 :
1476                     vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
// Temporal segment prediction: reuse the minimum seg id of the
// co-located area in the previous frame's segmentation map.
1477     } else if (!s->segmentation.update_map ||
1478                (s->segmentation.temporal &&
1479                 vp56_rac_get_prob_branchy(&s->c,
1480                     s->prob.segpred[s->above_segpred_ctx[col] +
1481                                     s->left_segpred_ctx[row7]]))) {
1482         if (!s->errorres && s->frames[REF_FRAME_SEGMAP].segmentation_map) {
1484             uint8_t *refsegmap = s->frames[REF_FRAME_SEGMAP].segmentation_map;
// Frame-threading: ensure the reference segmap row is decoded.
1486             if (!s->frames[REF_FRAME_SEGMAP].uses_2pass)
1487                 ff_thread_await_progress(&s->frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
1488             for (y = 0; y < h4; y++) {
1489                 int idx_base = (y + row) * 8 * s->sb_cols + col;
1490                 for (x = 0; x < w4; x++)
1491                     pred = FFMIN(pred, refsegmap[idx_base + x]);
1493             av_assert1(pred < 8);
1499         memset(&s->above_segpred_ctx[col], 1, w4);
1500         memset(&s->left_segpred_ctx[row7], 1, h4);
// Explicitly coded segment id.
1502         b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1505         memset(&s->above_segpred_ctx[col], 0, w4);
1506         memset(&s->left_segpred_ctx[row7], 0, h4);
// Record the decoded seg id into the current frame's segmentation map.
1508     if (s->segmentation.enabled &&
1509         (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1510         setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1511                   bw4, bh4, 8 * s->sb_cols, b->seg_id);
// --- skip flag ----------------------------------------------------------
1514     b->skip = s->segmentation.enabled &&
1515               s->segmentation.feat[b->seg_id].skip_enabled;
1517         int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1518         b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1519         s->counts.skip[c][b->skip]++;
// --- intra/inter flag ---------------------------------------------------
1522     if (s->keyframe || s->intraonly) {
1524     } else if (s->segmentation.enabled && s->segmentation.feat[b->seg_id].ref_enabled) {
1525         b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1529         if (have_a && have_l) {
1530             c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1533             c = have_a ? 2 * s->above_intra_ctx[col] :
1534                 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1536         bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1537         s->counts.intra[c][bit]++;
// --- transform size -----------------------------------------------------
1541     if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1545             c = (s->above_skip_ctx[col] ? max_tx :
1546                  s->above_txfm_ctx[col]) +
1547                 (s->left_skip_ctx[row7] ? max_tx :
1548                  s->left_txfm_ctx[row7]) > max_tx;
1550             c = s->above_skip_ctx[col] ? 1 :
1551                 (s->above_txfm_ctx[col] * 2 > max_tx);
1553         } else if (have_l) {
1554             c = s->left_skip_ctx[row7] ? 1 :
1555                 (s->left_txfm_ctx[row7] * 2 > max_tx);
// Decode tx size as a unary-ish code, capped by max_tx (elided switch).
1561             b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1563                 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1565                     b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1567             s->counts.tx32p[c][b->tx]++;
1570             b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1572                 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1573             s->counts.tx16p[c][b->tx]++;
1576             b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1577             s->counts.tx8p[c][b->tx]++;
1584         b->tx = FFMIN(max_tx, s->txfmmode);
// --- intra modes (keyframe / intra-only: default kf probabilities) ------
1587     if (s->keyframe || s->intraonly) {
1588         uint8_t *a = &s->above_mode_ctx[col * 2];
1589         uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1592         if (b->bs > BS_8x8) {
1593             // FIXME the memory storage intermediates here aren't really
1594             // necessary, they're just there to make the code slightly
1596             b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1597                                     vp9_default_kf_ymode_probs[a[0]][l[0]]);
1598             if (b->bs != BS_8x4) {
1599                 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1600                                  vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1601                 l[0] = a[1] = b->mode[1];
1603                 l[0] = a[1] = b->mode[1] = b->mode[0];
1605             if (b->bs != BS_4x8) {
1606                 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1607                                         vp9_default_kf_ymode_probs[a[0]][l[1]]);
1608                 if (b->bs != BS_8x4) {
1609                     b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1610                                      vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1611                     l[1] = a[1] = b->mode[3];
1613                     l[1] = a[1] = b->mode[3] = b->mode[2];
1616                 b->mode[2] = b->mode[0];
1617                 l[1] = a[1] = b->mode[3] = b->mode[1];
1620             b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1621                                           vp9_default_kf_ymode_probs[*a][*l]);
1622             b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1623             // FIXME this can probably be optimized
1624             memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1625             memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1627         b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1628                                      vp9_default_kf_uvmode_probs[b->mode[3]]);
// --- intra modes (inter frame: adaptive y_mode/uv_mode probabilities) ---
1629     } else if (b->intra) {
1631         if (b->bs > BS_8x8) {
1632             b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1633                                           s->prob.p.y_mode[0]);
1634             s->counts.y_mode[0][b->mode[0]]++;
1635             if (b->bs != BS_8x4) {
1636                 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1637                                               s->prob.p.y_mode[0]);
1638                 s->counts.y_mode[0][b->mode[1]]++;
1640                 b->mode[1] = b->mode[0];
1642             if (b->bs != BS_4x8) {
1643                 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1644                                               s->prob.p.y_mode[0]);
1645                 s->counts.y_mode[0][b->mode[2]]++;
1646                 if (b->bs != BS_8x4) {
1647                     b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1648                                                   s->prob.p.y_mode[0]);
1649                     s->counts.y_mode[0][b->mode[3]]++;
1651                     b->mode[3] = b->mode[2];
1654                 b->mode[2] = b->mode[0];
1655                 b->mode[3] = b->mode[1];
1658             static const uint8_t size_group[10] = {
1659                 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1661             int sz = size_group[b->bs];
1663             b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1664                                           s->prob.p.y_mode[sz]);
1665             b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1666             s->counts.y_mode[sz][b->mode[3]]++;
1668         b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1669                                      s->prob.p.uv_mode[b->mode[3]]);
1670         s->counts.uv_mode[b->mode[3]][b->uvmode]++;
// --- inter block: references, modes, MVs, filter ------------------------
// Maps (above_mode, left_mode) to a context for the inter-mode tree.
1672         static const uint8_t inter_mode_ctx_lut[14][14] = {
1673             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1674             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1675             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1676             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1677             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1678             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1679             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1680             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1681             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1682             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1683             { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1684             { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1685             { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1686             { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
// Segment-level reference override bypasses ref/comp decoding.
1689         if (s->segmentation.enabled && s->segmentation.feat[b->seg_id].ref_enabled) {
1690             av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1692             b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1694             // read comp_pred flag
1695             if (s->comppredmode != PRED_SWITCHABLE) {
1696                 b->comp = s->comppredmode == PRED_COMPREF;
1700                 // FIXME add intra as ref=0xff (or -1) to make these easier?
1703                     if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1705                     } else if (s->above_comp_ctx[col]) {
1706                         c = 2 + (s->left_intra_ctx[row7] ||
1707                                  s->left_ref_ctx[row7] == s->fixcompref);
1708                     } else if (s->left_comp_ctx[row7]) {
1709                         c = 2 + (s->above_intra_ctx[col] ||
1710                                  s->above_ref_ctx[col] == s->fixcompref);
1712                         c = (!s->above_intra_ctx[col] &&
1713                              s->above_ref_ctx[col] == s->fixcompref) ^
1714                             (!s->left_intra_ctx[row7] &&
// NOTE(review): `row & 7` here vs `row7` elsewhere -- equivalent iff
// row7 == row & 7 (the naming suggests so); confirm in the full file.
1715                              s->left_ref_ctx[row & 7] == s->fixcompref);
1718                     c = s->above_comp_ctx[col] ? 3 :
1719                         (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1721                 } else if (have_l) {
1722                     c = s->left_comp_ctx[row7] ? 3 :
1723                         (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1727                 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1728                 s->counts.comp[c][b->comp]++;
1731             // read actual references
1732             // FIXME probably cache a few variables here to prevent repetitive
1733             // memory accesses below
1734             if (b->comp) /* two references */ {
1735                 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1737                 b->ref[fix_idx] = s->fixcompref;
1738                 // FIXME can this codeblob be replaced by some sort of LUT?
1741                     if (s->above_intra_ctx[col]) {
1742                         if (s->left_intra_ctx[row7]) {
1745                             c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1747                     } else if (s->left_intra_ctx[row7]) {
1748                         c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1750                         int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1752                         if (refl == refa && refa == s->varcompref[1]) {
1754                         } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1755                             if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1756                                 (refl == s->fixcompref && refa == s->varcompref[0])) {
1759                                 c = (refa == refl) ? 3 : 1;
1761                         } else if (!s->left_comp_ctx[row7]) {
1762                             if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1765                                 c = (refl == s->varcompref[1] &&
1766                                      refa != s->varcompref[1]) ? 2 : 4;
1768                         } else if (!s->above_comp_ctx[col]) {
1769                             if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1772                                 c = (refa == s->varcompref[1] &&
1773                                      refl != s->varcompref[1]) ? 2 : 4;
1776                             c = (refl == refa) ? 4 : 2;
1780                     if (s->above_intra_ctx[col]) {
1782                     } else if (s->above_comp_ctx[col]) {
1783                         c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1785                         c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1788                 } else if (have_l) {
1789                     if (s->left_intra_ctx[row7]) {
1791                     } else if (s->left_comp_ctx[row7]) {
1792                         c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1794                         c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1799                 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1800                 b->ref[var_idx] = s->varcompref[bit];
1801                 s->counts.comp_ref[c][bit]++;
1802             } else /* single reference */ {
// First single_ref bit: last-frame vs golden/altref.
1805                 if (have_a && !s->above_intra_ctx[col]) {
1806                     if (have_l && !s->left_intra_ctx[row7]) {
1807                         if (s->left_comp_ctx[row7]) {
1808                             if (s->above_comp_ctx[col]) {
1809                                 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1810                                          !s->above_ref_ctx[col]);
1812                                 c = (3 * !s->above_ref_ctx[col]) +
1813                                     (!s->fixcompref || !s->left_ref_ctx[row7]);
1815                         } else if (s->above_comp_ctx[col]) {
1816                             c = (3 * !s->left_ref_ctx[row7]) +
1817                                 (!s->fixcompref || !s->above_ref_ctx[col]);
1819                             c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1821                     } else if (s->above_intra_ctx[col]) {
1823                     } else if (s->above_comp_ctx[col]) {
1824                         c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1826                         c = 4 * (!s->above_ref_ctx[col]);
1828                 } else if (have_l && !s->left_intra_ctx[row7]) {
1829                     if (s->left_intra_ctx[row7]) {
1831                     } else if (s->left_comp_ctx[row7]) {
1832                         c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1834                         c = 4 * (!s->left_ref_ctx[row7]);
1839                 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1840                 s->counts.single_ref[c][0][bit]++;
// Second single_ref bit (golden vs altref), only when bit==1 (elided).
1844                     // FIXME can this codeblob be replaced by some sort of LUT?
1847                         if (s->left_intra_ctx[row7]) {
1848                             if (s->above_intra_ctx[col]) {
1850                             } else if (s->above_comp_ctx[col]) {
1851                                 c = 1 + 2 * (s->fixcompref == 1 ||
1852                                              s->above_ref_ctx[col] == 1);
1853                             } else if (!s->above_ref_ctx[col]) {
1856                                 c = 4 * (s->above_ref_ctx[col] == 1);
1858                         } else if (s->above_intra_ctx[col]) {
1859                             if (s->left_intra_ctx[row7]) {
1861                             } else if (s->left_comp_ctx[row7]) {
1862                                 c = 1 + 2 * (s->fixcompref == 1 ||
1863                                              s->left_ref_ctx[row7] == 1);
1864                             } else if (!s->left_ref_ctx[row7]) {
1867                                 c = 4 * (s->left_ref_ctx[row7] == 1);
1869                         } else if (s->above_comp_ctx[col]) {
1870                             if (s->left_comp_ctx[row7]) {
1871                                 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1872                                     c = 3 * (s->fixcompref == 1 ||
1873                                              s->left_ref_ctx[row7] == 1);
1877                             } else if (!s->left_ref_ctx[row7]) {
1878                                 c = 1 + 2 * (s->fixcompref == 1 ||
1879                                              s->above_ref_ctx[col] == 1);
1881                                 c = 3 * (s->left_ref_ctx[row7] == 1) +
1882                                     (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1884                         } else if (s->left_comp_ctx[row7]) {
1885                             if (!s->above_ref_ctx[col]) {
1886                                 c = 1 + 2 * (s->fixcompref == 1 ||
1887                                              s->left_ref_ctx[row7] == 1);
1889                                 c = 3 * (s->above_ref_ctx[col] == 1) +
1890                                     (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1892                         } else if (!s->above_ref_ctx[col]) {
1893                             if (!s->left_ref_ctx[row7]) {
1896                                 c = 4 * (s->left_ref_ctx[row7] == 1);
1898                         } else if (!s->left_ref_ctx[row7]) {
1899                             c = 4 * (s->above_ref_ctx[col] == 1);
1901                             c = 2 * (s->left_ref_ctx[row7] == 1) +
1902                                 2 * (s->above_ref_ctx[col] == 1);
1905                         if (s->above_intra_ctx[col] ||
1906                             (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1908                         } else if (s->above_comp_ctx[col]) {
1909                             c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1911                             c = 4 * (s->above_ref_ctx[col] == 1);
1914                     } else if (have_l) {
1915                         if (s->left_intra_ctx[row7] ||
1916                             (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1918                         } else if (s->left_comp_ctx[row7]) {
1919                             c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1921                             c = 4 * (s->left_ref_ctx[row7] == 1);
1926                     bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1927                     s->counts.single_ref[c][1][bit]++;
1928                     b->ref[0] = 1 + bit;
// --- inter mode + filter for whole-block (<= 8x8) case ------------------
1933         if (b->bs <= BS_8x8) {
1934             if (s->segmentation.enabled && s->segmentation.feat[b->seg_id].skip_enabled) {
1935                 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1937                 static const uint8_t off[10] = {
1938                     3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1941                 // FIXME this needs to use the LUT tables from find_ref_mvs
1942                 // because not all are -1,0/0,-1
1943                 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1944                                           [s->left_mode_ctx[row7 + off[b->bs]]];
1946                 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1947                                               s->prob.p.mv_mode[c]);
1948                 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
// Inter mode enum values start at 10, hence the -10 count offset.
1949                 s->counts.mv_mode[c][b->mode[0] - 10]++;
// --- interpolation filter ----------------------------------------------
1953         if (s->filtermode == FILTER_SWITCHABLE) {
1956             if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1957                 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1958                     c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1959                         s->left_filter_ctx[row7] : 3;
1961                     c = s->above_filter_ctx[col];
1963             } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1964                 c = s->left_filter_ctx[row7];
1969             filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1970                                          s->prob.p.filter[c]);
1971             s->counts.filter[c][filter_id]++;
1972             b->filter = vp9_filter_lut[filter_id];
1974             b->filter = s->filtermode;
// --- per-sub-block modes/MVs for split (> 8x8) blocks -------------------
1977         if (b->bs > BS_8x8) {
1978             int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1980             b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1981                                           s->prob.p.mv_mode[c]);
1982             s->counts.mv_mode[c][b->mode[0] - 10]++;
1983             fill_mv(s, b->mv[0], b->mode[0], 0);
1985             if (b->bs != BS_8x4) {
1986                 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1987                                               s->prob.p.mv_mode[c]);
1988                 s->counts.mv_mode[c][b->mode[1] - 10]++;
1989                 fill_mv(s, b->mv[1], b->mode[1], 1);
1991                 b->mode[1] = b->mode[0];
1992                 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1993                 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1996             if (b->bs != BS_4x8) {
1997                 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1998                                               s->prob.p.mv_mode[c]);
1999                 s->counts.mv_mode[c][b->mode[2] - 10]++;
2000                 fill_mv(s, b->mv[2], b->mode[2], 2);
2002                 if (b->bs != BS_8x4) {
2003                     b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
2004                                                   s->prob.p.mv_mode[c]);
2005                     s->counts.mv_mode[c][b->mode[3] - 10]++;
2006                     fill_mv(s, b->mv[3], b->mode[3], 3);
2008                     b->mode[3] = b->mode[2];
2009                     AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
2010                     AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
2013                 b->mode[2] = b->mode[0];
2014                 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2015                 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2016                 b->mode[3] = b->mode[1];
2017                 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
2018                 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
// Unsplit block: decode one MV pair and replicate to all sub-blocks.
2021             fill_mv(s, b->mv[0], b->mode[0], -1);
2022             AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
2023             AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2024             AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
2025             AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2026             AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2027             AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
// Reference stored into the ref context arrays below.
2030         vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
// SPLAT_CTX: broadcast a 1-byte value across n bytes of a context array
// with a single aligned store where possible (64-bit variant first,
// 32-bit fallback variant second; selection #if is elided).
2034 #define SPLAT_CTX(var, val, n) \
2036     case 1:  var = val;                                    break; \
2037     case 2:  AV_WN16A(&var, val *             0x0101);     break; \
2038     case 4:  AV_WN32A(&var, val *         0x01010101);     break; \
2039     case 8:  AV_WN64A(&var, val * 0x0101010101010101ULL);  break; \
2041         uint64_t v64 = val * 0x0101010101010101ULL; \
2042         AV_WN64A(              &var,     v64); \
2043         AV_WN64A(&((uint8_t *) &var)[8], v64); \
2048 #define SPLAT_CTX(var, val, n) \
2050     case 1:  var = val;                         break; \
2051     case 2:  AV_WN16A(&var, val *     0x0101);  break; \
2052     case 4:  AV_WN32A(&var, val * 0x01010101);  break; \
2054         uint32_t v32 = val * 0x01010101; \
2055         AV_WN32A(              &var,     v32); \
2056         AV_WN32A(&((uint8_t *) &var)[4], v32); \
2060         uint32_t v32 = val * 0x01010101; \
2061         AV_WN32A(              &var,      v32); \
2062         AV_WN32A(&((uint8_t *) &var)[4],  v32); \
2063         AV_WN32A(&((uint8_t *) &var)[8],  v32); \
2064         AV_WN32A(&((uint8_t *) &var)[12], v32); \
// --- propagate decoded state into above/left context arrays -------------
2070     switch (bwh_tab[1][b->bs][0]) {
2071 #define SET_CTXS(dir, off, n) \
2073         SPLAT_CTX(s->dir##_skip_ctx[off],      b->skip,          n); \
2074         SPLAT_CTX(s->dir##_txfm_ctx[off],      b->tx,            n); \
2075         SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2076         if (!s->keyframe && !s->intraonly) { \
2077             SPLAT_CTX(s->dir##_intra_ctx[off], b->intra,   n); \
2078             SPLAT_CTX(s->dir##_comp_ctx[off],  b->comp,    n); \
2079             SPLAT_CTX(s->dir##_mode_ctx[off],  b->mode[3], n); \
2081                 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2082                 if (s->filtermode == FILTER_SWITCHABLE) { \
2083                     SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2088     case 1: SET_CTXS(above, col, 1); break;
2089     case 2: SET_CTXS(above, col, 2); break;
2090     case 4: SET_CTXS(above, col, 4); break;
2091     case 8: SET_CTXS(above, col, 8); break;
2093     switch (bwh_tab[1][b->bs][1]) {
2094     case 1: SET_CTXS(left, row7, 1); break;
2095     case 2: SET_CTXS(left, row7, 2); break;
2096     case 4: SET_CTXS(left, row7, 4); break;
2097     case 8: SET_CTXS(left, row7, 8); break;
// Store edge MVs (right column / bottom row of sub-blocks) into the
// above/left MV contexts used by find_ref_mvs() for later blocks.
2102     if (!s->keyframe && !s->intraonly) {
2103         if (b->bs > BS_8x8) {
2104             int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2106             AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2107             AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2108             AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2109             AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2110             AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2111             AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2112             AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2113             AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2115             int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2117             for (n = 0; n < w4 * 2; n++) {
2118                 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2119                 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2121             for (n = 0; n < h4 * 2; n++) {
2122                 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2123                 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
// Write refs + MVs into the per-frame mv array (consumed by this frame's
// later blocks and as REF_FRAME_MVPAIR by the next frame).
2129     for (y = 0; y < h4; y++) {
2130         int x, o = (row + y) * s->sb_cols * 8 + col;
2131         struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
// Intra case elided here; compound and single-ref cases store b->mv[3].
2134             for (x = 0; x < w4; x++) {
2138         } else if (b->comp) {
2139             for (x = 0; x < w4; x++) {
2140                 mv[x].ref[0] = b->ref[0];
2141                 mv[x].ref[1] = b->ref[1];
2142                 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2143                 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2146             for (x = 0; x < w4; x++) {
2147                 mv[x].ref[0] = b->ref[0];
2149                 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2155 // FIXME merge cnt/eob arguments?
// Decode one transform block's coefficients from the range coder using
// the VP9 token scheme: per-position EOB/zero/one decisions followed by
// the Pareto-model tail (tp[3..10], filled lazily from
// vp9_model_pareto8) for magnitudes >= 2, with extra literal bits for
// high bit depths (!is8bitsperpixel). `nnz` is the nonzero context,
// updated from the two already-decoded neighbours in nb[]; `cache`
// remembers clamped magnitudes per scan position for that purpose.
// Dequantized values are written via STORE_COEF (16-bit for 8bpp,
// 32-bit otherwise); tx32x32 halves the dequantized value.
// NOTE(review): this listing has lines elided (loop header, rc/val
// declarations, several closers) -- confirm against the full file.
2156 static av_always_inline int
2157 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2158                         int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
2159                         unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2160                         int nnz, const int16_t *scan, const int16_t (*nb)[2],
2161                         const int16_t *band_counts, const int16_t *qmul)
2163     int i = 0, band = 0, band_left = band_counts[band];
2164     uint8_t *tp = p[0][nnz];
2165     uint8_t cache[1024];
// EOB decision for the current position.
2170         val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2171         eob[band][nnz][val]++;
2176         if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2177             cnt[band][nnz][0]++;
2179                 band_left = band_counts[++band];
// Next nonzero context = rounded average of the two neighbours.
2181             nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2183             if (++i == n_coeffs)
2184                 break;  //invalid input; blocks should end with EOB
2189         if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2190             cnt[band][nnz][1]++;
2194             // fill in p[3-10] (model fill) - only once per frame for each pos
2196                 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2198             cnt[band][nnz][2]++;
2199             if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2200                 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2201                     cache[rc] = val = 2;
2203                     val = 3 + vp56_rac_get_prob(c, tp[5]);
2206             } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2208                 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2209                     val = 5 + vp56_rac_get_prob(c, 159);
2211                     val  = 7 + (vp56_rac_get_prob(c, 165) << 1);
2212                     val += vp56_rac_get_prob(c, 145);
// cat 3-6: progressively longer literal suffixes with fixed probs.
2216                 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2217                     if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2218                         val  = 11 + (vp56_rac_get_prob(c, 173) << 2);
2219                         val +=      (vp56_rac_get_prob(c, 148) << 1);
2220                         val +=       vp56_rac_get_prob(c, 140);
2222                         val  = 19 + (vp56_rac_get_prob(c, 176) << 3);
2223                         val +=      (vp56_rac_get_prob(c, 155) << 2);
2224                         val +=      (vp56_rac_get_prob(c, 140) << 1);
2225                         val +=       vp56_rac_get_prob(c, 135);
2227                 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2228                     val  = 35 + (vp56_rac_get_prob(c, 180) << 4);
2229                     val +=      (vp56_rac_get_prob(c, 157) << 3);
2230                     val +=      (vp56_rac_get_prob(c, 141) << 2);
2231                     val +=      (vp56_rac_get_prob(c, 134) << 1);
2232                     val +=       vp56_rac_get_prob(c, 130);
// cat6: widest escape; high bit depths read extra top bits first.
2235                     if (!is8bitsperpixel) {
2237                             val += vp56_rac_get_prob(c, 255) << 17;
2238                         val += vp56_rac_get_prob(c, 255) << 16;
2240                     val += (vp56_rac_get_prob(c, 255) << 15);
2241                     val += (vp56_rac_get_prob(c, 255) << 14);
2243                     val += (vp56_rac_get_prob(c, 254) << 13);
2244                     val += (vp56_rac_get_prob(c, 254) << 12);
2245                     val += (vp56_rac_get_prob(c, 254) << 11);
2246                     val += (vp56_rac_get_prob(c, 252) << 10);
2247                     val += (vp56_rac_get_prob(c, 249) << 9);
2248                     val += (vp56_rac_get_prob(c, 243) << 8);
2249                     val += (vp56_rac_get_prob(c, 230) << 7);
2250                     val += (vp56_rac_get_prob(c, 196) << 6);
2251                     val += (vp56_rac_get_prob(c, 177) << 5);
2252                     val += (vp56_rac_get_prob(c, 153) << 4);
2253                     val += (vp56_rac_get_prob(c, 140) << 3);
2254                     val += (vp56_rac_get_prob(c, 133) << 2);
2255                     val += (vp56_rac_get_prob(c, 130) << 1);
2256                     val += vp56_rac_get_prob(c, 129);
// STORE_COEF: 16-bit store for 8bpp, 32-bit (two int16 slots) otherwise.
2260 #define STORE_COEF(c, i, v) do { \
2261     if (is8bitsperpixel) { \
2264         AV_WN32A(&c[i * 2], v); \
2268             band_left = band_counts[++band];
// Sign bit last; qmul[0] applies to the DC (i == 0), qmul[1] to AC.
2270             STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
2272             STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
2273         nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2275     } while (++i < n_coeffs);
// 8bpp, non-32x32 specialization of decode_coeffs_b_generic()
// (is_tx32x32 = 0, is8bitsperpixel = 1, bpp = 8); the av_always_inline
// generic body lets the compiler fold these constants away.
2280 static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2281                                 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2282                                 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2283                                 const int16_t (*nb)[2], const int16_t *band_counts,
2284                                 const int16_t *qmul)
2286     return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
2287                                    nnz, scan, nb, band_counts, qmul);
/* 8bpp, 32x32 wrapper: is32=1, is8bitsperpixel=1, bpp=8
 * (32x32 coefficients are halved inside the generic decoder). */
2290 static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2291 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2292 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2293 const int16_t (*nb)[2], const int16_t *band_counts,
2294 const int16_t *qmul)
2296 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
2297 nnz, scan, nb, band_counts, qmul);
/* High-bit-depth, non-32x32 wrapper: is32=0, is8bitsperpixel=0,
 * actual bit depth taken from s->bpp (10 or 12 for VP9 profiles 2/3). */
2300 static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2301 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2302 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2303 const int16_t (*nb)[2], const int16_t *band_counts,
2304 const int16_t *qmul)
2306 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
2307 nnz, scan, nb, band_counts, qmul);
/* High-bit-depth, 32x32 wrapper: is32=1, is8bitsperpixel=0, bpp from s->bpp. */
2310 static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2311 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2312 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2313 const int16_t (*nb)[2], const int16_t *band_counts,
2314 const int16_t *qmul)
2316 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
2317 nnz, scan, nb, band_counts, qmul);
/* Decode all residual coefficients (luma then both chroma planes) for the
 * current block s->b. Dispatches to the per-bit-depth/per-size wrappers
 * through the DECODE_*_COEF_LOOP macros, maintaining the above/left
 * non-zero-coefficient contexts (a[]/l[]) as it goes.
 *
 * @param is8bitsperpixel compile-time constant (1 for 8bpp, 0 for hbd) so the
 *                        always-inline body specializes per bit depth
 * @return nonzero if any coefficient in the block was nonzero (total_coeff)
 * NOTE(review): several structural lines (braces, switch statements, the
 * macro "do { ... } while (0)" closers) are not visible in this extraction;
 * code bytes below are kept untouched. */
2320 static av_always_inline int decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
2322 VP9Context *s = ctx->priv_data;
2324 int row = s->row, col = s->col;
2325 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2326 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2327 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2328 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2329 int end_x = FFMIN(2 * (s->cols - col), w4);
2330 int end_y = FFMIN(2 * (s->rows - row), h4);
2331 int n, pl, x, y, res;
2332 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
2333 int tx = 4 * s->lossless + b->tx;
2334 const int16_t * const *yscans = vp9_scans[tx];
2335 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2336 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2337 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2338 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2339 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
/* coefficient counts per probability band, indexed by transform size;
 * last entry is the remainder after the explicitly banded coefficients */
2340 static const int16_t band_counts[4][8] = {
2341 { 1, 2, 3, 4, 3, 16 - 13 },
2342 { 1, 2, 3, 4, 11, 64 - 21 },
2343 { 1, 2, 3, 4, 11, 256 - 21 },
2344 { 1, 2, 3, 4, 11, 1024 - 21 },
2346 const int16_t *y_band_counts = band_counts[b->tx];
2347 const int16_t *uv_band_counts = band_counts[b->uvtx];
2348 int bytesperpixel = is8bitsperpixel ? 1 : 2;
2349 int total_coeff = 0;
/* MERGE/MERGE_CTX collapse the per-4x4 nnz context entries down to one
 * boolean per transform-sized step before decoding at that step size */
2351 #define MERGE(la, end, step, rd) \
2352 for (n = 0; n < end; n += step) \
2353 la[n] = !!rd(&la[n])
2354 #define MERGE_CTX(step, rd) \
2356 MERGE(l, end_y, step, rd); \
2357 MERGE(a, end_x, step, rd); \
2360 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2361 for (n = 0, y = 0; y < end_y; y += step) { \
2362 for (x = 0; x < end_x; x += step, n += step * step) { \
2363 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2364 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2365 (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
2366 c, e, p, a[x] + l[y], yscans[txtp], \
2367 ynbs[txtp], y_band_counts, qmul[0]); \
2368 a[x] = l[y] = !!res; \
2369 total_coeff |= !!res; \
2371 AV_WN16A(&s->eob[n], res); \
2378 #define SPLAT(la, end, step, cond) \
2380 for (n = 1; n < end; n += step) \
2381 la[n] = la[n - 1]; \
2382 } else if (step == 4) { \
2384 for (n = 0; n < end; n += step) \
2385 AV_WN32A(&la[n], la[n] * 0x01010101); \
2387 for (n = 0; n < end; n += step) \
2388 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2390 } else /* step == 8 */ { \
2392 if (HAVE_FAST_64BIT) { \
2393 for (n = 0; n < end; n += step) \
2394 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2396 for (n = 0; n < end; n += step) { \
2397 uint32_t v32 = la[n] * 0x01010101; \
2398 AV_WN32A(&la[n], v32); \
2399 AV_WN32A(&la[n + 4], v32); \
2403 for (n = 0; n < end; n += step) \
2404 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2407 #define SPLAT_CTX(step) \
2409 SPLAT(a, end_x, step, end_x == w4); \
2410 SPLAT(l, end_y, step, end_y == h4); \
2416 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2419 MERGE_CTX(2, AV_RN16A);
2420 DECODE_Y_COEF_LOOP(2, 0,);
2424 MERGE_CTX(4, AV_RN32A);
2425 DECODE_Y_COEF_LOOP(4, 0,);
2429 MERGE_CTX(8, AV_RN64A);
2430 DECODE_Y_COEF_LOOP(8, 0, 32);
/* chroma variant: fixed DCT_DCT scan, per-plane eob storage in uveob[pl] */
2435 #define DECODE_UV_COEF_LOOP(step, v) \
2436 for (n = 0, y = 0; y < end_y; y += step) { \
2437 for (x = 0; x < end_x; x += step, n += step * step) { \
2438 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2439 (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
2440 16 * step * step, c, e, p, a[x] + l[y], \
2441 uvscan, uvnb, uv_band_counts, qmul[1]); \
2442 a[x] = l[y] = !!res; \
2443 total_coeff |= !!res; \
2445 AV_WN16A(&s->uveob[pl][n], res); \
2447 s->uveob[pl][n] = res; \
2452 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2453 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2454 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
/* decode both chroma planes with the uv transform size */
2459 for (pl = 0; pl < 2; pl++) {
2460 a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2461 l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2464 DECODE_UV_COEF_LOOP(1,);
2467 MERGE_CTX(2, AV_RN16A);
2468 DECODE_UV_COEF_LOOP(2,);
2472 MERGE_CTX(4, AV_RN32A);
2473 DECODE_UV_COEF_LOOP(4,);
2477 MERGE_CTX(8, AV_RN64A);
2478 DECODE_UV_COEF_LOOP(8, 32);
/* Non-inline 8bpp entry point for decode_coeffs() (is8bitsperpixel=1). */
2487 static int decode_coeffs_8bpp(AVCodecContext *ctx)
2489 return decode_coeffs(ctx, 1);
/* Non-inline high-bit-depth entry point for decode_coeffs() (is8bitsperpixel=0). */
2492 static int decode_coeffs_16bpp(AVCodecContext *ctx)
2494 return decode_coeffs(ctx, 0);
/* Prepare the top (*a) and left (l) edge-pixel arrays for one intra-predicted
 * transform block, substituting unavailable neighbours, and return the
 * (possibly remapped) prediction mode to use.
 *
 * @param a      in/out: pointer to the top-edge buffer; may be redirected to
 *               point directly at the source row when it is fully usable
 * @param l      left-edge pixel array to fill
 * @param tx     transform size (edge length is 4 << tx pixels)
 * @param p      plane index (0 = luma) used to address s->intra_pred_data[]
 * @param ss_h/ss_v chroma subsampling flags for this plane
 * @return the mode after mode_conv[] remapping for missing neighbours */
2497 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2498 uint8_t *dst_edge, ptrdiff_t stride_edge,
2499 uint8_t *dst_inner, ptrdiff_t stride_inner,
2500 uint8_t *l, int col, int x, int w,
2501 int row, int y, enum TxfmMode tx,
2502 int p, int ss_h, int ss_v, int bytesperpixel)
2504 int have_top = row > 0 || y > 0;
2505 int have_left = col > s->tiling.tile_col_start || x > 0;
2506 int have_right = x < w - 1;
/* remap each directional mode to a DC fallback when the side it needs
 * (left and/or top) is outside the frame/tile */
2508 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2509 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2510 { DC_127_PRED, VERT_PRED } },
2511 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2512 { HOR_PRED, HOR_PRED } },
2513 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2514 { LEFT_DC_PRED, DC_PRED } },
2515 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2516 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2517 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2518 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2519 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2520 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2521 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2522 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2523 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2524 { DC_127_PRED, VERT_LEFT_PRED } },
2525 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2526 { HOR_UP_PRED, HOR_UP_PRED } },
2527 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2528 { HOR_PRED, TM_VP8_PRED } },
/* which edge pixels each (remapped) mode actually reads */
2530 static const struct {
2531 uint8_t needs_left:1;
2532 uint8_t needs_top:1;
2533 uint8_t needs_topleft:1;
2534 uint8_t needs_topright:1;
2535 uint8_t invert_left:1;
2536 } edges[N_INTRA_PRED_MODES] = {
2537 [VERT_PRED] = { .needs_top = 1 },
2538 [HOR_PRED] = { .needs_left = 1 },
2539 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2540 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2541 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2542 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2543 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2544 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2545 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2546 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2547 [LEFT_DC_PRED] = { .needs_left = 1 },
2548 [TOP_DC_PRED] = { .needs_top = 1 },
2549 [DC_128_PRED] = { 0 },
2550 [DC_127_PRED] = { 0 },
2551 [DC_129_PRED] = { 0 }
2554 av_assert2(mode >= 0 && mode < 10);
2555 mode = mode_conv[mode][have_left][have_top];
2556 if (edges[mode].needs_top) {
2557 uint8_t *top, *topleft;
2558 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2559 int n_px_need_tr = 0;
2561 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2564 // if top of sb64-row, use s->intra_pred_data[] instead of
2565 // dst[-stride] for intra prediction (it contains pre- instead of
2566 // post-loopfilter data)
2568 top = !(row & 7) && !y ?
2569 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2570 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2572 topleft = !(row & 7) && !y ?
2573 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2574 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2575 &dst_inner[-stride_inner];
2579 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2580 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2581 n_px_need + n_px_need_tr <= n_px_have) {
/* copy what we have; replicate the last available pixel rightwards
 * when the block extends past the visible frame edge */
2585 if (n_px_need <= n_px_have) {
2586 memcpy(*a, top, n_px_need * bytesperpixel);
2588 #define memset_bpp(c, i1, v, i2, num) do { \
2589 if (bytesperpixel == 1) { \
2590 memset(&(c)[(i1)], (v)[(i2)], (num)); \
2592 int n, val = AV_RN16A(&(v)[(i2) * 2]); \
2593 for (n = 0; n < (num); n++) { \
2594 AV_WN16A(&(c)[((i1) + n) * 2], val); \
2598 memcpy(*a, top, n_px_have * bytesperpixel);
2599 memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
2602 #define memset_val(c, val, num) do { \
2603 if (bytesperpixel == 1) { \
2604 memset((c), (val), (num)); \
2607 for (n = 0; n < (num); n++) { \
2608 AV_WN16A(&(c)[n * 2], (val)); \
/* no usable top row: synthesize (128 << (bpp-8)) - 1, i.e. 127 at 8bpp */
2612 memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
2614 if (edges[mode].needs_topleft) {
2615 if (have_left && have_top) {
2616 #define assign_bpp(c, i1, v, i2) do { \
2617 if (bytesperpixel == 1) { \
2618 (c)[(i1)] = (v)[(i2)]; \
2620 AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
2623 assign_bpp(*a, -1, topleft, -1);
2625 #define assign_val(c, i, v) do { \
2626 if (bytesperpixel == 1) { \
2629 AV_WN16A(&(c)[(i) * 2], (v)); \
2632 assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
/* top-right extension only matters for 4x4 directional modes */
2635 if (tx == TX_4X4 && edges[mode].needs_topright) {
2636 if (have_top && have_right &&
2637 n_px_need + n_px_need_tr <= n_px_have) {
2638 memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
2640 memset_bpp(*a, 4, *a, 3, 4);
/* left edge: gathered from the destination column; HOR_UP stores it
 * inverted (invert_left) so prediction can read it bottom-up */
2645 if (edges[mode].needs_left) {
2647 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2648 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2649 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2651 if (edges[mode].invert_left) {
2652 if (n_px_need <= n_px_have) {
2653 for (i = 0; i < n_px_need; i++)
2654 assign_bpp(l, i, &dst[i * stride], -1);
2656 for (i = 0; i < n_px_have; i++)
2657 assign_bpp(l, i, &dst[i * stride], -1);
2658 memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
2661 if (n_px_need <= n_px_have) {
2662 for (i = 0; i < n_px_need; i++)
2663 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2665 for (i = 0; i < n_px_have; i++)
2666 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2667 memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
2671 memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
/* Intra reconstruction of the current block: for each transform-sized
 * sub-block, build prediction edges (check_intra_mode), run the DSP intra
 * predictor, then add the inverse-transformed residual. Luma first, then
 * both chroma planes. bytesperpixel is a compile-time constant (1 or 2). */
2678 static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
2679 ptrdiff_t uv_off, int bytesperpixel)
2681 VP9Context *s = ctx->priv_data;
2683 int row = s->row, col = s->col;
2684 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2685 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2686 int end_x = FFMIN(2 * (s->cols - col), w4);
2687 int end_y = FFMIN(2 * (s->rows - row), h4);
2688 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2689 int uvstep1d = 1 << b->uvtx, p;
2690 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2691 LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
2692 LOCAL_ALIGNED_32(uint8_t, l, [64]);
/* luma: iterate transform blocks in raster order within the prediction block */
2694 for (n = 0, y = 0; y < end_y; y += step1d) {
2695 uint8_t *ptr = dst, *ptr_r = dst_r;
2696 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
2697 ptr_r += 4 * step1d * bytesperpixel, n += step) {
2698 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2700 uint8_t *a = &a_buf[32];
2701 enum TxfmType txtp = vp9_intra_txfm_type[mode];
2702 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2704 mode = check_intra_mode(s, mode, &a, ptr_r,
2705 s->frames[CUR_FRAME].tf.f->linesize[0],
2706 ptr, s->y_stride, l,
2707 col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
2708 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2710 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2711 s->block + 16 * n * bytesperpixel, eob);
2713 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2714 dst += 4 * step1d * s->y_stride;
/* chroma: same walk at uv transform granularity, DCT_DCT only */
2721 step = 1 << (b->uvtx * 2);
2722 for (p = 0; p < 2; p++) {
2723 dst = s->dst[1 + p];
2724 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2725 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2726 uint8_t *ptr = dst, *ptr_r = dst_r;
2727 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
2728 ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
2729 int mode = b->uvmode;
2730 uint8_t *a = &a_buf[32];
2731 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2733 mode = check_intra_mode(s, mode, &a, ptr_r,
2734 s->frames[CUR_FRAME].tf.f->linesize[1],
2735 ptr, s->uv_stride, l, col, x, w4, row, y,
2736 b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
2737 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2739 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2740 s->uvblock[p] + 16 * n * bytesperpixel, eob);
2742 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2743 dst += 4 * uvstep1d * s->uv_stride;
/* 8bpp specialization of intra_recon() (bytesperpixel = 1). */
2748 static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2750 intra_recon(ctx, y_off, uv_off, 1);
/* High-bit-depth specialization of intra_recon() (bytesperpixel = 2). */
2753 static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2755 intra_recon(ctx, y_off, uv_off, 2);
/* Luma motion compensation when the reference frame has the same
 * resolution as the current frame. Waits for the reference row to be
 * decoded (frame threading), falls back to emulated_edge_mc when the
 * filter taps would read outside the reference picture. */
2758 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2759 uint8_t *dst, ptrdiff_t dst_stride,
2760 const uint8_t *ref, ptrdiff_t ref_stride,
2761 ThreadFrame *ref_frame,
2762 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2763 int bw, int bh, int w, int h, int bytesperpixel)
2765 int mx = mv->x, my = mv->y, th;
2769 ref += y * ref_stride + x * bytesperpixel;
2772 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2773 // we use +7 because the last 7 pixels of each sbrow can be changed in
2774 // the longest loopfilter of the next sbrow
2775 th = (y + bh + 4 * !!my + 7) >> 6;
2776 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
/* 8-tap filter needs 3 pixels before and 4 after the block in each
 * filtered direction; emulate the edge when that spills off the frame */
2777 if (x < !!mx * 3 || y < !!my * 3 ||
2778 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2779 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2780 ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
2782 bw + !!mx * 7, bh + !!my * 7,
2783 x - !!mx * 3, y - !!my * 3, w, h);
2784 ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2787 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
/* Chroma motion compensation, unscaled reference. Same structure as
 * mc_luma_unscaled() but handles both chroma planes and scales the MV by
 * the subsampling factors (mv is in luma units). */
2790 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2791 uint8_t *dst_u, uint8_t *dst_v,
2792 ptrdiff_t dst_stride,
2793 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2794 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2795 ThreadFrame *ref_frame,
2796 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2797 int bw, int bh, int w, int h, int bytesperpixel)
2799 int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
2803 ref_u += y * src_stride_u + x * bytesperpixel;
2804 ref_v += y * src_stride_v + x * bytesperpixel;
2807 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2808 // we use +7 because the last 7 pixels of each sbrow can be changed in
2809 // the longest loopfilter of the next sbrow
2810 th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2811 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
/* both planes share one edge_emu_buffer; U is filtered before V overwrites it */
2812 if (x < !!mx * 3 || y < !!my * 3 ||
2813 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2814 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2815 ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
2817 bw + !!mx * 7, bh + !!my * 7,
2818 x - !!mx * 3, y - !!my * 3, w, h);
2819 ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2820 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
2822 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2823 ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
2825 bw + !!mx * 7, bh + !!my * 7,
2826 x - !!mx * 3, y - !!my * 3, w, h);
2827 ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2828 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
2830 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2831 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
/* Bind mc_{luma,chroma}_dir to the *unscaled* MC helpers, then instantiate
 * the shared inter-prediction template once per bit depth. The template
 * (vp9_mc_template.c) expands to inter_pred_8bpp()/inter_pred_16bpp(). */
2835 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2836 px, py, pw, ph, bw, bh, w, h, i) \
2837 mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2838 mv, bw, bh, w, h, bytesperpixel)
2839 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2840 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2841 mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2842 row, col, mv, bw, bh, w, h, bytesperpixel)
/* two instantiations: 8bpp and high bit depth */
2844 #define FN(x) x##_8bpp
2845 #define BYTES_PER_PIXEL 1
2846 #include "vp9_mc_template.c"
2848 #undef BYTES_PER_PIXEL
2849 #define FN(x) x##_16bpp
2850 #define BYTES_PER_PIXEL 2
2851 #include "vp9_mc_template.c"
/* clean up so the scaled variants below can redefine the same macros */
2853 #undef mc_chroma_dir
2855 #undef BYTES_PER_PIXEL
/* Luma motion compensation against a reference of *different* resolution:
 * clips the MV, scales position/MV into reference coordinates via scale[],
 * and uses the scaled-MC DSP function (smc) with per-axis step[]. Falls
 * back to the unscaled path when the dimensions actually match. */
2858 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2859 vp9_mc_func (*mc)[2],
2860 uint8_t *dst, ptrdiff_t dst_stride,
2861 const uint8_t *ref, ptrdiff_t ref_stride,
2862 ThreadFrame *ref_frame,
2863 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2864 int px, int py, int pw, int ph,
2865 int bw, int bh, int w, int h, int bytesperpixel,
2866 const uint16_t *scale, const uint8_t *step)
2868 if (s->frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
2869 s->frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
2870 mc_luma_unscaled(s, mc, dst, dst_stride, ref, ref_stride, ref_frame,
2871 y, x, in_mv, bw, bh, w, h, bytesperpixel);
/* 14-bit fixed-point scaling of a coordinate along axis dim (0=x, 1=y) */
2873 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2875 int refbw_m1, refbh_m1;
2879 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2880 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2881 // BUG libvpx seems to scale the two components separately. This introduces
2882 // rounding errors but we have to reproduce them to be exactly compatible
2883 // with the output from libvpx...
2884 mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
2885 my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
2889 ref += y * ref_stride + x * bytesperpixel;
/* extent of the reference area actually read, minus one, in ref pixels */
2892 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2893 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2894 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2895 // we use +7 because the last 7 pixels of each sbrow can be changed in
2896 // the longest loopfilter of the next sbrow
2897 th = (y + refbh_m1 + 4 + 7) >> 6;
2898 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2899 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2900 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2901 ref - 3 * ref_stride - 3 * bytesperpixel,
2903 refbw_m1 + 8, refbh_m1 + 8,
2904 x - 3, y - 3, w, h);
2905 ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2908 smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
/* Chroma motion compensation against a scaled reference. Mirrors
 * mc_luma_scaled() but per subsampled axis it reproduces a libvpx rounding
 * quirk (see the webm issue links below) when the axis is subsampled. */
2912 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2913 vp9_mc_func (*mc)[2],
2914 uint8_t *dst_u, uint8_t *dst_v,
2915 ptrdiff_t dst_stride,
2916 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2917 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2918 ThreadFrame *ref_frame,
2919 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2920 int px, int py, int pw, int ph,
2921 int bw, int bh, int w, int h, int bytesperpixel,
2922 const uint16_t *scale, const uint8_t *step)
2924 if (s->frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
2925 s->frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
2926 mc_chroma_unscaled(s, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
2927 ref_v, src_stride_v, ref_frame,
2928 y, x, in_mv, bw, bh, w, h, bytesperpixel);
2931 int refbw_m1, refbh_m1;
/* horizontal: subsampled axis uses the bug-compatible split computation */
2936 // BUG https://code.google.com/p/webm/issues/detail?id=820
2937 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 4, (s->cols * 4 - x + px + 3) << 4);
2938 mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2940 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2941 mx = scale_mv(mv.x << 1, 0) + scale_mv(x * 16, 0);
/* vertical: same scheme on the y axis */
2944 // BUG https://code.google.com/p/webm/issues/detail?id=820
2945 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 4, (s->rows * 4 - y + py + 3) << 4);
2946 my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2948 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2949 my = scale_mv(mv.y << 1, 1) + scale_mv(y * 16, 1);
2954 ref_u += y * src_stride_u + x * bytesperpixel;
2955 ref_v += y * src_stride_v + x * bytesperpixel;
2958 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2959 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2960 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2961 // we use +7 because the last 7 pixels of each sbrow can be changed in
2962 // the longest loopfilter of the next sbrow
2963 th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2964 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2965 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2966 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2967 ref_u - 3 * src_stride_u - 3 * bytesperpixel,
2969 refbw_m1 + 8, refbh_m1 + 8,
2970 x - 3, y - 3, w, h);
2971 ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2972 smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
2974 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2975 ref_v - 3 * src_stride_v - 3 * bytesperpixel,
2977 refbw_m1 + 8, refbh_m1 + 8,
2978 x - 3, y - 3, w, h);
2979 ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2980 smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
2982 smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2983 smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
/* Bind mc_{luma,chroma}_dir to the *scaled* MC helpers (per-reference
 * mvscale/mvstep tables) and instantiate the inter-prediction template per
 * bit depth, producing inter_pred_scaled_8bpp()/inter_pred_scaled_16bpp(). */
2988 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2989 px, py, pw, ph, bw, bh, w, h, i) \
2990 mc_luma_scaled(s, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2991 mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2992 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2993 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2994 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2995 mc_chroma_scaled(s, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2996 row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2997 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
/* scaled-reference instantiations, again one per bit depth */
2999 #define FN(x) x##_scaled_8bpp
3000 #define BYTES_PER_PIXEL 1
3001 #include "vp9_mc_template.c"
3003 #undef BYTES_PER_PIXEL
3004 #define FN(x) x##_scaled_16bpp
3005 #define BYTES_PER_PIXEL 2
3006 #include "vp9_mc_template.c"
/* final cleanup of the template-parameter macros */
3008 #undef mc_chroma_dir
3010 #undef BYTES_PER_PIXEL
/* Inter reconstruction: run motion-compensated prediction (scaled variant
 * if the reference needs resampling, selected per s->mvscale), then — unless
 * the block is coded as skipped — add the inverse-transformed residual for
 * luma and both chroma planes. bytesperpixel is a compile-time constant. */
3013 static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
3015 VP9Context *s = ctx->priv_data;
3017 int row = s->row, col = s->col;
/* a nonzero mvscale[ref][0] marks a reference with different dimensions */
3019 if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
3020 if (bytesperpixel == 1) {
3021 inter_pred_scaled_8bpp(ctx);
3023 inter_pred_scaled_16bpp(ctx);
3026 if (bytesperpixel == 1) {
3027 inter_pred_8bpp(ctx);
3029 inter_pred_16bpp(ctx);
3033 /* mostly copied intra_recon() */
3035 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
3036 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
3037 int end_x = FFMIN(2 * (s->cols - col), w4);
3038 int end_y = FFMIN(2 * (s->rows - row), h4);
3039 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
3040 int uvstep1d = 1 << b->uvtx, p;
3041 uint8_t *dst = s->dst[0];
/* luma residual add, transform block by transform block */
3044 for (n = 0, y = 0; y < end_y; y += step1d) {
3046 for (x = 0; x < end_x; x += step1d,
3047 ptr += 4 * step1d * bytesperpixel, n += step) {
3048 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
3051 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
3052 s->block + 16 * n * bytesperpixel, eob);
3054 dst += 4 * s->y_stride * step1d;
/* chroma residual add for both planes */
3060 step = 1 << (b->uvtx * 2);
3061 for (p = 0; p < 2; p++) {
3062 dst = s->dst[p + 1];
3063 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
3065 for (x = 0; x < end_x; x += uvstep1d,
3066 ptr += 4 * uvstep1d * bytesperpixel, n += step) {
3067 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
3070 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
3071 s->uvblock[p] + 16 * n * bytesperpixel, eob);
3073 dst += 4 * uvstep1d * s->uv_stride;
/* 8bpp specialization of inter_recon() (bytesperpixel = 1). */
3079 static void inter_recon_8bpp(AVCodecContext *ctx)
3081 inter_recon(ctx, 1);
/* High-bit-depth specialization of inter_recon() (bytesperpixel = 2). */
3084 static void inter_recon_16bpp(AVCodecContext *ctx)
3086 inter_recon(ctx, 2);
/* Accumulate loopfilter edge bitmasks for one block into the per-superblock
 * VP9Filter mask array: mask[0] = column (vertical) edges, mask[1] = row
 * (horizontal) edges; the last index selects filter width class
 * (0=16, 1=8, 2=4, 3=inner-4, see VP9Filter in the header).
 *
 * @param row_and_7/col_and_7 block position within the 64x64 superblock (b4 units)
 * @param w, h       block size in b4 units, already clipped to the visible frame
 * @param col_end/row_end used for odd-edge handling at subsampled borders
 * @param skip_inter set when the block is inter and coded as skip (only
 *                   transform-block-boundary edges inside it are dropped) */
3089 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
3090 int row_and_7, int col_and_7,
3091 int w, int h, int col_end, int row_end,
3092 enum TxfmMode tx, int skip_inter)
3094 static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
3095 static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
3097 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
3098 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
3099 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
3100 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
3102 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
3103 // edges. This means that for UV, we work on two subsampled blocks at
3104 // a time, and we only use the topleft block's mode information to set
3105 // things like block strength. Thus, for any block size smaller than
3106 // 16x16, ignore the odd portion of the block.
3107 if (tx == TX_4X4 && (ss_v | ss_h)) {
3122 if (tx == TX_4X4 && !skip_inter) {
3123 int t = 1 << col_and_7, m_col = (t << w) - t, y;
3124 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
3125 int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
3127 for (y = row_and_7; y < h + row_and_7; y++) {
3128 int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
3130 mask[0][y][1] |= m_row_8;
3131 mask[0][y][2] |= m_row_4;
3132 // for odd lines, if the odd col is not being filtered,
3133 // skip odd row also:
3140 // if a/c are even row/col and b/d are odd, and d is skipped,
3141 // e.g. right edge of size-66x66.webm, then skip b also (bug)
3142 if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
3143 mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
3145 mask[1][y][col_mask_id] |= m_col;
3148 mask[0][y][3] |= m_col;
3150 if (ss_h && (col_end & 1))
3151 mask[1][y][3] |= (t << (w - 1)) - t;
3153 mask[1][y][3] |= m_col;
/* tx >= 8x8 or skipped-inter: only block-boundary edges are filtered */
3157 int y, t = 1 << col_and_7, m_col = (t << w) - t;
3160 int mask_id = (tx == TX_8X8);
3161 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
3162 int l2 = tx + ss_h - 1, step1d;
3163 int m_row = m_col & masks[l2];
3165 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
3166 // 8wd loopfilter to prevent going off the visible edge.
3167 if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
3168 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
3169 int m_row_8 = m_row - m_row_16;
3171 for (y = row_and_7; y < h + row_and_7; y++) {
3172 mask[0][y][0] |= m_row_16;
3173 mask[0][y][1] |= m_row_8;
3176 for (y = row_and_7; y < h + row_and_7; y++)
3177 mask[0][y][mask_id] |= m_row;
3182 if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
3183 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
3184 mask[1][y][0] |= m_col;
3185 if (y - row_and_7 == h - 1)
3186 mask[1][y][1] |= m_col;
3188 for (y = row_and_7; y < h + row_and_7; y += step1d)
3189 mask[1][y][mask_id] |= m_col;
3191 } else if (tx != TX_4X4) {
3194 mask_id = (tx == TX_8X8) || (h == ss_v);
3195 mask[1][row_and_7][mask_id] |= m_col;
3196 mask_id = (tx == TX_8X8) || (w == ss_h);
3197 for (y = row_and_7; y < h + row_and_7; y++)
3198 mask[0][y][mask_id] |= t;
/* 4x4 skipped-inter: only the outer block edges, split 8wd/4wd by position */
3200 int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
3202 for (y = row_and_7; y < h + row_and_7; y++) {
3203 mask[0][y][2] |= t4;
3204 mask[0][y][1] |= t8;
3206 mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
3211 static void decode_b(AVCodecContext *ctx, int row, int col,
3212 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
3213 enum BlockLevel bl, enum BlockPartition bp)
3215 VP9Context *s = ctx->priv_data;
3217 enum BlockSize bs = bl * 3 + bp;
3218 int bytesperpixel = s->bytesperpixel;
3219 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
3221 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3227 s->min_mv.x = -(128 + col * 64);
3228 s->min_mv.y = -(128 + row * 64);
3229 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
3230 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
3236 b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
3237 (s->ss_v && h4 * 2 == (1 << b->tx)));
3242 if (bytesperpixel == 1) {
3243 has_coeffs = decode_coeffs_8bpp(ctx);
3245 has_coeffs = decode_coeffs_16bpp(ctx);
3247 if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
3249 memset(&s->above_skip_ctx[col], 1, w4);
3250 memset(&s->left_skip_ctx[s->row7], 1, h4);
3255 #define SPLAT_ZERO_CTX(v, n) \
3257 case 1: v = 0; break; \
3258 case 2: AV_ZERO16(&v); break; \
3259 case 4: AV_ZERO32(&v); break; \
3260 case 8: AV_ZERO64(&v); break; \
3261 case 16: AV_ZERO128(&v); break; \
3263 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3265 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3266 if (s->ss_##dir2) { \
3267 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3268 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3270 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3271 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3276 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
3277 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
3278 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
3279 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
3282 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
3283 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
3284 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
3285 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
3290 s->block += w4 * h4 * 64 * bytesperpixel;
3291 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3292 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3293 s->eob += 4 * w4 * h4;
3294 s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3295 s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3301 // emulated overhangs if the stride of the target buffer can't hold. This
3302 // makes it possible to support emu-edge and so on even if we have large block
3304 emu[0] = (col + w4) * 8 * bytesperpixel > f->linesize[0] ||
3305 (row + h4) > s->rows;
3306 emu[1] = ((col + w4) * 8 >> s->ss_h) * bytesperpixel > f->linesize[1] ||
3307 (row + h4) > s->rows;
3309 s->dst[0] = s->tmp_y;
3312 s->dst[0] = f->data[0] + yoff;
3313 s->y_stride = f->linesize[0];
3316 s->dst[1] = s->tmp_uv[0];
3317 s->dst[2] = s->tmp_uv[1];
3320 s->dst[1] = f->data[1] + uvoff;
3321 s->dst[2] = f->data[2] + uvoff;
3322 s->uv_stride = f->linesize[1];
3326 intra_recon_16bpp(ctx, yoff, uvoff);
3328 intra_recon_8bpp(ctx, yoff, uvoff);
3332 inter_recon_16bpp(ctx);
3334 inter_recon_8bpp(ctx);
3338 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3340 for (n = 0; o < w; n++) {
3345 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o * bytesperpixel, f->linesize[0],
3346 s->tmp_y + o * bytesperpixel, 128, h, 0, 0);
3352 int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
3353 int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
3355 for (n = s->ss_h; o < w; n++) {
3360 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o * bytesperpixel, f->linesize[1],
3361 s->tmp_uv[0] + o * bytesperpixel, 128, h, 0, 0);
3362 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o * bytesperpixel, f->linesize[2],
3363 s->tmp_uv[1] + o * bytesperpixel, 128, h, 0, 0);
3369 // pick filter level and find edges to apply filter to
3370 if (s->filter.level &&
3371 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3372 [b->mode[3] != ZEROMV]) > 0) {
3373 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3374 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3376 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3377 mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3378 if (s->ss_h || s->ss_v)
3379 mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
3380 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3381 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3382 b->uvtx, skip_inter);
3384 if (!s->filter.lim_lut[lvl]) {
3385 int sharp = s->filter.sharpness;
3389 limit >>= (sharp + 3) >> 2;
3390 limit = FFMIN(limit, 9 - sharp);
3392 limit = FFMAX(limit, 1);
3394 s->filter.lim_lut[lvl] = limit;
3395 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
3401 s->block += w4 * h4 * 64 * bytesperpixel;
3402 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3403 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3404 s->eob += 4 * w4 * h4;
3405 s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3406 s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
/* Recursively decode one superblock partition subtree during the first
 * (bitstream-reading) pass.  The partition mode is read from the range coder
 * with a context built from the above/left partition contexts; decode_b() is
 * called for each resulting leaf.  When a half-block extends past the frame
 * edge, the partition choice is partially implied and only the relevant
 * probability branch (p[1]/p[2]) is read.
 * NOTE(review): this excerpt elides several original lines (braces, switch
 * header, PARTITION_H/V case labels); comments annotate visible code only. */
3410 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3411 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3413 VP9Context *s = ctx->priv_data;
// partition context: bit 0 from the column above, bit 1 from the row to the
// left, each selected by the current block level bl
3414 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3415 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
// keyframes/intra-only frames use the fixed default partition probabilities
3416 const uint8_t *p = s->keyframe || s->intraonly ? vp9_default_kf_partition_probs[bl][c] :
3417 s->prob.p.partition[bl][c];
3418 enum BlockPartition bp;
3419 ptrdiff_t hbs = 4 >> bl;                 // half block size in 8px units
3420 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3421 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3422 int bytesperpixel = s->bytesperpixel;
3425 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3426 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// fully inside the frame: all four partition modes are possible
3427 } else if (col + hbs < s->cols) { // FIXME why not <=?
3428 if (row + hbs < s->rows) { // FIXME why not <=?
3429 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3431 case PARTITION_NONE:
3432 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// (PARTITION_H) second half below the first
3435 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3436 yoff += hbs * 8 * y_stride;
3437 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3438 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
// (PARTITION_V) second half to the right of the first
3441 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3442 yoff += hbs * 8 * bytesperpixel;
3443 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3444 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3446 case PARTITION_SPLIT:
// recurse into the four quadrants at the next deeper block level
3447 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3448 decode_sb(ctx, row, col + hbs, lflvl,
3449 yoff + 8 * hbs * bytesperpixel,
3450 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3451 yoff += hbs * 8 * y_stride;
3452 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3453 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3454 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3455 yoff + 8 * hbs * bytesperpixel,
3456 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// bottom edge: only split vs. horizontal remains, one probability branch
3461 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3462 bp = PARTITION_SPLIT;
3463 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3464 decode_sb(ctx, row, col + hbs, lflvl,
3465 yoff + 8 * hbs * bytesperpixel,
3466 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3469 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// right edge: only split vs. vertical remains
3471 } else if (row + hbs < s->rows) { // FIXME why not <=?
3472 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3473 bp = PARTITION_SPLIT;
3474 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3475 yoff += hbs * 8 * y_stride;
3476 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3477 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3480 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// bottom-right corner: split is implied, nothing read from the bitstream
3483 bp = PARTITION_SPLIT;
3484 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
// update per-frame statistics used later by adapt_probs()
3486 s->counts.partition[bl][c][bp]++;
/* Second-pass twin of decode_sb(): replays the partition tree recorded in
 * s->b during pass 1 (no bitstream reads) so reconstruction can run as a
 * separate pass.  Structure mirrors decode_sb() but partition decisions come
 * from the stored block (b->bl / b->bp) instead of the range coder.
 * NOTE(review): several original lines are elided in this excerpt. */
3489 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3490 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3492 VP9Context *s = ctx->priv_data;
3494 ptrdiff_t hbs = 4 >> bl;                 // half block size in 8px units
3495 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3496 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3497 int bytesperpixel = s->bytesperpixel;
// leaf at the smallest level must have been stored as an 8x8 block
3500 av_assert2(b->bl == BL_8X8);
3501 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3502 } else if (s->b->bl == bl) {
3503 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
// horizontal split: second half below, if it fits inside the frame
3504 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3505 yoff += hbs * 8 * y_stride;
3506 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3507 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
// vertical split: second half to the right, if it fits inside the frame
3508 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3509 yoff += hbs * 8 * bytesperpixel;
3510 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3511 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// recorded level is deeper than bl: recurse into the quadrants
3514 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3515 if (col + hbs < s->cols) { // FIXME why not <=?
3516 if (row + hbs < s->rows) {
3517 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
3518 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3519 yoff += hbs * 8 * y_stride;
3520 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3521 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3522 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3523 yoff + 8 * hbs * bytesperpixel,
3524 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// bottom edge: only the right neighbour quadrant exists
3526 yoff += hbs * 8 * bytesperpixel;
3527 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3528 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
// right edge: only the lower quadrant exists
3530 } else if (row + hbs < s->rows) {
3531 yoff += hbs * 8 * y_stride;
3532 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3533 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
/* Apply the in-loop deblocking filter to vertical edges (edges between
 * horizontally adjacent blocks) of one plane within a 64x64 superblock.
 * mask[] holds per-row bitmasks of 8px columns needing a 16-, 8- or
 * 4-wide filter; lvl[] holds per-8x8-block filter strengths, looked up in
 * the precomputed lim_lut/mblim_lut tables.
 * NOTE(review): this excerpt elides some lines (loop braces, else arms);
 * comments annotate visible code only. */
3538 static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
3539 uint8_t *lvl, uint8_t (*mask)[4],
3540 uint8_t *dst, ptrdiff_t ls)
3542 int y, x, bytesperpixel = s->bytesperpixel;
3544 // filter edges between columns (e.g. block1 | block2)
3545 for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
// hmask1/hmask2 are the masks of the upper/lower 8px rows handled together
3546 uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
3547 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3548 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3549 unsigned hm = hm1 | hm2 | hm13 | hm23;
// walk set bits left to right; each bit is one 8px column position
3551 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
3554 int L = *l, H = L >> 4;
3555 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
// widest (16px) filter requested in the top row at this column
3557 if (hmask1[0] & x) {
3558 if (hmask2[0] & x) {
// both rows want the wide filter: one 16-pixel-high call
3559 av_assert2(l[8 << ss_v] == L);
3560 s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
3562 s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
3564 } else if (hm2 & x) {
// pack the second row's strength into the upper byte for the mix2 filter
3567 E |= s->filter.mblim_lut[L] << 8;
3568 I |= s->filter.lim_lut[L] << 8;
3569 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3571 [0](ptr, ls, E, I, H);
3573 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3574 [0](ptr, ls, E, I, H);
// only the lower 8px row needs filtering at this column
3576 } else if (hm2 & x) {
3577 int L = l[8 << ss_v], H = L >> 4;
3578 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3580 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3581 [0](ptr + 8 * ls, ls, E, I, H);
// inner 4px edges (mask[3]) at the half-block boundary
3589 int L = *l, H = L >> 4;
3590 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3595 E |= s->filter.mblim_lut[L] << 8;
3596 I |= s->filter.lim_lut[L] << 8;
3597 s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3599 s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3601 } else if (hm23 & x) {
3602 int L = l[8 << ss_v], H = L >> 4;
3603 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3605 s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
/* Apply the in-loop deblocking filter to horizontal edges (edges between
 * vertically adjacent blocks) of one plane within a 64x64 superblock.
 * Mirror image of filter_plane_cols(): here one mask row covers a full
 * 64px-wide stripe and pairs of adjacent column bits are folded into a
 * single mix2 call where possible.
 * NOTE(review): this excerpt elides some lines; comments annotate visible
 * code only. */
3613 static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
3614 uint8_t *lvl, uint8_t (*mask)[4],
3615 uint8_t *dst, ptrdiff_t ls)
3617 int y, x, bytesperpixel = s->bytesperpixel;
3620 // filter edges between rows (e.g. ------)
3622 for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
3623 uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
3624 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
// advance two (or four, when subsampled) column bits per iteration
3626 for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
3629 int L = *l, H = L >> 4;
3630 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
// wide filter needed at this column and its right neighbour together
3633 if (vmask[0] & (x << (1 + ss_h))) {
3634 av_assert2(l[1 + ss_h] == L);
3635 s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
3637 s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
3639 } else if (vm & (x << (1 + ss_h))) {
// pack the neighbour's strength into the upper byte for the mix2 filter
3642 E |= s->filter.mblim_lut[L] << 8;
3643 I |= s->filter.lim_lut[L] << 8;
3644 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3645 [!!(vmask[1] & (x << (1 + ss_h)))]
3646 [1](ptr, ls, E, I, H);
3648 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3649 [1](ptr, ls, E, I, H);
// only the right-neighbour column needs filtering
3651 } else if (vm & (x << (1 + ss_h))) {
3652 int L = l[1 + ss_h], H = L >> 4;
3653 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3655 s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
3656 [1](ptr + 8 * bytesperpixel, ls, E, I, H);
// inner 4px edges (vm3) at the half-block boundary, 4 lines further down
3661 int L = *l, H = L >> 4;
3662 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3664 if (vm3 & (x << (1 + ss_h))) {
3667 E |= s->filter.mblim_lut[L] << 8;
3668 I |= s->filter.lim_lut[L] << 8;
3669 s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
3671 s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
3673 } else if (vm3 & (x << (1 + ss_h))) {
3674 int L = l[1 + ss_h], H = L >> 4;
3675 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3677 s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
/* Run the deblocking loopfilter over one 64x64 superblock: first the luma
 * plane, then both chroma planes, filtering column edges before row edges.
 * The per-superblock edge masks were filled in by mask_edges() during
 * decode_b(). */
3690 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3691 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3693 VP9Context *s = ctx->priv_data;
3694 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3695 uint8_t *dst = f->data[0] + yoff;
3696 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
// chroma uses the subsampled mask set when either axis is subsampled
3697 uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
3700 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3701 // if you think of them as acting on a 8x8 block max, we can interleave
3702 // each v/h within the single x loop, but that only works if we work on
3703 // 8 pixel blocks, and we won't always do that (we want at least 16px
3704 // to use SSE2 optimizations, perhaps 32 for AVX2)
// luma: vertical edges first, then horizontal edges
3706 filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
3707 filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);
// both chroma planes share the same masks and levels
3709 for (p = 0; p < 2; p++) {
3710 dst = f->data[1 + p] + uvoff;
3711 filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
3712 filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
/**
 * Compute the start/end offsets of one tile along a single dimension.
 *
 * @param start   receives the tile's first position, in 8px block units
 * @param end     receives one past the tile's last position, same units
 * @param idx     tile index along this dimension
 * @param log2_n  log2 of the tile count along this dimension
 * @param n       superblock (64px) count along this dimension
 *
 * Tile boundaries are (idx * n) >> log2_n superblocks, clamped to the frame
 * size, then converted from superblocks to 8px block units (<< 3).
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first_sb = ( idx      * n) >> log2_n;
    int next_sb  = ((idx + 1) * n) >> log2_n;

    *start = (first_sb < n ? first_sb : n) << 3;
    *end   = (next_sb  < n ? next_sb  : n) << 3;
}
/* Adapt one binary probability *p towards the observed counts (ct0 = times
 * the "0" branch was taken, ct1 = the "1" branch), blending the old value
 * with the empirical probability; the blend weight grows with the count,
 * capped at max_count.
 * NOTE(review): lines elided from this excerpt presumably include the
 * early return when ct == 0 and the `p1 = *p` load — confirm against the
 * full source. */
3724 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3725 int max_count, int update_factor)
3727 unsigned ct = ct0 + ct1, p2, p1;
// empirical probability of branch 0, rounded, scaled to 1..255
3733 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3734 p2 = av_clip(p2, 1, 255);
3735 ct = FFMIN(ct, max_count);
// scale the blend weight by how many samples were seen
3736 update_factor = FASTDIV(update_factor * ct, max_count);
3738 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3739 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
/* Backward probability adaptation: after decoding a frame, blend every
 * probability in the active frame context towards the symbol counts
 * gathered in s->counts (see adapt_prob()).  Coefficient probabilities use
 * an update factor of 112 after key/intra frames, 128 otherwise; all other
 * groups use max_count 20 / factor 128.
 * NOTE(review): this excerpt elides some lines (braces, a few loop
 * headers); comments annotate visible code only. */
3742 static void adapt_probs(VP9Context *s)
3745 prob_context *p = &s->prob_ctx[s->framectxid].p;
3746 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
// coefficient probabilities, indexed [txsize][plane][inter][band][coef ctx]
3749 for (i = 0; i < 4; i++)
3750 for (j = 0; j < 2; j++)
3751 for (k = 0; k < 2; k++)
3752 for (l = 0; l < 6; l++)
3753 for (m = 0; m < 6; m++) {
3754 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3755 unsigned *e = s->counts.eob[i][j][k][l][m];
3756 unsigned *c = s->counts.coef[i][j][k][l][m];
3758 if (l == 0 && m >= 3) // dc only has 3 pt
3761 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3762 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3763 adapt_prob(&pp[2], c[1], c[2], 24, uf);
// intra frames carry no inter-mode statistics: copy and stop here
3766 if (s->keyframe || s->intraonly) {
3767 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3768 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3769 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3770 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
// skip flag
3775 for (i = 0; i < 3; i++)
3776 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
// intra/inter flag
3779 for (i = 0; i < 4; i++)
3780 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
// compound prediction mode selection
3783 if (s->comppredmode == PRED_SWITCHABLE) {
3784 for (i = 0; i < 5; i++)
3785 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
// compound reference frame selection
3789 if (s->comppredmode != PRED_SINGLEREF) {
3790 for (i = 0; i < 5; i++)
3791 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3792 s->counts.comp_ref[i][1], 20, 128);
// single reference frame selection (two-bit tree)
3795 if (s->comppredmode != PRED_COMPREF) {
3796 for (i = 0; i < 5; i++) {
3797 uint8_t *pp = p->single_ref[i];
3798 unsigned (*c)[2] = s->counts.single_ref[i];
3800 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3801 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3805 // block partitioning
3806 for (i = 0; i < 4; i++)
3807 for (j = 0; j < 4; j++) {
3808 uint8_t *pp = p->partition[i][j];
3809 unsigned *c = s->counts.partition[i][j];
3811 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3812 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3813 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// transform size selection trees (8/16/32)
3817 if (s->txfmmode == TX_SWITCHABLE) {
3818 for (i = 0; i < 2; i++) {
3819 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3821 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3822 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3823 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3824 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3825 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3826 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3830 // interpolation filter
3831 if (s->filtermode == FILTER_SWITCHABLE) {
3832 for (i = 0; i < 4; i++) {
3833 uint8_t *pp = p->filter[i];
3834 unsigned *c = s->counts.filter[i];
3836 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3837 adapt_prob(&pp[1], c[1], c[2], 20, 128);
// inter mode tree (zeromv/nearestmv/nearmv/newmv)
3842 for (i = 0; i < 7; i++) {
3843 uint8_t *pp = p->mv_mode[i];
3844 unsigned *c = s->counts.mv_mode[i];
3846 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3847 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3848 adapt_prob(&pp[2], c[1], c[3], 20, 128);
// motion vector joint (which components are nonzero)
3853 uint8_t *pp = p->mv_joint;
3854 unsigned *c = s->counts.mv_joint;
3856 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3857 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3858 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// per-component (row/col) motion vector probabilities
3862 for (i = 0; i < 2; i++) {
3864 unsigned *c, (*c2)[2], sum;
3866 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3867 s->counts.mv_comp[i].sign[1], 20, 128);
// magnitude class tree: sum is progressively reduced down the tree
3869 pp = p->mv_comp[i].classes;
3870 c = s->counts.mv_comp[i].classes;
3871 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3872 adapt_prob(&pp[0], c[0], sum, 20, 128);
3874 adapt_prob(&pp[1], c[1], sum, 20, 128);
3876 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3877 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3879 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3880 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3882 adapt_prob(&pp[6], c[6], sum, 20, 128);
3883 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3884 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3885 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3887 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3888 s->counts.mv_comp[i].class0[1], 20, 128);
// per-bit probabilities of the integer magnitude
3889 pp = p->mv_comp[i].bits;
3890 c2 = s->counts.mv_comp[i].bits;
3891 for (j = 0; j < 10; j++)
3892 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
// fractional-pel trees, class0 and general
3894 for (j = 0; j < 2; j++) {
3895 pp = p->mv_comp[i].class0_fp[j];
3896 c = s->counts.mv_comp[i].class0_fp[j];
3897 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3898 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3899 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3901 pp = p->mv_comp[i].fp;
3902 c = s->counts.mv_comp[i].fp;
3903 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3904 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3905 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// high-precision (1/8-pel) bits, only when enabled for this frame
3907 if (s->highprecisionmvs) {
3908 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3909 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3910 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3911 s->counts.mv_comp[i].hp[1], 20, 128);
// luma intra mode tree; `sum` shrinks as modes are peeled off the tree
3916 for (i = 0; i < 4; i++) {
3917 uint8_t *pp = p->y_mode[i];
3918 unsigned *c = s->counts.y_mode[i], sum, s2;
3920 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3921 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3922 sum -= c[TM_VP8_PRED];
3923 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3924 sum -= c[VERT_PRED];
3925 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3926 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3928 adapt_prob(&pp[3], s2, sum, 20, 128);
3930 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3931 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3932 sum -= c[DIAG_DOWN_LEFT_PRED];
3933 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3934 sum -= c[VERT_LEFT_PRED];
3935 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3936 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// chroma intra mode tree, same structure as luma but conditioned on y mode
3940 for (i = 0; i < 10; i++) {
3941 uint8_t *pp = p->uv_mode[i];
3942 unsigned *c = s->counts.uv_mode[i], sum, s2;
3944 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3945 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3946 sum -= c[TM_VP8_PRED];
3947 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3948 sum -= c[VERT_PRED];
3949 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3950 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3952 adapt_prob(&pp[3], s2, sum, 20, 128);
3954 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3955 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3956 sum -= c[DIAG_DOWN_LEFT_PRED];
3957 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3958 sum -= c[VERT_LEFT_PRED];
3959 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3960 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3964 static void free_buffers(VP9Context *s)
3966 av_freep(&s->intra_pred_data[0]);
3967 av_freep(&s->b_base);
3968 av_freep(&s->block_base);
/* Codec close callback: unreference and free the three internal frames and
 * all 8 reference / next-reference slots.
 * NOTE(review): the tail of this function (presumably free_buffers() and the
 * return) is elided from this excerpt. */
3971 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3973 VP9Context *s = ctx->priv_data;
// internal frames: CUR_FRAME plus the segmentation-map / mvpair refs
3976 for (i = 0; i < 3; i++) {
3977 if (s->frames[i].tf.f->data[0])
3978 vp9_unref_frame(ctx, &s->frames[i]);
3979 av_frame_free(&s->frames[i].tf.f);
// 8 reference slots, both current and pending generations
3981 for (i = 0; i < 8; i++) {
3982 if (s->refs[i].f->data[0])
3983 ff_thread_release_buffer(ctx, &s->refs[i]);
3984 av_frame_free(&s->refs[i].f);
3985 if (s->next_refs[i].f->data[0])
3986 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3987 av_frame_free(&s->next_refs[i].f);
/* Main decode entry point: parse the frame header, manage the internal and
 * DPB-style reference frames, decode all tiles (optionally in a two-pass
 * scheme for frame threading), run the loopfilter per superblock row, and
 * output the frame if it is visible.
 * NOTE(review): this excerpt elides many lines (error paths, braces, some
 * declarations); comments annotate visible code only. */
3997 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3998 int *got_frame, AVPacket *pkt)
4000 const uint8_t *data = pkt->data;
4001 int size = pkt->size;
4002 VP9Context *s = ctx->priv_data;
4003 int res, tile_row, tile_col, i, ref, row, col;
// keep the previous segmentation map when this frame won't update it
4004 int retain_segmap_ref = s->frames[REF_FRAME_SEGMAP].segmentation_map &&
4005 (!s->segmentation.enabled || !s->segmentation.update_map);
4006 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
4010 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
// res == 0: "show existing frame" — output reference `ref` directly
4012 } else if (res == 0) {
4013 if (!s->refs[ref].f->data[0]) {
4014 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
4015 return AVERROR_INVALIDDATA;
4017 if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
4019 ((AVFrame *)frame)->pkt_pts = pkt->pts;
4020 ((AVFrame *)frame)->pkt_dts = pkt->dts;
// carry the current reference set forward unchanged
4021 for (i = 0; i < 8; i++) {
4022 if (s->next_refs[i].f->data[0])
4023 ff_thread_release_buffer(ctx, &s->next_refs[i]);
4024 if (s->refs[i].f->data[0] &&
4025 (res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i])) < 0)
// rotate internal frames: previous CUR_FRAME becomes the segmap/mvpair refs
4034 if (!retain_segmap_ref || s->keyframe || s->intraonly) {
4035 if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
4036 vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
4037 if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4038 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
4041 if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
4042 vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);
4043 if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4044 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
4046 if (s->frames[CUR_FRAME].tf.f->data[0])
4047 vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
4048 if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
4050 f = s->frames[CUR_FRAME].tf.f;
4051 f->key_frame = s->keyframe;
4052 f->pict_type = (s->keyframe || s->intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
4053 ls_y = f->linesize[0];
4054 ls_uv =f->linesize[1];
// a resolution change invalidates the stored segmentation map
4056 if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0] &&
4057 (s->frames[REF_FRAME_MVPAIR].tf.f->width != s->frames[CUR_FRAME].tf.f->width ||
4058 s->frames[REF_FRAME_MVPAIR].tf.f->height != s->frames[CUR_FRAME].tf.f->height)) {
4059 vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
// build the post-frame reference set per refreshrefmask
4063 for (i = 0; i < 8; i++) {
4064 if (s->next_refs[i].f->data[0])
4065 ff_thread_release_buffer(ctx, &s->next_refs[i]);
4066 if (s->refreshrefmask & (1 << i)) {
4067 res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
4068 } else if (s->refs[i].f->data[0]) {
4069 res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
4075 // main tile decode loop
4076 bytesperpixel = s->bytesperpixel;
// reset the per-frame "above" context rows
4077 memset(s->above_partition_ctx, 0, s->cols);
4078 memset(s->above_skip_ctx, 0, s->cols);
4079 if (s->keyframe || s->intraonly) {
4080 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
4082 memset(s->above_mode_ctx, NEARESTMV, s->cols);
4084 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
4085 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
4086 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
4087 memset(s->above_segpred_ctx, 0, s->cols);
// two-pass decoding only with frame threads, context refresh, non-parallel
4088 s->pass = s->frames[CUR_FRAME].uses_2pass =
4089 ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
4090 if ((res = update_block_buffers(ctx)) < 0) {
4091 av_log(ctx, AV_LOG_ERROR,
4092 "Failed to allocate block buffers\n");
// parallelmode: commit forward-updated probabilities now so dependent
// frames can start before this frame finishes decoding
4095 if (s->refreshctx && s->parallelmode) {
4098 for (i = 0; i < 4; i++) {
4099 for (j = 0; j < 2; j++)
4100 for (k = 0; k < 2; k++)
4101 for (l = 0; l < 6; l++)
4102 for (m = 0; m < 6; m++)
4103 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
4104 s->prob.coef[i][j][k][l][m], 3);
4105 if (s->txfmmode == i)
4108 s->prob_ctx[s->framectxid].p = s->prob.p;
4109 ff_thread_finish_setup(ctx);
4110 } else if (!s->refreshctx) {
4111 ff_thread_finish_setup(ctx);
// rewind the coefficient/eob cursors to the start of the scratch buffers
4117 s->block = s->block_base;
4118 s->uvblock[0] = s->uvblock_base[0];
4119 s->uvblock[1] = s->uvblock_base[1];
4120 s->eob = s->eob_base;
4121 s->uveob[0] = s->uveob_base[0];
4122 s->uveob[1] = s->uveob_base[1];
4124 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
4125 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
4126 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
// set up one range decoder per tile column; the last tile has no size field
4128 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4131 if (tile_col == s->tiling.tile_cols - 1 &&
4132 tile_row == s->tiling.tile_rows - 1) {
4135 tile_size = AV_RB32(data);
4139 if (tile_size > size) {
4140 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4141 return AVERROR_INVALIDDATA;
4143 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
4144 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
4145 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4146 return AVERROR_INVALIDDATA;
// decode superblock rows; yoff/uvoff track the row's pixel offset
4153 for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
4154 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
4155 struct VP9Filter *lflvl_ptr = s->lflvl;
4156 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
4158 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4159 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
4160 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
// reset the per-tile "left" context columns
4163 memset(s->left_partition_ctx, 0, 8);
4164 memset(s->left_skip_ctx, 0, 8);
4165 if (s->keyframe || s->intraonly) {
4166 memset(s->left_mode_ctx, DC_PRED, 16);
4168 memset(s->left_mode_ctx, NEARESTMV, 8);
4170 memset(s->left_y_nnz_ctx, 0, 16);
4171 memset(s->left_uv_nnz_ctx, 0, 32);
4172 memset(s->left_segpred_ctx, 0, 8);
// switch the shared range coder to this tile column's state
4174 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
4177 for (col = s->tiling.tile_col_start;
4178 col < s->tiling.tile_col_end;
4179 col += 8, yoff2 += 64 * bytesperpixel,
4180 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4181 // FIXME integrate with lf code (i.e. zero after each
4182 // use, similar to invtxfm coefficients, or similar)
4184 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
// pass 2 replays stored decisions; pass 0/1 read the bitstream
4188 decode_sb_mem(ctx, row, col, lflvl_ptr,
4189 yoff2, uvoff2, BL_64X64);
4191 decode_sb(ctx, row, col, lflvl_ptr,
4192 yoff2, uvoff2, BL_64X64);
// save the coder state back for the next superblock row of this tile
4196 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
4204 // backup pre-loopfilter reconstruction data for intra
4205 // prediction of next row of sb64s
4206 if (row + 8 < s->rows) {
4207 memcpy(s->intra_pred_data[0],
4208 f->data[0] + yoff + 63 * ls_y,
4209 8 * s->cols * bytesperpixel);
4210 memcpy(s->intra_pred_data[1],
4211 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4212 8 * s->cols * bytesperpixel >> s->ss_h);
4213 memcpy(s->intra_pred_data[2],
4214 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4215 8 * s->cols * bytesperpixel >> s->ss_h);
4218 // loopfilter one row
4219 if (s->filter.level) {
4222 lflvl_ptr = s->lflvl;
4223 for (col = 0; col < s->cols;
4224 col += 8, yoff2 += 64 * bytesperpixel,
4225 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4226 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
4230 // FIXME maybe we can make this more finegrained by running the
4231 // loopfilter per-block instead of after each sbrow
4232 // In fact that would also make intra pred left preparation easier?
4233 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
// after pass 1 of a two-pass decode: adapt probabilities, release setup
4237 if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
4239 ff_thread_finish_setup(ctx);
4241 } while (s->pass++ == 1);
4242 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
// promote next_refs to refs for the following frame
4245 for (i = 0; i < 8; i++) {
4246 if (s->refs[i].f->data[0])
4247 ff_thread_release_buffer(ctx, &s->refs[i]);
4248 if (s->next_refs[i].f->data[0] &&
4249 (res = ff_thread_ref_frame(&s->refs[i], &s->next_refs[i])) < 0)
// only visible frames are returned to the caller
4253 if (!s->invisible) {
4254 if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
/* Flush callback: drop all internal frames and reference buffers (e.g. on
 * seek) without freeing the AVFrame containers themselves. */
4262 static void vp9_decode_flush(AVCodecContext *ctx)
4264 VP9Context *s = ctx->priv_data;
4267 for (i = 0; i < 3; i++)
4268 vp9_unref_frame(ctx, &s->frames[i]);
4269 for (i = 0; i < 8; i++)
4270 ff_thread_release_buffer(ctx, &s->refs[i]);
/* Allocate the AVFrame containers for the 3 internal frames and the 8
 * reference + 8 next-reference slots.  On any allocation failure the
 * already-created frames are torn down via vp9_decode_free() and
 * AVERROR(ENOMEM) is returned. */
4273 static int init_frames(AVCodecContext *ctx)
4275 VP9Context *s = ctx->priv_data;
4278 for (i = 0; i < 3; i++) {
4279 s->frames[i].tf.f = av_frame_alloc();
4280 if (!s->frames[i].tf.f) {
4281 vp9_decode_free(ctx);
4282 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4283 return AVERROR(ENOMEM);
4286 for (i = 0; i < 8; i++) {
4287 s->refs[i].f = av_frame_alloc();
4288 s->next_refs[i].f = av_frame_alloc();
4289 if (!s->refs[i].f || !s->next_refs[i].f) {
4290 vp9_decode_free(ctx);
4291 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4292 return AVERROR(ENOMEM);
/* Codec init callback: enable frame-thread progress reporting, mark the
 * loopfilter sharpness as "unset" (-1) so the first header parse rebuilds
 * the limit LUTs, and allocate the frame containers. */
4299 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4301 VP9Context *s = ctx->priv_data;
4303 ctx->internal->allocate_progress = 1;
4305 s->filter.sharpness = -1;
4307 return init_frames(ctx);
/* Frame-thread worker init: each thread copy only needs its own frame
 * containers; all other state is synced via update_thread_context. */
4310 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4312 return init_frames(avctx);
/* Frame-threading sync: copy the decoding state a future frame needs from
 * the source thread context `ssrc` into `s` — frame refs, reference
 * buffers, probability contexts and header-derived flags.
 * NOTE(review): some lines (buffer reallocation on size change, returns)
 * are elided from this excerpt. */
4315 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4318 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4320 // detect size changes in other threads
4321 if (s->intra_pred_data[0] &&
4322 (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols ||
4323 s->rows != ssrc->rows || s->bpp != ssrc->bpp)) {
// re-reference the three internal frames from the source thread
4327 for (i = 0; i < 3; i++) {
4328 if (s->frames[i].tf.f->data[0])
4329 vp9_unref_frame(dst, &s->frames[i]);
4330 if (ssrc->frames[i].tf.f->data[0]) {
4331 if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
// adopt the source thread's post-frame reference set as our current one
4335 for (i = 0; i < 8; i++) {
4336 if (s->refs[i].f->data[0])
4337 ff_thread_release_buffer(dst, &s->refs[i]);
4338 if (ssrc->next_refs[i].f->data[0]) {
4339 if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
// scalar header-derived state needed before this thread parses its header
4344 s->invisible = ssrc->invisible;
4345 s->keyframe = ssrc->keyframe;
4346 s->intraonly = ssrc->intraonly;
4347 s->ss_v = ssrc->ss_v;
4348 s->ss_h = ssrc->ss_h;
4349 s->segmentation.enabled = ssrc->segmentation.enabled;
4350 s->segmentation.update_map = ssrc->segmentation.update_map;
4351 s->segmentation.absolute_vals = ssrc->segmentation.absolute_vals;
4352 s->bytesperpixel = ssrc->bytesperpixel;
4354 s->bpp_index = ssrc->bpp_index;
// probability contexts and filter/segmentation tables are copied wholesale
4355 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4356 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
4357 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4358 sizeof(s->segmentation.feat));
// Supported VP9 bitstream profiles, terminated by FF_PROFILE_UNKNOWN.
4363 static const AVProfile profiles[] = {
4364 { FF_PROFILE_VP9_0, "Profile 0" },
4365 { FF_PROFILE_VP9_1, "Profile 1" },
4366 { FF_PROFILE_VP9_2, "Profile 2" },
4367 { FF_PROFILE_VP9_3, "Profile 3" },
4368 { FF_PROFILE_UNKNOWN },
4371 AVCodec ff_vp9_decoder = {
4373 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4374 .type = AVMEDIA_TYPE_VIDEO,
4375 .id = AV_CODEC_ID_VP9,
4376 .priv_data_size = sizeof(VP9Context),
4377 .init = vp9_decode_init,
4378 .close = vp9_decode_free,
4379 .decode = vp9_decode_frame,
4380 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
4381 .flush = vp9_decode_flush,
4382 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4383 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
4384 .profiles = NULL_IF_CONFIG_SMALL(profiles),