2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
34 #include "libavutil/pixdesc.h"
36 #define VP9_SYNCCODE 0x498342
/* NOTE(review): this excerpt is a line-sampled extraction; the leading
 * integers are extraction artifacts and intermediate lines (including the
 * closing "} VP9Frame;" and the VP9Filter struct header) are missing.
 * Code tokens kept byte-identical; only comments added. */
/* Per-frame decoder state: the picture buffer plus side data that must
 * survive across frame threads. */
73 typedef struct VP9Frame {
/* single refcounted backing buffer holding segmentation_map followed by mv */
75 AVBufferRef *extradata;
/* one segment id per 8x8 block, points into extradata->data (see vp9_alloc_frame) */
76 uint8_t *segmentation_map;
/* per-8x8-block motion-vector/reference pairs, also inside extradata */
77 struct VP9mvrefPair *mv;
/* Loopfilter mask member — belongs to struct VP9Filter, whose opening line
 * is missing from this excerpt. Indexing documented inline below. */
83 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
84 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
/* NOTE(review): fragment — closing "} VP9Block;" missing from this excerpt. */
/* Per-block (mode/partition) decode state for the block currently being
 * reconstructed. */
87 typedef struct VP9Block {
/* segment id, intra flag, compound-prediction flag, reference indices,
 * luma sub-block modes, chroma mode, skip flag */
88 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
89 enum FilterMode filter;
/* up to 4 sub-block MVs, each with up to 2 references (compound) */
90 VP56mv mv[4 /* b_idx */][2 /* ref */];
/* luma and chroma transform sizes */
92 enum TxfmMode tx, uvtx;
/* partition type of this block */
94 enum BlockPartition bp;
/* NOTE(review): fragment of the main decoder context — many members and the
 * closing "} VP9Context;" are missing from this excerpt (sampled lines).
 * Code tokens kept byte-identical; only comments added. */
97 typedef struct VP9Context {
/* per-tile block state: b_base is the allocation, b the cursor */
104 VP9Block *b_base, *b;
/* current position in 8x8-block units; row7/col7 are presumably the low
 * 3 bits used for within-superblock indexing — TODO confirm */
106 int row, row7, col, col7;
108 ptrdiff_t y_stride, uv_stride;
/* bitstream-derived frame flags and pixel depth bookkeeping */
111 uint8_t keyframe, last_keyframe;
112 uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
114 uint8_t use_last_frame_mvs;
/* uncompressed-header fields (see decode_frame_header) */
119 uint8_t refreshrefmask;
120 uint8_t highprecisionmvs;
121 enum FilterMode filtermode;
122 uint8_t allowcompinter;
125 uint8_t parallelmode;
/* the two variable compound references picked from signbias[] */
129 uint8_t varcompref[2];
/* 8 reference slots: current set and the set after this frame refreshes */
130 ThreadFrame refs[8], next_refs[8];
132 #define REF_FRAME_MVPAIR 1
133 #define REF_FRAME_SEGMAP 2
/* loopfilter limit LUT cache (invalidated when sharpness changes) */
140 uint8_t mblim_lut[64];
/* quantizer deltas from the quantization header */
148 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
150 #define MAX_SEGMENT 8
/* segmentation feature fields (enclosing struct not visible in excerpt) */
154 uint8_t absolute_vals;
160 uint8_t skip_enabled;
/* tiling layout */
169 unsigned log2_tile_cols, log2_tile_rows;
170 unsigned tile_cols, tile_rows;
171 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
/* frame geometry in 64x64 superblocks and 8x8 blocks */
173 unsigned sb_cols, sb_rows, rows, cols;
/* coefficient probability tables: 3-entry "model" form vs. expanded 11-entry
 * form — the two declarations presumably belong to different nested structs
 * (prob_ctx vs. prob); the struct headers are missing from this excerpt */
176 uint8_t coef[4][2][2][6][6][3];
180 uint8_t coef[4][2][2][6][6][11];
/* adaptivity counters (members of a "counts" struct whose header is missing) */
185 unsigned y_mode[4][10];
186 unsigned uv_mode[10][10];
187 unsigned filter[4][3];
188 unsigned mv_mode[7][4];
189 unsigned intra[4][2];
191 unsigned single_ref[5][2][2];
192 unsigned comp_ref[5][2];
193 unsigned tx32p[2][4];
194 unsigned tx16p[2][3];
197 unsigned mv_joint[4];
200 unsigned classes[11];
202 unsigned bits[10][2];
203 unsigned class0_fp[2][4];
205 unsigned class0_hp[2];
208 unsigned partition[4][4][4];
209 unsigned coef[4][2][2][6][6][3];
210 unsigned eob[4][2][2][6][6][2];
212 enum TxfmMode txfmmode;
213 enum CompPredMode comppredmode;
215 // contextual (left/above) cache
216 DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
217 DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
218 DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
219 DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
220 DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
221 DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
222 DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
223 DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
224 DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
225 DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
226 DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
227 DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
/* "above" context rows: pointers into one shared allocation made in
 * update_size() — see the assign() calls there for per-row sizes */
228 uint8_t *above_partition_ctx;
229 uint8_t *above_mode_ctx;
230 // FIXME maybe merge some of the below in a flags field?
231 uint8_t *above_y_nnz_ctx;
232 uint8_t *above_uv_nnz_ctx[2];
233 uint8_t *above_skip_ctx; // 1bit
234 uint8_t *above_txfm_ctx; // 2bit
235 uint8_t *above_segpred_ctx; // 1bit
236 uint8_t *above_intra_ctx; // 1bit
237 uint8_t *above_comp_ctx; // 1bit
238 uint8_t *above_ref_ctx; // 2bit
239 uint8_t *above_filter_ctx;
240 VP56mv (*above_mv_ctx)[2];
/* whole-frame cache: last row of decoded pixels for intra prediction,
 * and per-superblock-column loopfilter levels/masks */
243 uint8_t *intra_pred_data[3];
244 struct VP9Filter *lflvl;
245 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2];
247 // block reconstruction intermediates
248 int block_alloc_using_2pass;
249 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
250 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
/* MV clamping window for the current block (used by clamp_mv) */
251 struct { int x, y; } min_mv, max_mv;
252 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
253 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
/* per-reference 14-bit fixed-point scaling factors and derived step sizes
 * for scaled inter prediction (set in decode_frame_header) */
254 uint16_t mvscale[3][2];
255 uint8_t mvstep[3][2];
/* NOTE(review): fragment — the row-opening braces and closing "};" are
 * missing from this excerpt. Block width/height per block size, in units
 * presumably of 4px (row 0) and 8px (row 1) — TODO confirm against
 * N_BS_SIZES ordering. */
258 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
260 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
261 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
263 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
264 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
/* NOTE(review): fragment — opening brace, local declarations, an early
 * "return ret;" and the final "return 0;" appear to be missing from this
 * sampled excerpt. Code tokens kept byte-identical. */
/* Allocate the picture buffer for f plus one refcounted side-data buffer
 * holding the segmentation map (1 byte per 8x8 block) followed by the
 * per-block mv/ref pairs. */
268 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
270 VP9Context *s = ctx->priv_data;
273 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
/* 64 8x8 blocks per 64x64 superblock */
275 sz = 64 * s->sb_cols * s->sb_rows;
276 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
/* on side-data OOM, release the picture buffer we just acquired */
277 ff_thread_release_buffer(ctx, &f->tf);
278 return AVERROR(ENOMEM);
/* segmentation map first, mv array directly after it in the same buffer */
281 f->segmentation_map = f->extradata->data;
282 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
/* NOTE(review): braces missing from this sampled excerpt. Releases both the
 * picture buffer and the shared segmentation/mv side-data reference. */
287 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
289 ff_thread_release_buffer(ctx, &f->tf);
290 av_buffer_unref(&f->extradata);
/* NOTE(review): fragment — opening brace, the "return res;" branch body,
 * a presumable "dst->mv = src->mv;" line and the final "return 0;" are
 * missing from this sampled excerpt. */
/* Make dst a new reference to src: the picture via ff_thread_ref_frame and
 * the side data via av_buffer_ref; on partial failure dst is fully unref'd. */
293 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
297 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
299 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
300 vp9_unref_frame(ctx, dst);
301 return AVERROR(ENOMEM);
/* raw pointers can be shared since the backing buffer is refcounted */
304 dst->segmentation_map = src->segmentation_map;
306 dst->uses_2pass = src->uses_2pass;
/* NOTE(review): fragment — opening brace, the early-return body, the
 * ff_set_dimensions/pix_fmt assignment lines, the NULL check after
 * av_malloc, and the final "return 0;" are missing from this sampled
 * excerpt. Code tokens kept byte-identical. */
/* (Re)initialize per-resolution state: frame geometry, the shared "above"
 * context allocation, and (on bpp change) the DSP function tables. */
311 static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
313 VP9Context *s = ctx->priv_data;
315 int bytesperpixel = s->bytesperpixel;
317 av_assert0(w > 0 && h > 0);
/* nothing to do when geometry and format are unchanged */
319 if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
325 s->sb_cols = (w + 63) >> 6;
326 s->sb_rows = (h + 63) >> 6;
327 s->cols = (w + 7) >> 3;
328 s->rows = (h + 7) >> 3;
/* carve fixed-size per-sb-column slices out of one allocation */
330 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
331 av_freep(&s->intra_pred_data[0]);
332 // FIXME we slightly over-allocate here for subsampled chroma, but a little
333 // bit of padding shouldn't affect performance...
334 p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
335 sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
337 return AVERROR(ENOMEM);
338 assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
339 assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
340 assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
341 assign(s->above_y_nnz_ctx, uint8_t *, 16);
342 assign(s->above_mode_ctx, uint8_t *, 16);
343 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
344 assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
345 assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
346 assign(s->above_partition_ctx, uint8_t *, 8);
347 assign(s->above_skip_ctx, uint8_t *, 8);
348 assign(s->above_txfm_ctx, uint8_t *, 8);
349 assign(s->above_segpred_ctx, uint8_t *, 8);
350 assign(s->above_intra_ctx, uint8_t *, 8);
351 assign(s->above_comp_ctx, uint8_t *, 8);
352 assign(s->above_ref_ctx, uint8_t *, 8);
353 assign(s->above_filter_ctx, uint8_t *, 8);
354 assign(s->lflvl, struct VP9Filter *, 1);
/* size-dependent buffers are dropped and lazily re-created by
 * update_block_buffers() */
357 // these will be re-allocated a little later
358 av_freep(&s->b_base);
359 av_freep(&s->block_base);
/* bit depth changed: reinit the bpp-specialized DSP dispatch tables */
361 if (s->bpp != s->last_bpp) {
362 ff_vp9dsp_init(&s->dsp, s->bpp);
363 ff_videodsp_init(&s->vdsp, s->bpp);
364 s->last_bpp = s->bpp;
/* NOTE(review): fragment — opening brace, the early "return 0;", the
 * "} else {" between the two allocation branches, and the final "return 0;"
 * are missing from this sampled excerpt. Code tokens kept byte-identical. */
/* (Re)allocate block/coefficient scratch buffers. In 2-pass (frame-threaded)
 * mode the whole frame's worth is kept; otherwise one superblock's worth is
 * reused. Reallocates only when the 2-pass mode flips. */
370 static int update_block_buffers(AVCodecContext *ctx)
372 VP9Context *s = ctx->priv_data;
373 int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
/* buffers already match the current 2-pass mode: keep them */
375 if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
379 av_free(s->block_base);
/* chroma sizes shrink with each subsampling direction */
380 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
381 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
382 if (s->frames[CUR_FRAME].uses_2pass) {
383 int sbs = s->sb_cols * s->sb_rows;
/* whole-frame allocation: per-block state plus coeffs/eobs for every sb */
385 s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
386 s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
387 16 * 16 + 2 * chroma_eobs) * sbs);
388 if (!s->b_base || !s->block_base)
389 return AVERROR(ENOMEM);
/* sub-buffers laid out sequentially: Y coeffs, U, V, then Y/U/V eobs */
390 s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
391 s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
392 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
393 s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
394 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
/* single-pass: only one superblock's worth of scratch is needed */
396 s->b_base = av_malloc(sizeof(VP9Block));
397 s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
398 16 * 16 + 2 * chroma_eobs);
399 if (!s->b_base || !s->block_base)
400 return AVERROR(ENOMEM);
401 s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
402 s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
403 s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
404 s->uveob_base[0] = s->eob_base + 16 * 16;
405 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
/* remember which layout we allocated so the check above stays valid */
407 s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
/* NOTE(review): braces missing from this sampled excerpt. Reads an n-bit
 * magnitude followed by a sign bit (sign-last, unlike get_sbits). */
412 // for some reason the sign bit is at the end, not the start, of a bit sequence
413 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
415 int v = get_bits(gb, n);
416 return get_bits1(gb) ? -v : v;
/* NOTE(review): braces missing from this sampled excerpt. Inverse of the
 * "recenter" mapping used by the subexponential prob-update code: for
 * v <= 2*m, even v maps above m and odd v below it; larger v passes through. */
419 static av_always_inline int inv_recenter_nonneg(int v, int m)
421 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
/* NOTE(review): fragment — opening brace, the last inv_map_table row, the
 * "int d;" declaration, "} else {" lines and closing braces are missing from
 * this sampled excerpt. Code tokens kept byte-identical. */
424 // differential forward probability updates
425 static int update_prob(VP56RangeCoder *c, int p)
427 static const int inv_map_table[254] = {
428 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
429 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
430 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
431 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
432 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
433 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
434 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
435 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
436 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
437 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
438 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
439 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
440 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
441 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
442 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
443 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
444 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
445 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
450 /* This code is trying to do a differential probability update. For a
451 * current probability A in the range [1, 255], the difference to a new
452 * probability of any value can be expressed differentially as 1-A,255-A
453 * where some part of this (absolute range) exists both in positive as
454 * well as the negative part, whereas another part only exists in one
455 * half. We're trying to code this shared part differentially, i.e.
456 * times two where the value of the lowest bit specifies the sign, and
457 * the single part is then coded on top of this. This absolute difference
458 * then again has a value of [0,254], but a bigger value in this range
459 * indicates that we're further away from the original value A, so we
460 * can code this as a VLC code, since higher values are increasingly
461 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
462 * updates vs. the 'fine, exact' updates further down the range, which
463 * adds one extra dimension to this differential update model. */
/* four-leaf VLC: each failed branch widens the value range of d */
465 if (!vp8_rac_get(c)) {
466 d = vp8_rac_get_uint(c, 4) + 0;
467 } else if (!vp8_rac_get(c)) {
468 d = vp8_rac_get_uint(c, 4) + 16;
469 } else if (!vp8_rac_get(c)) {
470 d = vp8_rac_get_uint(c, 5) + 32;
472 d = vp8_rac_get_uint(c, 7);
/* presumably only taken in the widest branch (d >= 65) — the surrounding
 * conditional line is missing from this excerpt; TODO confirm */
474 d = (d << 1) - 65 + vp8_rac_get(c);
/* fold the decoded delta back around p, mirrored about the midpoint */
478 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
479 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
/* NOTE(review): fragment — opening brace, several "} else {" lines, the
 * error-path argument lines (profile number in av_log calls), and the
 * trailing "return res;" are missing from this sampled excerpt. */
/* Parse bit depth, colorspace, range, and subsampling from the uncompressed
 * header; returns the resulting AVPixelFormat or a negative error. */
482 static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
484 static const enum AVColorSpace colorspaces[8] = {
485 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
486 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
488 VP9Context *s = ctx->priv_data;
489 enum AVPixelFormat res;
/* profiles 0/1 are 8-bit only; profiles 2/3 signal 10 vs 12 bit */
490 int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
493 s->bpp = 8 + bits * 2;
494 s->bytesperpixel = (7 + s->bpp) >> 3;
495 ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
496 if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
497 static const enum AVPixelFormat pix_fmt_rgb[3] = {
498 AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
/* RGB is only valid in odd profiles (1 and 3) */
500 if (ctx->profile & 1) {
501 s->ss_h = s->ss_v = 1;
502 res = pix_fmt_rgb[bits];
503 ctx->color_range = AVCOL_RANGE_JPEG;
505 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
507 return AVERROR_INVALIDDATA;
/* YUV: pick a pix_fmt from bit depth and the two subsampling flags */
510 static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
511 { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
512 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
513 { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
514 { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
515 { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
516 { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
518 ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
/* odd profiles carry explicit subsampling bits; 4:2:0 is reserved there */
519 if (ctx->profile & 1) {
520 s->ss_h = get_bits1(&s->gb);
521 s->ss_v = get_bits1(&s->gb);
522 if ((res = pix_fmt_for_ss[bits][s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
523 av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
525 return AVERROR_INVALIDDATA;
526 } else if (get_bits1(&s->gb)) {
527 av_log(ctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
529 return AVERROR_INVALIDDATA;
/* even profiles are implicitly 4:2:0 */
532 s->ss_h = s->ss_v = 1;
533 res = pix_fmt_for_ss[bits][1][1];
/* NOTE(review): large fragment — this sampled excerpt is missing the opening
 * brace, many local declarations, numerous "} else {" / closing-brace lines,
 * several error-path lines and intermediate statements. Code tokens are kept
 * byte-identical; only comments are added. */
/* Parse the VP9 uncompressed frame header plus the arith-coded (compressed)
 * probability-update header. On success returns the total header size in
 * bytes; *ref is set instead when the frame is a show-existing-frame. */
540 static int decode_frame_header(AVCodecContext *ctx,
541 const uint8_t *data, int size, int *ref)
543 VP9Context *s = ctx->priv_data;
544 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
545 enum AVPixelFormat fmt = ctx->pix_fmt;
547 const uint8_t *data2;
/* ---- uncompressed header (plain bitreader) ---- */
550 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
551 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
554 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
555 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
556 return AVERROR_INVALIDDATA;
/* profile is 2 bits, with a third bit reserved for future profile > 3 */
558 ctx->profile = get_bits1(&s->gb);
559 ctx->profile |= get_bits1(&s->gb) << 1;
560 if (ctx->profile == 3) ctx->profile += get_bits1(&s->gb);
561 if (ctx->profile > 3) {
562 av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
563 return AVERROR_INVALIDDATA;
/* show-existing-frame: return the reference slot index via *ref */
565 if (get_bits1(&s->gb)) {
566 *ref = get_bits(&s->gb, 3);
569 s->last_keyframe = s->keyframe;
570 s->keyframe = !get_bits1(&s->gb);
571 last_invisible = s->invisible;
572 s->invisible = !get_bits1(&s->gb);
573 s->errorres = get_bits1(&s->gb);
574 s->use_last_frame_mvs = !s->errorres && !last_invisible;
/* keyframe path (presumably — the enclosing conditional line is missing) */
576 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
577 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
578 return AVERROR_INVALIDDATA;
580 if ((fmt = read_colorspace_details(ctx)) < 0)
582 // for profile 1, here follows the subsampling bits
583 s->refreshrefmask = 0xff;
584 w = get_bits(&s->gb, 16) + 1;
585 h = get_bits(&s->gb, 16) + 1;
586 if (get_bits1(&s->gb)) // display size
587 skip_bits(&s->gb, 32);
/* non-keyframe path: intraonly/resetctx, then either intra-only or inter */
589 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
590 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
592 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
593 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
594 return AVERROR_INVALIDDATA;
596 if (ctx->profile == 1) {
597 if ((fmt = read_colorspace_details(ctx)) < 0)
600 s->ss_h = s->ss_v = 1;
/* profile-0 intra-only frames are implicitly 8-bit 4:2:0 BT.470BG */
603 s->bytesperpixel = 1;
604 fmt = AV_PIX_FMT_YUV420P;
605 ctx->colorspace = AVCOL_SPC_BT470BG;
606 ctx->color_range = AVCOL_RANGE_JPEG;
608 s->refreshrefmask = get_bits(&s->gb, 8);
609 w = get_bits(&s->gb, 16) + 1;
610 h = get_bits(&s->gb, 16) + 1;
611 if (get_bits1(&s->gb)) // display size
612 skip_bits(&s->gb, 32);
/* inter frame: three active references with per-reference sign bias */
614 s->refreshrefmask = get_bits(&s->gb, 8);
615 s->refidx[0] = get_bits(&s->gb, 3);
616 s->signbias[0] = get_bits1(&s->gb);
617 s->refidx[1] = get_bits(&s->gb, 3);
618 s->signbias[1] = get_bits1(&s->gb);
619 s->refidx[2] = get_bits(&s->gb, 3);
620 s->signbias[2] = get_bits1(&s->gb);
621 if (!s->refs[s->refidx[0]].f->data[0] ||
622 !s->refs[s->refidx[1]].f->data[0] ||
623 !s->refs[s->refidx[2]].f->data[0]) {
624 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
625 return AVERROR_INVALIDDATA;
/* frame size: either inherited from one of the references, or explicit */
627 if (get_bits1(&s->gb)) {
628 w = s->refs[s->refidx[0]].f->width;
629 h = s->refs[s->refidx[0]].f->height;
630 } else if (get_bits1(&s->gb)) {
631 w = s->refs[s->refidx[1]].f->width;
632 h = s->refs[s->refidx[1]].f->height;
633 } else if (get_bits1(&s->gb)) {
634 w = s->refs[s->refidx[2]].f->width;
635 h = s->refs[s->refidx[2]].f->height;
637 w = get_bits(&s->gb, 16) + 1;
638 h = get_bits(&s->gb, 16) + 1;
640 // Note that in this code, "CUR_FRAME" is actually before we
641 // have formally allocated a frame, and thus actually represents
/* ... the last decoded frame's dimensions (comment truncated in excerpt) */
643 s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
644 s->frames[CUR_FRAME].tf.f->height == h;
645 if (get_bits1(&s->gb)) // display size
646 skip_bits(&s->gb, 32);
647 s->highprecisionmvs = get_bits1(&s->gb);
648 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
/* compound prediction allowed only when sign biases differ */
650 s->allowcompinter = s->signbias[0] != s->signbias[1] ||
651 s->signbias[0] != s->signbias[2];
652 if (s->allowcompinter) {
653 if (s->signbias[0] == s->signbias[1]) {
655 s->varcompref[0] = 0;
656 s->varcompref[1] = 1;
657 } else if (s->signbias[0] == s->signbias[2]) {
659 s->varcompref[0] = 0;
660 s->varcompref[1] = 2;
663 s->varcompref[0] = 1;
664 s->varcompref[1] = 2;
/* per-reference scaling factors (14-bit fixed point) for scaled prediction */
668 for (i = 0; i < 3; i++) {
669 AVFrame *ref = s->refs[s->refidx[i]].f;
670 int refw = ref->width, refh = ref->height;
672 if (ref->format != fmt) {
673 av_log(ctx, AV_LOG_ERROR,
674 "Ref pixfmt (%s) did not match current frame (%s)",
675 av_get_pix_fmt_name(ref->format),
676 av_get_pix_fmt_name(fmt));
677 return AVERROR_INVALIDDATA;
678 } else if (refw == w && refh == h) {
679 s->mvscale[i][0] = s->mvscale[i][1] = 0;
/* VP9 limits ref scaling to [1/2, 16] in each dimension */
681 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
682 av_log(ctx, AV_LOG_ERROR,
683 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
685 return AVERROR_INVALIDDATA;
687 s->mvscale[i][0] = (refw << 14) / w;
688 s->mvscale[i][1] = (refh << 14) / h;
689 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
690 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
/* context refresh behavior; error-resilient mode forces parallel decoding */
695 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
696 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
697 s->framectxid = c = get_bits(&s->gb, 2);
699 /* loopfilter header data */
700 s->filter.level = get_bits(&s->gb, 6);
701 sharp = get_bits(&s->gb, 3);
702 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
703 // the old cache values since they are still valid
704 if (s->filter.sharpness != sharp)
705 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
706 s->filter.sharpness = sharp;
707 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
708 if (get_bits1(&s->gb)) {
709 for (i = 0; i < 4; i++)
710 if (get_bits1(&s->gb))
711 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
712 for (i = 0; i < 2; i++)
713 if (get_bits1(&s->gb))
714 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
718 /* quantization header data */
719 s->yac_qi = get_bits(&s->gb, 8);
720 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
721 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
722 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
723 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
724 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
726 /* segmentation header info */
727 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
728 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
729 for (i = 0; i < 7; i++)
730 s->prob.seg[i] = get_bits1(&s->gb) ?
731 get_bits(&s->gb, 8) : 255;
732 if ((s->segmentation.temporal = get_bits1(&s->gb))) {
733 for (i = 0; i < 3; i++)
734 s->prob.segpred[i] = get_bits1(&s->gb) ?
735 get_bits(&s->gb, 8) : 255;
/* a segmap carried over from the previous frame is invalid on resize */
738 if ((!s->segmentation.update_map || s->segmentation.temporal) &&
739 (w != s->frames[CUR_FRAME].tf.f->width ||
740 h != s->frames[CUR_FRAME].tf.f->height)) {
741 av_log(ctx, AV_LOG_ERROR,
742 "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
743 s->segmentation.temporal, s->segmentation.update_map);
744 return AVERROR_INVALIDDATA;
/* per-segment feature data (q/lf deltas, forced ref, forced skip) */
747 if (get_bits1(&s->gb)) {
748 s->segmentation.absolute_vals = get_bits1(&s->gb);
749 for (i = 0; i < 8; i++) {
750 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
751 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
752 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
753 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
754 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
755 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
756 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
/* segmentation disabled: neutralize feature flags of segment 0 */
760 s->segmentation.feat[0].q_enabled = 0;
761 s->segmentation.feat[0].lf_enabled = 0;
762 s->segmentation.feat[0].skip_enabled = 0;
763 s->segmentation.feat[0].ref_enabled = 0;
766 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
767 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
768 int qyac, qydc, quvac, quvdc, lflvl, sh;
770 if (s->segmentation.feat[i].q_enabled) {
771 if (s->segmentation.absolute_vals)
772 qyac = s->segmentation.feat[i].q_val;
774 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
778 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
779 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
780 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
781 qyac = av_clip_uintp2(qyac, 8);
/* dequant multipliers from the depth-specific lookup tables */
783 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
784 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
785 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
786 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];
/* loopfilter level per segment/ref/mode, delta shifted at high base level */
788 sh = s->filter.level >= 32;
789 if (s->segmentation.feat[i].lf_enabled) {
790 if (s->segmentation.absolute_vals)
791 lflvl = s->segmentation.feat[i].lf_val;
793 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
795 lflvl = s->filter.level;
797 if (s->lf_delta.enabled) {
798 s->segmentation.feat[i].lflvl[0][0] =
799 s->segmentation.feat[i].lflvl[0][1] =
800 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
801 for (j = 1; j < 4; j++) {
802 s->segmentation.feat[i].lflvl[j][0] =
803 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
804 s->lf_delta.mode[0]) * (1 << sh)), 6);
805 s->segmentation.feat[i].lflvl[j][1] =
806 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
807 s->lf_delta.mode[1]) * (1 << sh)), 6);
810 memset(s->segmentation.feat[i].lflvl, lflvl,
811 sizeof(s->segmentation.feat[i].lflvl));
/* ---- geometry-dependent (re)allocation and tiling ---- */
816 if ((res = update_size(ctx, w, h, fmt)) < 0) {
817 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
/* tile columns: min cols forced so a tile is <= 64 sbs wide, max so each
 * tile is >= 4 sbs wide; extra bits refine within that range */
820 for (s->tiling.log2_tile_cols = 0;
821 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
822 s->tiling.log2_tile_cols++) ;
823 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
824 max = FFMAX(0, max - 1);
825 while (max > s->tiling.log2_tile_cols) {
826 if (get_bits1(&s->gb))
827 s->tiling.log2_tile_cols++;
831 s->tiling.log2_tile_rows = decode012(&s->gb);
832 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
833 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
834 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
/* one range coder per tile column */
835 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
836 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
838 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
839 return AVERROR(ENOMEM);
/* keyframes/error-resilient/intra-only frames reset all 4 prob contexts */
843 if (s->keyframe || s->errorres || s->intraonly) {
844 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
845 s->prob_ctx[3].p = vp9_default_probs;
846 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
847 sizeof(vp9_default_coef_probs));
848 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
849 sizeof(vp9_default_coef_probs));
850 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
851 sizeof(vp9_default_coef_probs));
852 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
853 sizeof(vp9_default_coef_probs));
/* ---- compressed header (range coder) ---- */
856 // next 16 bits is size of the rest of the header (arith-coded)
857 size2 = get_bits(&s->gb, 16);
858 data2 = align_get_bits(&s->gb);
859 if (size2 > size - (data2 - data)) {
860 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
861 return AVERROR_INVALIDDATA;
863 ff_vp56_init_range_decoder(&s->c, data2, size2);
864 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
865 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
866 return AVERROR_INVALIDDATA;
/* reset adaptivity counters; intra frames only need the coef/eob part
 * (presumably because the remaining counters are unused there) */
869 if (s->keyframe || s->intraonly) {
870 memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
872 memset(&s->counts, 0, sizeof(s->counts));
874 // FIXME is it faster to not copy here, but do it down in the fw updates
875 // as explicit copies if the fw update is missing (and skip the copy upon
877 s->prob.p = s->prob_ctx[c].p;
/* txfm mode: lossless forces 4x4, otherwise read 0-3 with 3 extended */
881 s->txfmmode = TX_4X4;
883 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
884 if (s->txfmmode == 3)
885 s->txfmmode += vp8_rac_get(&s->c);
887 if (s->txfmmode == TX_SWITCHABLE) {
888 for (i = 0; i < 2; i++)
889 if (vp56_rac_get_prob_branchy(&s->c, 252))
890 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
891 for (i = 0; i < 2; i++)
892 for (j = 0; j < 2; j++)
893 if (vp56_rac_get_prob_branchy(&s->c, 252))
894 s->prob.p.tx16p[i][j] =
895 update_prob(&s->c, s->prob.p.tx16p[i][j]);
896 for (i = 0; i < 2; i++)
897 for (j = 0; j < 3; j++)
898 if (vp56_rac_get_prob_branchy(&s->c, 252))
899 s->prob.p.tx32p[i][j] =
900 update_prob(&s->c, s->prob.p.tx32p[i][j]);
/* coefficient probability updates, one set per txfm size */
905 for (i = 0; i < 4; i++) {
906 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
907 if (vp8_rac_get(&s->c)) {
908 for (j = 0; j < 2; j++)
909 for (k = 0; k < 2; k++)
910 for (l = 0; l < 6; l++)
911 for (m = 0; m < 6; m++) {
912 uint8_t *p = s->prob.coef[i][j][k][l][m];
913 uint8_t *r = ref[j][k][l][m];
914 if (m >= 3 && l == 0) // dc only has 3 pt
916 for (n = 0; n < 3; n++) {
917 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
918 p[n] = update_prob(&s->c, r[n]);
/* no update flag: copy context probs unchanged (copy lines missing here) */
926 for (j = 0; j < 2; j++)
927 for (k = 0; k < 2; k++)
928 for (l = 0; l < 6; l++)
929 for (m = 0; m < 6; m++) {
930 uint8_t *p = s->prob.coef[i][j][k][l][m];
931 uint8_t *r = ref[j][k][l][m];
932 if (m > 3 && l == 0) // dc only has 3 pt
/* larger txfm sizes than the active mode carry no further updates */
938 if (s->txfmmode == i)
/* mode/partition/ref probability updates */
943 for (i = 0; i < 3; i++)
944 if (vp56_rac_get_prob_branchy(&s->c, 252))
945 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
946 if (!s->keyframe && !s->intraonly) {
947 for (i = 0; i < 7; i++)
948 for (j = 0; j < 3; j++)
949 if (vp56_rac_get_prob_branchy(&s->c, 252))
950 s->prob.p.mv_mode[i][j] =
951 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
953 if (s->filtermode == FILTER_SWITCHABLE)
954 for (i = 0; i < 4; i++)
955 for (j = 0; j < 2; j++)
956 if (vp56_rac_get_prob_branchy(&s->c, 252))
957 s->prob.p.filter[i][j] =
958 update_prob(&s->c, s->prob.p.filter[i][j]);
960 for (i = 0; i < 4; i++)
961 if (vp56_rac_get_prob_branchy(&s->c, 252))
962 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
964 if (s->allowcompinter) {
965 s->comppredmode = vp8_rac_get(&s->c);
967 s->comppredmode += vp8_rac_get(&s->c);
968 if (s->comppredmode == PRED_SWITCHABLE)
969 for (i = 0; i < 5; i++)
970 if (vp56_rac_get_prob_branchy(&s->c, 252))
972 update_prob(&s->c, s->prob.p.comp[i]);
974 s->comppredmode = PRED_SINGLEREF;
977 if (s->comppredmode != PRED_COMPREF) {
978 for (i = 0; i < 5; i++) {
979 if (vp56_rac_get_prob_branchy(&s->c, 252))
980 s->prob.p.single_ref[i][0] =
981 update_prob(&s->c, s->prob.p.single_ref[i][0]);
982 if (vp56_rac_get_prob_branchy(&s->c, 252))
983 s->prob.p.single_ref[i][1] =
984 update_prob(&s->c, s->prob.p.single_ref[i][1]);
988 if (s->comppredmode != PRED_SINGLEREF) {
989 for (i = 0; i < 5; i++)
990 if (vp56_rac_get_prob_branchy(&s->c, 252))
991 s->prob.p.comp_ref[i] =
992 update_prob(&s->c, s->prob.p.comp_ref[i]);
995 for (i = 0; i < 4; i++)
996 for (j = 0; j < 9; j++)
997 if (vp56_rac_get_prob_branchy(&s->c, 252))
998 s->prob.p.y_mode[i][j] =
999 update_prob(&s->c, s->prob.p.y_mode[i][j]);
1001 for (i = 0; i < 4; i++)
1002 for (j = 0; j < 4; j++)
1003 for (k = 0; k < 3; k++)
1004 if (vp56_rac_get_prob_branchy(&s->c, 252))
1005 s->prob.p.partition[3 - i][j][k] =
1006 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
1008 // mv fields don't use the update_prob subexp model for some reason
1009 for (i = 0; i < 3; i++)
1010 if (vp56_rac_get_prob_branchy(&s->c, 252))
1011 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
/* per-component (x/y) mv probability updates */
1013 for (i = 0; i < 2; i++) {
1014 if (vp56_rac_get_prob_branchy(&s->c, 252))
1015 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1017 for (j = 0; j < 10; j++)
1018 if (vp56_rac_get_prob_branchy(&s->c, 252))
1019 s->prob.p.mv_comp[i].classes[j] =
1020 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1022 if (vp56_rac_get_prob_branchy(&s->c, 252))
1023 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1025 for (j = 0; j < 10; j++)
1026 if (vp56_rac_get_prob_branchy(&s->c, 252))
1027 s->prob.p.mv_comp[i].bits[j] =
1028 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1031 for (i = 0; i < 2; i++) {
1032 for (j = 0; j < 2; j++)
1033 for (k = 0; k < 3; k++)
1034 if (vp56_rac_get_prob_branchy(&s->c, 252))
1035 s->prob.p.mv_comp[i].class0_fp[j][k] =
1036 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1038 for (j = 0; j < 3; j++)
1039 if (vp56_rac_get_prob_branchy(&s->c, 252))
1040 s->prob.p.mv_comp[i].fp[j] =
1041 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
/* high-precision mv probs only present when the header enabled them */
1044 if (s->highprecisionmvs) {
1045 for (i = 0; i < 2; i++) {
1046 if (vp56_rac_get_prob_branchy(&s->c, 252))
1047 s->prob.p.mv_comp[i].class0_hp =
1048 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1050 if (vp56_rac_get_prob_branchy(&s->c, 252))
1051 s->prob.p.mv_comp[i].hp =
1052 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
/* total bytes consumed: uncompressed header + compressed header */
1057 return (data2 - data) + size2;
/* NOTE(review): fragment — the second parameter line ("VP9Context *s)"),
 * opening brace and closing brace are missing from this sampled excerpt. */
/* Clamp src into the current block's allowed MV window (s->min_mv/max_mv)
 * and store the result in dst. */
1060 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
1063 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
1064 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
/**
 * Build the motion-vector predictor (*pmv) for reference-list slot @z of
 * reference frame @ref, by scanning candidates in priority order:
 * already-decoded sub-block MVs of this block, spatial neighbours using
 * the same reference, the co-located MV of the previous frame, then the
 * same candidates again for MVs coded against a *different* reference
 * (negated when the sign biases differ).
 *
 * @param idx selects which distinct candidate is kept (NEARESTMV vs
 *            NEARMV style selection) — the macro bodies that consume it
 *            are partly outside this fragment, TODO confirm
 * @param sb  sub-block index within the block, or -1 (see fill_mv())
 */
1067 static void find_ref_mvs(VP9Context *s,
1068 VP56mv *pmv, int ref, int z, int idx, int sb)
// Per-block-size list of (col, row) offsets of candidate neighbour
// positions, scanned in order below.
1070 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
1071 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
1072 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
1073 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
1074 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
1075 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
1076 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
1077 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
1078 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1079 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
1080 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1081 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
1082 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
1083 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
1084 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1085 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
1086 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
1087 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1088 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1089 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1090 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1091 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1092 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1093 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1094 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1095 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1096 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1099 int row = s->row, col = s->col, row7 = s->row7;
1100 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
// Sentinel packed-MV value used to mark "no candidate remembered yet".
1101 #define INVALID_MV 0x80008000U
1102 uint32_t mem = INVALID_MV;
// RETURN_DIRECT_MV: accept an already-decoded sub-block MV candidate
// without clamping (32-bit packed compare against the remembered one).
1105 #define RETURN_DIRECT_MV(mv) \
1107 uint32_t m = AV_RN32A(&mv); \
1111 } else if (mem == INVALID_MV) { \
1113 } else if (m != mem) { \
// For sub-blocks 1..3, earlier sub-block MVs of this same block are the
// highest-priority candidates.
1120 if (sb == 2 || sb == 1) {
1121 RETURN_DIRECT_MV(b->mv[0][z]);
1122 } else if (sb == 3) {
1123 RETURN_DIRECT_MV(b->mv[2][z]);
1124 RETURN_DIRECT_MV(b->mv[1][z]);
1125 RETURN_DIRECT_MV(b->mv[0][z]);
// RETURN_MV: like RETURN_DIRECT_MV but clamps the candidate into the
// legal range (clamp_mv) before comparing/storing.
1128 #define RETURN_MV(mv) \
1133 clamp_mv(&tmp, &mv, s); \
1134 m = AV_RN32A(&tmp); \
1138 } else if (mem == INVALID_MV) { \
1140 } else if (m != mem) { \
1145 uint32_t m = AV_RN32A(&mv); \
1147 clamp_mv(pmv, &mv, s); \
1149 } else if (mem == INVALID_MV) { \
1151 } else if (m != mem) { \
1152 clamp_mv(pmv, &mv, s); \
// Immediate above neighbour: its MV context was cached per 8x8 cell in
// above_mv_ctx by decode_mode().
1159 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1160 if (mv->ref[0] == ref) {
1161 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1162 } else if (mv->ref[1] == ref) {
1163 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
// Immediate left neighbour (only within the current tile).
1166 if (col > s->tiling.tile_col_start) {
1167 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1168 if (mv->ref[0] == ref) {
1169 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1170 } else if (mv->ref[1] == ref) {
1171 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1179 // previously coded MVs in this neighbourhood, using same reference frame
1180 for (; i < 8; i++) {
1181 int c = p[i][0] + col, r = p[i][1] + row;
1183 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1184 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1186 if (mv->ref[0] == ref) {
1187 RETURN_MV(mv->mv[0]);
1188 } else if (mv->ref[1] == ref) {
1189 RETURN_MV(mv->mv[1]);
1194 // MV at this position in previous frame, using same reference frame
1195 if (s->use_last_frame_mvs) {
1196 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
// In frame-threaded decoding, wait until the previous frame has decoded
// this superblock row before reading its MVs.
1198 if (!s->frames[REF_FRAME_MVPAIR].uses_2pass)
1199 ff_thread_await_progress(&s->frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
1200 if (mv->ref[0] == ref) {
1201 RETURN_MV(mv->mv[0]);
1202 } else if (mv->ref[1] == ref) {
1203 RETURN_MV(mv->mv[1]);
// RETURN_SCALE_MV: candidate coded against a different reference; the MV
// is negated when the two references' sign biases differ.
1207 #define RETURN_SCALE_MV(mv, scale) \
1210 VP56mv mv_temp = { -mv.x, -mv.y }; \
1211 RETURN_MV(mv_temp); \
1217 // previously coded MVs in this neighbourhood, using different reference frame
1218 for (i = 0; i < 8; i++) {
1219 int c = p[i][0] + col, r = p[i][1] + row;
1221 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1222 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1224 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1225 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1227 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1228 // BUG - libvpx has this condition regardless of whether
1229 // we used the first ref MV and pre-scaling
1230 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1231 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1236 // MV at this position in previous frame, using different reference frame
1237 if (s->use_last_frame_mvs) {
1238 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1240 // no need to await_progress, because we already did that above
1241 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1242 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1244 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1245 // BUG - libvpx has this condition regardless of whether
1246 // we used the first ref MV and pre-scaling
1247 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1248 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1255 #undef RETURN_SCALE_MV
/**
 * Decode one motion-vector component delta from the bitstream.
 *
 * Reads sign and magnitude class, then either the class-0 fine bits or
 * the per-bit integer magnitude, followed by fractional (fp) and, when
 * @hp is set, high-precision bits. All decoded symbols are tallied in
 * s->counts.mv_comp[idx] for later probability adaptation.
 *
 * @param idx component index: fill_mv() uses 0 for y and 1 for x
 * @param hp  whether the high-precision bit is actually coded
 * @return the signed component delta, +/-(n + 1)
 */
1258 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1260 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1261 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1262 s->prob.p.mv_comp[idx].classes);
1264 s->counts.mv_comp[idx].sign[sign]++;
1265 s->counts.mv_comp[idx].classes[c]++;
// Classes >= 1: read c raw magnitude bits, one per class level.
1269 for (n = 0, m = 0; m < c; m++) {
1270 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1272 s->counts.mv_comp[idx].bits[m][bit]++;
// Fractional (1/4-pel) part, then the optional 1/8-pel bit.
1275 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1277 s->counts.mv_comp[idx].fp[bit]++;
1279 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1280 s->counts.mv_comp[idx].hp[bit]++;
1284 // bug in libvpx - we count for bw entropy purposes even if the
1286 s->counts.mv_comp[idx].hp[1]++;
// Class 0: a single integer bit plus class0-specific fractional/hp bits.
1290 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1291 s->counts.mv_comp[idx].class0[n]++;
1292 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1293 s->prob.p.mv_comp[idx].class0_fp[n]);
1294 s->counts.mv_comp[idx].class0_fp[n][bit]++;
1295 n = (n << 3) | (bit << 1);
1297 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1298 s->counts.mv_comp[idx].class0_hp[bit]++;
1302 // bug in libvpx - we count for bw entropy purposes even if the
1304 s->counts.mv_comp[idx].class0_hp[1]++;
1308 return sign ? -(n + 1) : (n + 1);
/**
 * Derive the motion vector(s) for the current (sub-)block.
 *
 * mv[0] is predicted via find_ref_mvs() for the first reference; for a
 * NEWMV mode a coded delta (joint symbol + per-component deltas from
 * read_mv_component()) is added on top. mv[1] is handled the same way
 * for the second reference (compound prediction); the branch that
 * selects whether mv[1] is filled is outside this fragment.
 * The ZEROMV early path's body is also outside this fragment —
 * presumably it zeroes the vectors; TODO confirm.
 *
 * @param sb sub-block index, or -1 for whole-block (forced for NEWMV)
 */
1311 static void fill_mv(VP9Context *s,
1312 VP56mv *mv, int mode, int sb)
1316 if (mode == ZEROMV) {
1321 // FIXME cache this value and reuse for other subblocks
1322 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1323 mode == NEWMV ? -1 : sb);
1324 // FIXME maybe move this code into find_ref_mvs()
// hp is only honoured for small vectors (|x|,|y| < 64); the rounding
// applied when it is disabled is outside this fragment.
1325 if ((mode == NEWMV || sb == -1) &&
1326 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1340 if (mode == NEWMV) {
// The joint symbol says which components carry a delta (vertical,
// horizontal, or both).
1341 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1342 s->prob.p.mv_joint);
1344 s->counts.mv_joint[j]++;
1345 if (j >= MV_JOINT_V)
1346 mv[0].y += read_mv_component(s, 0, hp);
1348 mv[0].x += read_mv_component(s, 1, hp);
// Second reference (compound prediction), same procedure as above.
1352 // FIXME cache this value and reuse for other subblocks
1353 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1354 mode == NEWMV ? -1 : sb);
1355 if ((mode == NEWMV || sb == -1) &&
1356 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1370 if (mode == NEWMV) {
1371 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1372 s->prob.p.mv_joint);
1374 s->counts.mv_joint[j]++;
1375 if (j >= MV_JOINT_V)
1376 mv[1].y += read_mv_component(s, 0, hp);
1378 mv[1].x += read_mv_component(s, 1, hp);
/**
 * Fill a w x h byte region of a context plane (e.g. the per-cell
 * segmentation map) with the value v, one row per stride.
 *
 * The replicated 16/32/64-bit patterns below feed width-specialised
 * store paths; the switch on w and the row loops are outside this
 * fragment.
 */
1384 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1385 ptrdiff_t stride, int v)
1395 int v16 = v * 0x0101;
1403 uint32_t v32 = v * 0x01010101;
1412 uint64_t v64 = v * 0x0101010101010101ULL;
1418 uint32_t v32 = v * 0x01010101;
1421 AV_WN32A(ptr + 4, v32);
/**
 * Decode all mode information for the current block (s->b): segment id,
 * skip flag, intra/inter decision, transform size, then either intra
 * prediction modes or reference frame(s) + inter mode + interpolation
 * filter + motion vectors. Finally update the left/above context arrays
 * and the per-8x8-cell reference/MV storage consumed by later blocks
 * (find_ref_mvs()) and by the next frame.
 */
1430 static void decode_mode(AVCodecContext *ctx)
// Partition context values written back for the above/left neighbours,
// indexed by block size (consumed via SET_CTXS below).
1432 static const uint8_t left_ctx[N_BS_SIZES] = {
1433 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1435 static const uint8_t above_ctx[N_BS_SIZES] = {
1436 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
// Largest transform size allowed for each block size.
1438 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1439 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1440 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1442 VP9Context *s = ctx->priv_data;
1444 int row = s->row, col = s->col, row7 = s->row7;
1445 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
// w4/h4: block size in 8x8 units, clipped at the frame edge.
1446 int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1447 int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
1448 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1449 int vref, filter_id;
// --- segment id: absent, explicitly coded, or temporally predicted ---
1451 if (!s->segmentation.enabled) {
1453 } else if (s->keyframe || s->intraonly) {
1454 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
1455 } else if (!s->segmentation.update_map ||
1456 (s->segmentation.temporal &&
1457 vp56_rac_get_prob_branchy(&s->c,
1458 s->prob.segpred[s->above_segpred_ctx[col] +
1459 s->left_segpred_ctx[row7]]))) {
// Temporal prediction: take the minimum segment id over the co-located
// cells of the reference segmentation map.
1462 uint8_t *refsegmap = s->frames[REF_FRAME_SEGMAP].segmentation_map;
1464 if (!s->frames[REF_FRAME_SEGMAP].uses_2pass)
1465 ff_thread_await_progress(&s->frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
1466 for (y = 0; y < h4; y++) {
1467 int idx_base = (y + row) * 8 * s->sb_cols + col;
1468 for (x = 0; x < w4; x++)
1469 pred = FFMIN(pred, refsegmap[idx_base + x]);
1471 av_assert1(pred < 8);
1477 memset(&s->above_segpred_ctx[col], 1, w4);
1478 memset(&s->left_segpred_ctx[row7], 1, h4);
1480 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1483 memset(&s->above_segpred_ctx[col], 0, w4);
1484 memset(&s->left_segpred_ctx[row7], 0, h4);
// Persist the decoded segment id into the current frame's map.
1486 if (s->segmentation.enabled &&
1487 (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1488 setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1489 bw4, bh4, 8 * s->sb_cols, b->seg_id);
// --- skip flag (forced when the segment's skip feature is enabled) ---
1492 b->skip = s->segmentation.enabled &&
1493 s->segmentation.feat[b->seg_id].skip_enabled;
1495 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1496 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1497 s->counts.skip[c][b->skip]++;
// --- intra/inter flag ---
1500 if (s->keyframe || s->intraonly) {
1502 } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1503 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1507 if (have_a && have_l) {
1508 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1511 c = have_a ? 2 * s->above_intra_ctx[col] :
1512 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1514 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1515 s->counts.intra[c][bit]++;
// --- transform size (only coded when switchable and coeffs present) ---
1519 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1523 c = (s->above_skip_ctx[col] ? max_tx :
1524 s->above_txfm_ctx[col]) +
1525 (s->left_skip_ctx[row7] ? max_tx :
1526 s->left_txfm_ctx[row7]) > max_tx;
1528 c = s->above_skip_ctx[col] ? 1 :
1529 (s->above_txfm_ctx[col] * 2 > max_tx);
1531 } else if (have_l) {
1532 c = s->left_skip_ctx[row7] ? 1 :
1533 (s->left_txfm_ctx[row7] * 2 > max_tx);
// b->tx is built up incrementally, one tree bit per allowed level.
1539 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1541 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1543 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1545 s->counts.tx32p[c][b->tx]++;
1548 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1550 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1551 s->counts.tx16p[c][b->tx]++;
1554 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1555 s->counts.tx8p[c][b->tx]++;
1562 b->tx = FFMIN(max_tx, s->txfmmode);
// --- intra modes on key/intra-only frames (fixed default probs) ---
1565 if (s->keyframe || s->intraonly) {
1566 uint8_t *a = &s->above_mode_ctx[col * 2];
1567 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
// Note: in the block-size enum, larger values mean *smaller* blocks, so
// this branch handles the sub-8x8 sizes with up to 4 per-sub-block modes.
1570 if (b->bs > BS_8x8) {
1571 // FIXME the memory storage intermediates here aren't really
1572 // necessary, they're just there to make the code slightly
1574 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1575 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1576 if (b->bs != BS_8x4) {
1577 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1578 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1579 l[0] = a[1] = b->mode[1];
1581 l[0] = a[1] = b->mode[1] = b->mode[0];
1583 if (b->bs != BS_4x8) {
1584 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1585 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1586 if (b->bs != BS_8x4) {
1587 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1588 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1589 l[1] = a[1] = b->mode[3];
1591 l[1] = a[1] = b->mode[3] = b->mode[2];
1594 b->mode[2] = b->mode[0];
1595 l[1] = a[1] = b->mode[3] = b->mode[1];
1598 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1599 vp9_default_kf_ymode_probs[*a][*l]);
1600 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1601 // FIXME this can probably be optimized
1602 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1603 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1605 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1606 vp9_default_kf_uvmode_probs[b->mode[3]]);
// --- intra modes inside an inter frame (adaptive probs + counts) ---
1607 } else if (b->intra) {
1609 if (b->bs > BS_8x8) {
1610 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1611 s->prob.p.y_mode[0]);
1612 s->counts.y_mode[0][b->mode[0]]++;
1613 if (b->bs != BS_8x4) {
1614 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1615 s->prob.p.y_mode[0]);
1616 s->counts.y_mode[0][b->mode[1]]++;
1618 b->mode[1] = b->mode[0];
1620 if (b->bs != BS_4x8) {
1621 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1622 s->prob.p.y_mode[0]);
1623 s->counts.y_mode[0][b->mode[2]]++;
1624 if (b->bs != BS_8x4) {
1625 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1626 s->prob.p.y_mode[0]);
1627 s->counts.y_mode[0][b->mode[3]]++;
1629 b->mode[3] = b->mode[2];
1632 b->mode[2] = b->mode[0];
1633 b->mode[3] = b->mode[1];
1636 static const uint8_t size_group[10] = {
1637 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1639 int sz = size_group[b->bs];
1641 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1642 s->prob.p.y_mode[sz]);
1643 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1644 s->counts.y_mode[sz][b->mode[3]]++;
1646 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1647 s->prob.p.uv_mode[b->mode[3]]);
1648 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
// --- inter block: reference selection, mode, filter, MVs ---
// Context LUT indexed by [above mode ctx][left mode ctx].
1650 static const uint8_t inter_mode_ctx_lut[14][14] = {
1651 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1652 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1653 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1654 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1655 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1656 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1657 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1658 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1659 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1660 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1661 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1662 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1663 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1664 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
// Segment may pin the reference frame directly.
1667 if (s->segmentation.feat[b->seg_id].ref_enabled) {
1668 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1670 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1672 // read comp_pred flag
1673 if (s->comppredmode != PRED_SWITCHABLE) {
1674 b->comp = s->comppredmode == PRED_COMPREF;
1678 // FIXME add intra as ref=0xff (or -1) to make these easier?
1681 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1683 } else if (s->above_comp_ctx[col]) {
1684 c = 2 + (s->left_intra_ctx[row7] ||
1685 s->left_ref_ctx[row7] == s->fixcompref);
1686 } else if (s->left_comp_ctx[row7]) {
1687 c = 2 + (s->above_intra_ctx[col] ||
1688 s->above_ref_ctx[col] == s->fixcompref);
1690 c = (!s->above_intra_ctx[col] &&
1691 s->above_ref_ctx[col] == s->fixcompref) ^
1692 (!s->left_intra_ctx[row7] &&
1693 s->left_ref_ctx[row & 7] == s->fixcompref);
1696 c = s->above_comp_ctx[col] ? 3 :
1697 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1699 } else if (have_l) {
1700 c = s->left_comp_ctx[row7] ? 3 :
1701 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1705 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1706 s->counts.comp[c][b->comp]++;
1709 // read actual references
1710 // FIXME probably cache a few variables here to prevent repetitive
1711 // memory accesses below
1712 if (b->comp) /* two references */ {
1713 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1715 b->ref[fix_idx] = s->fixcompref;
1716 // FIXME can this codeblob be replaced by some sort of LUT?
1719 if (s->above_intra_ctx[col]) {
1720 if (s->left_intra_ctx[row7]) {
1723 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1725 } else if (s->left_intra_ctx[row7]) {
1726 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1728 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1730 if (refl == refa && refa == s->varcompref[1]) {
1732 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1733 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1734 (refl == s->fixcompref && refa == s->varcompref[0])) {
1737 c = (refa == refl) ? 3 : 1;
1739 } else if (!s->left_comp_ctx[row7]) {
1740 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1743 c = (refl == s->varcompref[1] &&
1744 refa != s->varcompref[1]) ? 2 : 4;
1746 } else if (!s->above_comp_ctx[col]) {
1747 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1750 c = (refa == s->varcompref[1] &&
1751 refl != s->varcompref[1]) ? 2 : 4;
1754 c = (refl == refa) ? 4 : 2;
1758 if (s->above_intra_ctx[col]) {
1760 } else if (s->above_comp_ctx[col]) {
1761 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1763 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1766 } else if (have_l) {
1767 if (s->left_intra_ctx[row7]) {
1769 } else if (s->left_comp_ctx[row7]) {
1770 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1772 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1777 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1778 b->ref[var_idx] = s->varcompref[bit];
1779 s->counts.comp_ref[c][bit]++;
1780 } else /* single reference */ {
// First single_ref bit: LAST vs non-LAST reference.
1783 if (have_a && !s->above_intra_ctx[col]) {
1784 if (have_l && !s->left_intra_ctx[row7]) {
1785 if (s->left_comp_ctx[row7]) {
1786 if (s->above_comp_ctx[col]) {
1787 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1788 !s->above_ref_ctx[col]);
1790 c = (3 * !s->above_ref_ctx[col]) +
1791 (!s->fixcompref || !s->left_ref_ctx[row7]);
1793 } else if (s->above_comp_ctx[col]) {
1794 c = (3 * !s->left_ref_ctx[row7]) +
1795 (!s->fixcompref || !s->above_ref_ctx[col]);
1797 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1799 } else if (s->above_intra_ctx[col]) {
1801 } else if (s->above_comp_ctx[col]) {
1802 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1804 c = 4 * (!s->above_ref_ctx[col]);
1806 } else if (have_l && !s->left_intra_ctx[row7]) {
1807 if (s->left_intra_ctx[row7]) {
1809 } else if (s->left_comp_ctx[row7]) {
1810 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1812 c = 4 * (!s->left_ref_ctx[row7]);
1817 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1818 s->counts.single_ref[c][0][bit]++;
// Second single_ref bit: GOLDEN vs ALTREF.
1822 // FIXME can this codeblob be replaced by some sort of LUT?
1825 if (s->left_intra_ctx[row7]) {
1826 if (s->above_intra_ctx[col]) {
1828 } else if (s->above_comp_ctx[col]) {
1829 c = 1 + 2 * (s->fixcompref == 1 ||
1830 s->above_ref_ctx[col] == 1);
1831 } else if (!s->above_ref_ctx[col]) {
1834 c = 4 * (s->above_ref_ctx[col] == 1);
1836 } else if (s->above_intra_ctx[col]) {
1837 if (s->left_intra_ctx[row7]) {
1839 } else if (s->left_comp_ctx[row7]) {
1840 c = 1 + 2 * (s->fixcompref == 1 ||
1841 s->left_ref_ctx[row7] == 1);
1842 } else if (!s->left_ref_ctx[row7]) {
1845 c = 4 * (s->left_ref_ctx[row7] == 1);
1847 } else if (s->above_comp_ctx[col]) {
1848 if (s->left_comp_ctx[row7]) {
1849 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1850 c = 3 * (s->fixcompref == 1 ||
1851 s->left_ref_ctx[row7] == 1);
1855 } else if (!s->left_ref_ctx[row7]) {
1856 c = 1 + 2 * (s->fixcompref == 1 ||
1857 s->above_ref_ctx[col] == 1);
1859 c = 3 * (s->left_ref_ctx[row7] == 1) +
1860 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1862 } else if (s->left_comp_ctx[row7]) {
1863 if (!s->above_ref_ctx[col]) {
1864 c = 1 + 2 * (s->fixcompref == 1 ||
1865 s->left_ref_ctx[row7] == 1);
1867 c = 3 * (s->above_ref_ctx[col] == 1) +
1868 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1870 } else if (!s->above_ref_ctx[col]) {
1871 if (!s->left_ref_ctx[row7]) {
1874 c = 4 * (s->left_ref_ctx[row7] == 1);
1876 } else if (!s->left_ref_ctx[row7]) {
1877 c = 4 * (s->above_ref_ctx[col] == 1);
1879 c = 2 * (s->left_ref_ctx[row7] == 1) +
1880 2 * (s->above_ref_ctx[col] == 1);
1883 if (s->above_intra_ctx[col] ||
1884 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1886 } else if (s->above_comp_ctx[col]) {
1887 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1889 c = 4 * (s->above_ref_ctx[col] == 1);
1892 } else if (have_l) {
1893 if (s->left_intra_ctx[row7] ||
1894 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1896 } else if (s->left_comp_ctx[row7]) {
1897 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1899 c = 4 * (s->left_ref_ctx[row7] == 1);
1904 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1905 s->counts.single_ref[c][1][bit]++;
1906 b->ref[0] = 1 + bit;
// --- inter mode for 8x8-and-larger blocks (single mode) ---
1911 if (b->bs <= BS_8x8) {
1912 if (s->segmentation.feat[b->seg_id].skip_enabled) {
1913 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1915 static const uint8_t off[10] = {
1916 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1919 // FIXME this needs to use the LUT tables from find_ref_mvs
1920 // because not all are -1,0/0,-1
1921 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1922 [s->left_mode_ctx[row7 + off[b->bs]]];
1924 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1925 s->prob.p.mv_mode[c]);
1926 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1927 s->counts.mv_mode[c][b->mode[0] - 10]++;
// --- interpolation filter (coded only when switchable) ---
1931 if (s->filtermode == FILTER_SWITCHABLE) {
1934 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1935 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1936 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1937 s->left_filter_ctx[row7] : 3;
1939 c = s->above_filter_ctx[col];
1941 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1942 c = s->left_filter_ctx[row7];
1947 filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1948 s->prob.p.filter[c]);
1949 s->counts.filter[c][filter_id]++;
1950 b->filter = vp9_filter_lut[filter_id];
1952 b->filter = s->filtermode;
// --- per-sub-block modes + MVs for sub-8x8 sizes, single MV otherwise ---
1955 if (b->bs > BS_8x8) {
1956 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1958 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1959 s->prob.p.mv_mode[c]);
1960 s->counts.mv_mode[c][b->mode[0] - 10]++;
1961 fill_mv(s, b->mv[0], b->mode[0], 0);
1963 if (b->bs != BS_8x4) {
1964 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1965 s->prob.p.mv_mode[c]);
1966 s->counts.mv_mode[c][b->mode[1] - 10]++;
1967 fill_mv(s, b->mv[1], b->mode[1], 1);
1969 b->mode[1] = b->mode[0];
1970 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1971 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1974 if (b->bs != BS_4x8) {
1975 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1976 s->prob.p.mv_mode[c]);
1977 s->counts.mv_mode[c][b->mode[2] - 10]++;
1978 fill_mv(s, b->mv[2], b->mode[2], 2);
1980 if (b->bs != BS_8x4) {
1981 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1982 s->prob.p.mv_mode[c]);
1983 s->counts.mv_mode[c][b->mode[3] - 10]++;
1984 fill_mv(s, b->mv[3], b->mode[3], 3);
1986 b->mode[3] = b->mode[2];
1987 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1988 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1991 b->mode[2] = b->mode[0];
1992 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1993 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1994 b->mode[3] = b->mode[1];
1995 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1996 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
1999 fill_mv(s, b->mv[0], b->mode[0], -1);
2000 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
2001 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2002 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
2003 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2004 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2005 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
// Reference value stored into the above/left ref contexts below.
2008 vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
// SPLAT_CTX writes a replicated byte value into an n-byte context run.
// Two variants exist; the guarding #if lines are outside this fragment
// (presumably a fast-64-bit-store test — TODO confirm).
2012 #define SPLAT_CTX(var, val, n) \
2014 case 1: var = val; break; \
2015 case 2: AV_WN16A(&var, val * 0x0101); break; \
2016 case 4: AV_WN32A(&var, val * 0x01010101); break; \
2017 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
2019 uint64_t v64 = val * 0x0101010101010101ULL; \
2020 AV_WN64A( &var, v64); \
2021 AV_WN64A(&((uint8_t *) &var)[8], v64); \
2026 #define SPLAT_CTX(var, val, n) \
2028 case 1: var = val; break; \
2029 case 2: AV_WN16A(&var, val * 0x0101); break; \
2030 case 4: AV_WN32A(&var, val * 0x01010101); break; \
2032 uint32_t v32 = val * 0x01010101; \
2033 AV_WN32A( &var, v32); \
2034 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2038 uint32_t v32 = val * 0x01010101; \
2039 AV_WN32A( &var, v32); \
2040 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2041 AV_WN32A(&((uint8_t *) &var)[8], v32); \
2042 AV_WN32A(&((uint8_t *) &var)[12], v32); \
// Splat this block's decisions into all above/left context arrays.
2048 switch (bwh_tab[1][b->bs][0]) {
2049 #define SET_CTXS(dir, off, n) \
2051 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
2052 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
2053 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2054 if (!s->keyframe && !s->intraonly) { \
2055 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
2056 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
2057 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
2059 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2060 if (s->filtermode == FILTER_SWITCHABLE) { \
2061 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2066 case 1: SET_CTXS(above, col, 1); break;
2067 case 2: SET_CTXS(above, col, 2); break;
2068 case 4: SET_CTXS(above, col, 4); break;
2069 case 8: SET_CTXS(above, col, 8); break;
2071 switch (bwh_tab[1][b->bs][1]) {
2072 case 1: SET_CTXS(left, row7, 1); break;
2073 case 2: SET_CTXS(left, row7, 2); break;
2074 case 4: SET_CTXS(left, row7, 4); break;
2075 case 8: SET_CTXS(left, row7, 8); break;
// Update the above/left MV context caches consumed by find_ref_mvs().
2080 if (!s->keyframe && !s->intraonly) {
2081 if (b->bs > BS_8x8) {
2082 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2084 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2085 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2086 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2087 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2088 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2089 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2090 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2091 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2093 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2095 for (n = 0; n < w4 * 2; n++) {
2096 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2097 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2099 for (n = 0; n < h4 * 2; n++) {
2100 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2101 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
// Store references + MVs for each 8x8 cell of this block; this is the
// per-frame storage find_ref_mvs() reads (current and next frame).
2107 for (y = 0; y < h4; y++) {
2108 int x, o = (row + y) * s->sb_cols * 8 + col;
2109 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
2112 for (x = 0; x < w4; x++) {
2116 } else if (b->comp) {
2117 for (x = 0; x < w4; x++) {
2118 mv[x].ref[0] = b->ref[0];
2119 mv[x].ref[1] = b->ref[1];
2120 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2121 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2124 for (x = 0; x < w4; x++) {
2125 mv[x].ref[0] = b->ref[0];
2127 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2133 // FIXME merge cnt/eob arguments?
/**
 * Decode one block of residual coefficients (the VP9 coefficient token
 * decoder).
 *
 * Per coefficient position: an EOB decision, a zero decision, then the
 * token value, with the probability context (nnz) derived from the
 * magnitudes of the two already-decoded neighbours (nb[]/cache[]).
 * Probabilities p[band][nnz][3..10] are filled lazily from
 * vp9_model_pareto8. Larger token categories decode explicit extra bits
 * with fixed probabilities. Decoded levels are dequantized with qmul
 * and written via STORE_COEF (16-bit stores for 8 bpp, 32-bit else).
 *
 * @param is_tx32x32 selects the /2 dequant path used for 32x32 blocks
 * @param cnt/eob    adaptation counters, updated as symbols are read
 * @return presumably the number of decoded coefficients — the final
 *         return is outside this fragment, TODO confirm
 */
2134 static av_always_inline int
2135 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2136 int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
2137 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2138 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2139 const int16_t *band_counts, const int16_t *qmul)
2141 int i = 0, band = 0, band_left = band_counts[band];
2142 uint8_t *tp = p[0][nnz];
// cache[] remembers clamped magnitudes of decoded positions so the two
// neighbour values can form the next nnz context.
2143 uint8_t cache[1024];
2148 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2149 eob[band][nnz][val]++;
2154 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2155 cnt[band][nnz][0]++;
2157 band_left = band_counts[++band];
2159 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2161 if (++i == n_coeffs)
2162 break; //invalid input; blocks should end with EOB
2167 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2168 cnt[band][nnz][1]++;
2172 // fill in p[3-10] (model fill) - only once per frame for each pos
2174 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2176 cnt[band][nnz][2]++;
2177 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2178 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2179 cache[rc] = val = 2;
2181 val = 3 + vp56_rac_get_prob(c, tp[5]);
2184 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2186 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2187 val = 5 + vp56_rac_get_prob(c, 159);
2189 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2190 val += vp56_rac_get_prob(c, 145);
// cat3 (11..18) / cat4 (19..34) extra bits.
2194 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2195 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2196 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2197 val += (vp56_rac_get_prob(c, 148) << 1);
2198 val += vp56_rac_get_prob(c, 140);
2200 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2201 val += (vp56_rac_get_prob(c, 155) << 2);
2202 val += (vp56_rac_get_prob(c, 140) << 1);
2203 val += vp56_rac_get_prob(c, 135);
// cat5 (35..66) / cat6 (largest, with extra top bits at >8 bpp).
2205 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2206 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2207 val += (vp56_rac_get_prob(c, 157) << 3);
2208 val += (vp56_rac_get_prob(c, 141) << 2);
2209 val += (vp56_rac_get_prob(c, 134) << 1);
2210 val += vp56_rac_get_prob(c, 130);
2213 if (!is8bitsperpixel) {
2215 val += vp56_rac_get_prob(c, 255) << 17;
2216 val += vp56_rac_get_prob(c, 255) << 16;
2218 val += (vp56_rac_get_prob(c, 255) << 15);
2219 val += (vp56_rac_get_prob(c, 255) << 14);
2221 val += (vp56_rac_get_prob(c, 254) << 13);
2222 val += (vp56_rac_get_prob(c, 254) << 12);
2223 val += (vp56_rac_get_prob(c, 254) << 11);
2224 val += (vp56_rac_get_prob(c, 252) << 10);
2225 val += (vp56_rac_get_prob(c, 249) << 9);
2226 val += (vp56_rac_get_prob(c, 243) << 8);
2227 val += (vp56_rac_get_prob(c, 230) << 7);
2228 val += (vp56_rac_get_prob(c, 196) << 6);
2229 val += (vp56_rac_get_prob(c, 177) << 5);
2230 val += (vp56_rac_get_prob(c, 153) << 4);
2231 val += (vp56_rac_get_prob(c, 140) << 3);
2232 val += (vp56_rac_get_prob(c, 133) << 2);
2233 val += (vp56_rac_get_prob(c, 130) << 1);
2234 val += vp56_rac_get_prob(c, 129);
// Sign, dequant (DC uses qmul[0], AC qmul[1]) and store; 32x32 blocks
// use the halved-scale path.
2238 #define STORE_COEF(c, i, v) do { \
2239 if (is8bitsperpixel) { \
2242 AV_WN32A(&c[i * 2], v); \
2246 band_left = band_counts[++band];
2248 STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
2250 STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
2251 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2253 } while (++i < n_coeffs);
2258 static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2259 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2260 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2261 const int16_t (*nb)[2], const int16_t *band_counts,
2262 const int16_t *qmul)
2264 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
2265 nnz, scan, nb, band_counts, qmul);
2268 static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2269 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2270 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2271 const int16_t (*nb)[2], const int16_t *band_counts,
2272 const int16_t *qmul)
2274 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
2275 nnz, scan, nb, band_counts, qmul);
// High-bit-depth variant of decode_coeffs_b_8bpp: passes the stream's
// actual bit depth (s->bpp, e.g. 10 or 12) instead of the constant 8.
2278 static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2279 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2280 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2281 const int16_t (*nb)[2], const int16_t *band_counts,
2282 const int16_t *qmul)
2284 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
2285 nnz, scan, nb, band_counts, qmul);
// High-bit-depth variant for 32x32 transform blocks.
2288 static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2289 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2290 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2291 const int16_t (*nb)[2], const int16_t *band_counts,
2292 const int16_t *qmul)
2294 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
2295 nnz, scan, nb, band_counts, qmul);
// Decode all residual coefficients for the current block (luma, then the
// two chroma planes). Selects scan order / neighbour tables per transform
// type, tracks per-4x4-column non-zero context in the above/left nnz
// arrays, and records per-transform-block EOB counts in s->eob /
// s->uveob for the reconstruction stage. is8bitsperpixel is a
// compile-time template switch (8bpp vs 16bpp coefficient storage).
2298 static av_always_inline void decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
2300 VP9Context *s = ctx->priv_data;
2302 int row = s->row, col = s->col;
2303 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2304 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2305 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2306 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2307 int end_x = FFMIN(2 * (s->cols - col), w4);
2308 int end_y = FFMIN(2 * (s->rows - row), h4);
2309 int n, pl, x, y, res;
2310 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
2311 int tx = 4 * s->lossless + b->tx;
2312 const int16_t * const *yscans = vp9_scans[tx];
2313 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2314 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2315 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2316 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2317 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
2318 static const int16_t band_counts[4][8] = {
2319 { 1, 2, 3, 4, 3, 16 - 13 },
2320 { 1, 2, 3, 4, 11, 64 - 21 },
2321 { 1, 2, 3, 4, 11, 256 - 21 },
2322 { 1, 2, 3, 4, 11, 1024 - 21 },
2324 const int16_t *y_band_counts = band_counts[b->tx];
2325 const int16_t *uv_band_counts = band_counts[b->uvtx];
2326 int bytesperpixel = is8bitsperpixel ? 1 : 2;
// MERGE/MERGE_CTX collapse groups of per-4x4 nnz flags into a single
// context value when a larger transform spans several 4x4 columns/rows;
// SPLAT/SPLAT_CTX fan a decoded flag back out over the covered 4x4 units.
2328 #define MERGE(la, end, step, rd) \
2329 for (n = 0; n < end; n += step) \
2330 la[n] = !!rd(&la[n])
2331 #define MERGE_CTX(step, rd) \
2333 MERGE(l, end_y, step, rd); \
2334 MERGE(a, end_x, step, rd); \
2337 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2338 for (n = 0, y = 0; y < end_y; y += step) { \
2339 for (x = 0; x < end_x; x += step, n += step * step) { \
2340 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2341 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2342 (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
2343 c, e, p, a[x] + l[y], yscans[txtp], \
2344 ynbs[txtp], y_band_counts, qmul[0]); \
2345 a[x] = l[y] = !!res; \
2347 AV_WN16A(&s->eob[n], res); \
2354 #define SPLAT(la, end, step, cond) \
2356 for (n = 1; n < end; n += step) \
2357 la[n] = la[n - 1]; \
2358 } else if (step == 4) { \
2360 for (n = 0; n < end; n += step) \
2361 AV_WN32A(&la[n], la[n] * 0x01010101); \
2363 for (n = 0; n < end; n += step) \
2364 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2366 } else /* step == 8 */ { \
2368 if (HAVE_FAST_64BIT) { \
2369 for (n = 0; n < end; n += step) \
2370 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2372 for (n = 0; n < end; n += step) { \
2373 uint32_t v32 = la[n] * 0x01010101; \
2374 AV_WN32A(&la[n], v32); \
2375 AV_WN32A(&la[n + 4], v32); \
2379 for (n = 0; n < end; n += step) \
2380 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2383 #define SPLAT_CTX(step) \
2385 SPLAT(a, end_x, step, end_x == w4); \
2386 SPLAT(l, end_y, step, end_y == h4); \
2392 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2395 MERGE_CTX(2, AV_RN16A);
2396 DECODE_Y_COEF_LOOP(2, 0,);
2400 MERGE_CTX(4, AV_RN32A);
2401 DECODE_Y_COEF_LOOP(4, 0,);
2405 MERGE_CTX(8, AV_RN64A);
2406 DECODE_Y_COEF_LOOP(8, 0, 32);
// Chroma: always DCT_DCT scan; both planes share the same probability,
// count and eob tables (selected below by b->uvtx and intra-ness).
2411 #define DECODE_UV_COEF_LOOP(step, v) \
2412 for (n = 0, y = 0; y < end_y; y += step) { \
2413 for (x = 0; x < end_x; x += step, n += step * step) { \
2414 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2415 (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
2416 16 * step * step, c, e, p, a[x] + l[y], \
2417 uvscan, uvnb, uv_band_counts, qmul[1]); \
2418 a[x] = l[y] = !!res; \
2420 AV_WN16A(&s->uveob[pl][n], res); \
2422 s->uveob[pl][n] = res; \
2427 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2428 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2429 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2434 for (pl = 0; pl < 2; pl++) {
2435 a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2436 l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2439 DECODE_UV_COEF_LOOP(1,);
2442 MERGE_CTX(2, AV_RN16A);
2443 DECODE_UV_COEF_LOOP(2,);
2447 MERGE_CTX(4, AV_RN32A);
2448 DECODE_UV_COEF_LOOP(4,);
2452 MERGE_CTX(8, AV_RN64A);
2453 DECODE_UV_COEF_LOOP(8, 32);
// 8-bits-per-pixel instantiation of the decode_coeffs template.
2460 static void decode_coeffs_8bpp(AVCodecContext *ctx)
2462 decode_coeffs(ctx, 1);
// High-bit-depth (>8 bpp) instantiation of the decode_coeffs template.
2465 static void decode_coeffs_16bpp(AVCodecContext *ctx)
2467 decode_coeffs(ctx, 0);
// Prepare the top/left reference sample arrays (*a and l) for one intra
// transform block and return the possibly-downgraded prediction mode.
// Modes that need unavailable neighbours (off the top of the frame, left
// of the tile, or past the right edge) are mapped to DC_* fallbacks via
// mode_conv; missing edge pixels are synthesized by replication or by
// the bit-depth-dependent 127/128/129 constants the spec prescribes.
2470 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2471 uint8_t *dst_edge, ptrdiff_t stride_edge,
2472 uint8_t *dst_inner, ptrdiff_t stride_inner,
2473 uint8_t *l, int col, int x, int w,
2474 int row, int y, enum TxfmMode tx,
2475 int p, int ss_h, int ss_v, int bytesperpixel)
2477 int have_top = row > 0 || y > 0;
2478 int have_left = col > s->tiling.tile_col_start || x > 0;
2479 int have_right = x < w - 1;
// Mode remap table indexed [mode][have_left][have_top]: substitutes a
// DC_127/128/129 fallback when the samples a mode needs are unavailable.
2481 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2482 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2483 { DC_127_PRED, VERT_PRED } },
2484 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2485 { HOR_PRED, HOR_PRED } },
2486 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2487 { LEFT_DC_PRED, DC_PRED } },
2488 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2489 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2490 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2491 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2492 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2493 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2494 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2495 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2496 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2497 { DC_127_PRED, VERT_LEFT_PRED } },
2498 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2499 { HOR_UP_PRED, HOR_UP_PRED } },
2500 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2501 { HOR_PRED, TM_VP8_PRED } },
// Per-mode flags describing which neighbour samples each (possibly
// remapped) prediction mode consumes.
2503 static const struct {
2504 uint8_t needs_left:1;
2505 uint8_t needs_top:1;
2506 uint8_t needs_topleft:1;
2507 uint8_t needs_topright:1;
2508 uint8_t invert_left:1;
2509 } edges[N_INTRA_PRED_MODES] = {
2510 [VERT_PRED] = { .needs_top = 1 },
2511 [HOR_PRED] = { .needs_left = 1 },
2512 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2513 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2514 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2515 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2516 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2517 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2518 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2519 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2520 [LEFT_DC_PRED] = { .needs_left = 1 },
2521 [TOP_DC_PRED] = { .needs_top = 1 },
2522 [DC_128_PRED] = { 0 },
2523 [DC_127_PRED] = { 0 },
2524 [DC_129_PRED] = { 0 }
// Gather/synthesize the top (and top-left/top-right) reference row.
2527 av_assert2(mode >= 0 && mode < 10);
2528 mode = mode_conv[mode][have_left][have_top];
2529 if (edges[mode].needs_top) {
2530 uint8_t *top, *topleft;
2531 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2532 int n_px_need_tr = 0;
2534 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2537 // if top of sb64-row, use s->intra_pred_data[] instead of
2538 // dst[-stride] for intra prediction (it contains pre- instead of
2539 // post-loopfilter data)
2541 top = !(row & 7) && !y ?
2542 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2543 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2545 topleft = !(row & 7) && !y ?
2546 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2547 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2548 &dst_inner[-stride_inner];
2552 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2553 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2554 n_px_need + n_px_need_tr <= n_px_have) {
2558 if (n_px_need <= n_px_have) {
2559 memcpy(*a, top, n_px_need * bytesperpixel);
2561 #define memset_bpp(c, i1, v, i2, num) do { \
2562 if (bytesperpixel == 1) { \
2563 memset(&(c)[(i1)], (v)[(i2)], (num)); \
2565 int n, val = AV_RN16A(&(v)[(i2) * 2]); \
2566 for (n = 0; n < (num); n++) { \
2567 AV_WN16A(&(c)[((i1) + n) * 2], val); \
2571 memcpy(*a, top, n_px_have * bytesperpixel);
2572 memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
2575 #define memset_val(c, val, num) do { \
2576 if (bytesperpixel == 1) { \
2577 memset((c), (val), (num)); \
2580 for (n = 0; n < (num); n++) { \
2581 AV_WN16A(&(c)[n * 2], (val)); \
2585 memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
2587 if (edges[mode].needs_topleft) {
2588 if (have_left && have_top) {
2589 #define assign_bpp(c, i1, v, i2) do { \
2590 if (bytesperpixel == 1) { \
2591 (c)[(i1)] = (v)[(i2)]; \
2593 AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
2596 assign_bpp(*a, -1, topleft, -1);
2598 #define assign_val(c, i, v) do { \
2599 if (bytesperpixel == 1) { \
2602 AV_WN16A(&(c)[(i) * 2], (v)); \
2605 assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
2608 if (tx == TX_4X4 && edges[mode].needs_topright) {
2609 if (have_top && have_right &&
2610 n_px_need + n_px_need_tr <= n_px_have) {
2611 memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
2613 memset_bpp(*a, 4, *a, 3, 4);
// Gather/synthesize the left reference column (stored top-down, or
// bottom-up when the mode sets invert_left, e.g. HOR_UP_PRED).
2618 if (edges[mode].needs_left) {
2620 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2621 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2622 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2624 if (edges[mode].invert_left) {
2625 if (n_px_need <= n_px_have) {
2626 for (i = 0; i < n_px_need; i++)
2627 assign_bpp(l, i, &dst[i * stride], -1);
2629 for (i = 0; i < n_px_have; i++)
2630 assign_bpp(l, i, &dst[i * stride], -1);
2631 memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
2634 if (n_px_need <= n_px_have) {
2635 for (i = 0; i < n_px_need; i++)
2636 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2638 for (i = 0; i < n_px_have; i++)
2639 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2640 memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
2644 memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
// Intra reconstruction for the current block: for each luma transform
// block, fix up the prediction mode and edge samples (check_intra_mode),
// run the predictor into the per-sbrow scratch destination, then add the
// inverse transform of the decoded residual when the block isn't
// skipped. The same is then done for both chroma planes (always DCT_DCT).
// Writes go both through s->dst (tmp/aligned) and the CUR_FRAME pointers
// are used for edge sourcing — ptr_r tracks the frame-buffer position.
2651 static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
2652 ptrdiff_t uv_off, int bytesperpixel)
2654 VP9Context *s = ctx->priv_data;
2656 int row = s->row, col = s->col;
2657 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2658 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2659 int end_x = FFMIN(2 * (s->cols - col), w4);
2660 int end_y = FFMIN(2 * (s->rows - row), h4);
2661 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2662 int uvstep1d = 1 << b->uvtx, p;
2663 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2664 LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
2665 LOCAL_ALIGNED_32(uint8_t, l, [64]);
2667 for (n = 0, y = 0; y < end_y; y += step1d) {
2668 uint8_t *ptr = dst, *ptr_r = dst_r;
2669 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
2670 ptr_r += 4 * step1d * bytesperpixel, n += step) {
2671 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2673 uint8_t *a = &a_buf[32];
2674 enum TxfmType txtp = vp9_intra_txfm_type[mode];
2675 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2677 mode = check_intra_mode(s, mode, &a, ptr_r,
2678 s->frames[CUR_FRAME].tf.f->linesize[0],
2679 ptr, s->y_stride, l,
2680 col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
2681 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2683 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2684 s->block + 16 * n * bytesperpixel, eob);
2686 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2687 dst += 4 * step1d * s->y_stride;
// Chroma planes: uvmode applies to the whole block, transform is DCT_DCT.
2694 step = 1 << (b->uvtx * 2);
2695 for (p = 0; p < 2; p++) {
2696 dst = s->dst[1 + p];
2697 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2698 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2699 uint8_t *ptr = dst, *ptr_r = dst_r;
2700 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
2701 ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
2702 int mode = b->uvmode;
2703 uint8_t *a = &a_buf[32];
2704 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2706 mode = check_intra_mode(s, mode, &a, ptr_r,
2707 s->frames[CUR_FRAME].tf.f->linesize[1],
2708 ptr, s->uv_stride, l, col, x, w4, row, y,
2709 b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
2710 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2712 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2713 s->uvblock[p] + 16 * n * bytesperpixel, eob);
2715 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2716 dst += 4 * uvstep1d * s->uv_stride;
// 8bpp instantiation of the intra_recon template (1 byte per pixel).
2721 static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2723 intra_recon(ctx, y_off, uv_off, 1);
// High-bit-depth instantiation of intra_recon (2 bytes per pixel).
2726 static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2728 intra_recon(ctx, y_off, uv_off, 2);
// Luma motion compensation from a reference frame of a different
// resolution: MVs and block position are rescaled through scale[]/step[]
// (14-bit fixed point), frame-thread progress is awaited on the source
// row, and out-of-frame reads go through the emulated-edge buffer
// (row stride 144 bytes) before the scaled-MC DSP function runs.
2731 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2732 uint8_t *dst, ptrdiff_t dst_stride,
2733 const uint8_t *ref, ptrdiff_t ref_stride,
2734 ThreadFrame *ref_frame,
2735 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2736 int bw, int bh, int w, int h,
2737 const uint16_t *scale, const uint8_t *step)
2739 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2740 // BUG libvpx seems to scale the two components separately. This introduces
2741 // rounding errors but we have to reproduce them to be exactly compatible
2742 // with the output from libvpx...
2743 int mx = scale_mv(mv->x * 2, 0) + scale_mv(x * 16, 0);
2744 int my = scale_mv(mv->y * 2, 1) + scale_mv(y * 16, 1);
2745 int refbw_m1, refbh_m1;
2750 ref += y * ref_stride + x;
2753 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2754 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2755 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2756 // we use +7 because the last 7 pixels of each sbrow can be changed in
2757 // the longest loopfilter of the next sbrow
2758 th = (y + refbh_m1 + 4 + 7) >> 6;
2759 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2760 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2761 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2762 ref - 3 * ref_stride - 3,
2764 refbw_m1 + 8, refbh_m1 + 8,
2765 x - 3, y - 3, w, h);
2766 ref = s->edge_emu_buffer + 3 * 144 + 3;
2769 smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
// Chroma counterpart of mc_luma_scaled: handles both U and V at once,
// with MVs pre-scaled by the subsampling factors (!ss_h/!ss_v) and the
// libvpx-compatible odd-pixel rounding quirk noted below. Each plane
// falls back to the shared 144-byte-stride edge-emulation buffer
// independently when its source region leaves the reference frame.
2772 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2773 uint8_t *dst_u, uint8_t *dst_v,
2774 ptrdiff_t dst_stride,
2775 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2776 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2777 ThreadFrame *ref_frame,
2778 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2779 int bw, int bh, int w, int h,
2780 const uint16_t *scale, const uint8_t *step)
2782 // BUG https://code.google.com/p/webm/issues/detail?id=820
2783 int mx = scale_mv(mv->x << !s->ss_h, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2784 int my = scale_mv(mv->y << !s->ss_v, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2786 int refbw_m1, refbh_m1;
2791 ref_u += y * src_stride_u + x;
2792 ref_v += y * src_stride_v + x;
2795 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2796 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2797 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2798 // we use +7 because the last 7 pixels of each sbrow can be changed in
2799 // the longest loopfilter of the next sbrow
2800 th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2801 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2802 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2803 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2804 ref_u - 3 * src_stride_u - 3,
2806 refbw_m1 + 8, refbh_m1 + 8,
2807 x - 3, y - 3, w, h);
2808 ref_u = s->edge_emu_buffer + 3 * 144 + 3;
2809 smc(dst_u, dst_stride, ref_u, 144, bh, mx, my, step[0], step[1]);
2811 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2812 ref_v - 3 * src_stride_v - 3,
2814 refbw_m1 + 8, refbh_m1 + 8,
2815 x - 3, y - 3, w, h);
2816 ref_v = s->edge_emu_buffer + 3 * 144 + 3;
2817 smc(dst_v, dst_stride, ref_v, 144, bh, mx, my, step[0], step[1]);
2819 smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2820 smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
// First instantiation of the shared inter-prediction template for the
// *scaled* reference path: FN() suffixes generated names with _scaled and
// the mc_*_dir macros route through mc_luma_scaled/mc_chroma_scaled with
// the per-reference mvscale/mvstep tables. The macros are undefined again
// after the include so the template can be re-instantiated below.
2824 #define FN(x) x##_scaled
2825 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2826 mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
2827 mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2828 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2829 row, col, mv, bw, bh, w, h, i) \
2830 mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2831 row, col, mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2832 #include "vp9_mc_template.c"
2834 #undef mc_chroma_dir
// Luma motion compensation from a same-resolution reference: awaits
// frame-thread progress on the needed source rows, and when the
// (sub-pel-filtered) source region crosses the frame border, reads are
// redirected through the emulated-edge buffer (80-byte row stride).
// The 3-pixel margin matches the 8-tap filter's left/top reach.
2837 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2838 uint8_t *dst, ptrdiff_t dst_stride,
2839 const uint8_t *ref, ptrdiff_t ref_stride,
2840 ThreadFrame *ref_frame,
2841 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2842 int bw, int bh, int w, int h)
2844 int mx = mv->x, my = mv->y, th;
2848 ref += y * ref_stride + x;
2851 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2852 // we use +7 because the last 7 pixels of each sbrow can be changed in
2853 // the longest loopfilter of the next sbrow
2854 th = (y + bh + 4 * !!my + 7) >> 6;
2855 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2856 if (x < !!mx * 3 || y < !!my * 3 ||
2857 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2858 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2859 ref - !!my * 3 * ref_stride - !!mx * 3,
2861 bw + !!mx * 7, bh + !!my * 7,
2862 x - !!mx * 3, y - !!my * 3, w, h);
2863 ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2866 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
// Chroma counterpart of mc_luma_unscaled: processes U and V together,
// MVs scaled up by the chroma subsampling shifts (!ss_h/!ss_v); each
// plane independently falls back to the 80-byte-stride edge-emulation
// buffer when its source region leaves the reference frame.
2869 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2870 uint8_t *dst_u, uint8_t *dst_v,
2871 ptrdiff_t dst_stride,
2872 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2873 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2874 ThreadFrame *ref_frame,
2875 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2876 int bw, int bh, int w, int h)
2878 int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
2882 ref_u += y * src_stride_u + x;
2883 ref_v += y * src_stride_v + x;
2886 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2887 // we use +7 because the last 7 pixels of each sbrow can be changed in
2888 // the longest loopfilter of the next sbrow
2889 th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2890 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2891 if (x < !!mx * 3 || y < !!my * 3 ||
2892 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2893 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2894 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2896 bw + !!mx * 7, bh + !!my * 7,
2897 x - !!mx * 3, y - !!my * 3, w, h);
2898 ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2899 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2901 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2902 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2904 bw + !!mx * 7, bh + !!my * 7,
2905 x - !!mx * 3, y - !!my * 3, w, h);
2906 ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2907 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
2909 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2910 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
// Second instantiation of the shared inter-prediction template, for the
// *unscaled* (same-resolution reference) path: mc_*_dir route through
// mc_luma_unscaled/mc_chroma_unscaled without mvscale/mvstep.
2915 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2916 mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2918 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2919 row, col, mv, bw, bh, w, h, i) \
2920 mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2921 row, col, mv, bw, bh, w, h)
2922 #include "vp9_mc_template.c"
// Fix: these previously read "#undef mc_luma_dir_dir" / "#undef
// mc_chroma_dir_dir" — typos naming macros that were never defined.
// #undef of a non-macro identifier is a silent no-op (C11 6.10.3.5),
// so mc_luma_dir/mc_chroma_dir leaked past this point instead of being
// cleaned up as in the scaled instantiation above.
2923 #undef mc_luma_dir
2924 #undef mc_chroma_dir
// Inter reconstruction for the current block: dispatches to the scaled
// inter-prediction path when the block's reference frame(s) have a
// non-unit mvscale, then (structure mirroring intra_recon) adds the
// inverse-transformed residual for luma and both chroma planes. All
// inter residual transforms use the DCT_DCT kernel.
2927 static void inter_recon(AVCodecContext *ctx)
2929 VP9Context *s = ctx->priv_data;
2931 int row = s->row, col = s->col;
2933 if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
2934 inter_pred_scaled(ctx);
2939 /* mostly copied intra_recon() */
2941 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2942 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2943 int end_x = FFMIN(2 * (s->cols - col), w4);
2944 int end_y = FFMIN(2 * (s->rows - row), h4);
2945 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2946 int uvstep1d = 1 << b->uvtx, p;
2947 uint8_t *dst = s->dst[0];
2950 for (n = 0, y = 0; y < end_y; y += step1d) {
2952 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2953 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2956 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2957 s->block + 16 * n, eob);
2959 dst += 4 * s->y_stride * step1d;
// Chroma residual add, both planes.
2965 step = 1 << (b->uvtx * 2);
2966 for (p = 0; p < 2; p++) {
2967 dst = s->dst[p + 1];
2968 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2970 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2971 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2974 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2975 s->uvblock[p] + 16 * n, eob);
2977 dst += 4 * uvstep1d * s->uv_stride;
// Build the loopfilter edge bitmasks for one block within its 64x64
// superblock. mask is indexed [0=col-edges,1=row-edges][row 0..7]
// [filter width: 0=16px, 1=8px, 2=4px, 3=inner-4px]; each bit is an
// 8-pixel column. row_and_7/col_and_7 are the block's position inside
// the superblock, w/h its size in 8px units; ss_h/ss_v are set for the
// chroma invocation.
2983 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
2984 int row_and_7, int col_and_7,
2985 int w, int h, int col_end, int row_end,
2986 enum TxfmMode tx, int skip_inter)
2988 static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
2989 static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
2991 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2992 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2993 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2994 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2996 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2997 // edges. This means that for UV, we work on two subsampled blocks at
2998 // a time, and we only use the topleft block's mode information to set
2999 // things like block strength. Thus, for any block size smaller than
3000 // 16x16, ignore the odd portion of the block.
3001 if (tx == TX_4X4 && (ss_v | ss_h)) {
3016 if (tx == TX_4X4 && !skip_inter) {
3017 int t = 1 << col_and_7, m_col = (t << w) - t, y;
3018 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
3019 int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
3021 for (y = row_and_7; y < h + row_and_7; y++) {
3022 int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
3024 mask[0][y][1] |= m_row_8;
3025 mask[0][y][2] |= m_row_4;
3026 // for odd lines, if the odd col is not being filtered,
3027 // skip odd row also:
3034 // if a/c are even row/col and b/d are odd, and d is skipped,
3035 // e.g. right edge of size-66x66.webm, then skip b also (bug)
3036 if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
3037 mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
3039 mask[1][y][col_mask_id] |= m_col;
3042 mask[0][y][3] |= m_col;
3044 mask[1][y][3] |= m_col;
// Larger transforms / skipped inter blocks: only the block's outer
// edges are filtered, with width chosen from the transform size.
3047 int y, t = 1 << col_and_7, m_col = (t << w) - t;
3050 int mask_id = (tx == TX_8X8);
3051 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
3052 int l2 = tx + ss_h - 1, step1d;
3053 int m_row = m_col & masks[l2];
3055 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
3056 // 8wd loopfilter to prevent going off the visible edge.
3057 if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
3058 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
3059 int m_row_8 = m_row - m_row_16;
3061 for (y = row_and_7; y < h + row_and_7; y++) {
3062 mask[0][y][0] |= m_row_16;
3063 mask[0][y][1] |= m_row_8;
3066 for (y = row_and_7; y < h + row_and_7; y++)
3067 mask[0][y][mask_id] |= m_row;
3072 if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
3073 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
3074 mask[1][y][0] |= m_col;
3075 if (y - row_and_7 == h - 1)
3076 mask[1][y][1] |= m_col;
3078 for (y = row_and_7; y < h + row_and_7; y += step1d)
3079 mask[1][y][mask_id] |= m_col;
3081 } else if (tx != TX_4X4) {
3084 mask_id = (tx == TX_8X8) || (h == ss_v);
3085 mask[1][row_and_7][mask_id] |= m_col;
3086 mask_id = (tx == TX_8X8) || (w == ss_h);
3087 for (y = row_and_7; y < h + row_and_7; y++)
3088 mask[0][y][mask_id] |= t;
3090 int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
3092 for (y = row_and_7; y < h + row_and_7; y++) {
3093 mask[0][y][2] |= t4;
3094 mask[0][y][1] |= t8;
3096 mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
// Decode and reconstruct one block (leaf of the partition tree):
// parse the mode/MV/coefficient data, clear nnz contexts for skipped
// blocks, reconstruct intra or inter samples (via a temporary buffer
// when the block overhangs the frame edge), and accumulate loopfilter
// level + edge masks for the enclosing superblock. Finally advances the
// per-sbrow coefficient/EOB cursors past this block.
3101 static void decode_b(AVCodecContext *ctx, int row, int col,
3102 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
3103 enum BlockLevel bl, enum BlockPartition bp)
3105 VP9Context *s = ctx->priv_data;
3107 enum BlockSize bs = bl * 3 + bp;
3108 int bytesperpixel = s->bytesperpixel;
3109 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
3111 AVFrame *f = s->frames[CUR_FRAME].tf.f;
// MV clamping range for this block position (1/8-pel units).
3117 s->min_mv.x = -(128 + col * 64);
3118 s->min_mv.y = -(128 + row * 64);
3119 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
3120 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
3126 b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
3127 (s->ss_v && h4 * 2 == (1 << b->tx)));
3130 if (bytesperpixel == 1) {
3131 decode_coeffs_8bpp(ctx);
3133 decode_coeffs_16bpp(ctx);
3138 #define SPLAT_ZERO_CTX(v, n) \
3140 case 1: v = 0; break; \
3141 case 2: AV_ZERO16(&v); break; \
3142 case 4: AV_ZERO32(&v); break; \
3143 case 8: AV_ZERO64(&v); break; \
3144 case 16: AV_ZERO128(&v); break; \
3146 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3148 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3149 if (s->ss_##dir2) { \
3150 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3151 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3153 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3154 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3159 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
3160 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
3161 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
3162 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
3165 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
3166 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
3167 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
3168 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
// Skipped block: no coefficients were read, just advance the cursors.
3173 s->block += w4 * h4 * 64 * bytesperpixel;
3174 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3175 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3176 s->eob += 4 * w4 * h4;
3177 s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3178 s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3184 // emulated overhangs if the stride of the target buffer can't hold. This
3185 // allows to support emu-edge and so on even if we have large block
3187 emu[0] = (col + w4) * 8 > f->linesize[0] ||
3188 (row + h4) > s->rows;
3189 emu[1] = (col + w4) * 4 > f->linesize[1] ||
3190 (row + h4) > s->rows;
3192 s->dst[0] = s->tmp_y;
3195 s->dst[0] = f->data[0] + yoff;
3196 s->y_stride = f->linesize[0];
3199 s->dst[1] = s->tmp_uv[0];
3200 s->dst[2] = s->tmp_uv[1];
3203 s->dst[1] = f->data[1] + uvoff;
3204 s->dst[2] = f->data[2] + uvoff;
3205 s->uv_stride = f->linesize[1];
3209 intra_recon_16bpp(ctx, yoff, uvoff);
3211 intra_recon_8bpp(ctx, yoff, uvoff);
// Copy the visible part of an emu-edge reconstruction back into the
// frame, 64 pixels (one MC call) at a time.
3217 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3219 for (n = 0; o < w; n++) {
3224 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3225 s->tmp_y + o, 128, h, 0, 0);
3226 o += bw * bytesperpixel;
3231 int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
3232 int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
3234 for (n = 1; o < w; n++) {
3239 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3240 s->tmp_uv[0] + o, 128, h, 0, 0);
3241 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3242 s->tmp_uv[1] + o, 128, h, 0, 0);
3243 o += bw * bytesperpixel;
3248 // pick filter level and find edges to apply filter to
3249 if (s->filter.level &&
3250 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3251 [b->mode[3] != ZEROMV]) > 0) {
3252 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3253 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3255 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3256 mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3257 if (s->ss_h || s->ss_v)
3258 mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
3259 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3260 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3261 b->uvtx, skip_inter);
// Lazily fill the per-level limit LUTs used by the loopfilter.
3263 if (!s->filter.lim_lut[lvl]) {
3264 int sharp = s->filter.sharpness;
3268 limit >>= (sharp + 3) >> 2;
3269 limit = FFMIN(limit, 9 - sharp);
3271 limit = FFMAX(limit, 1);
3273 s->filter.lim_lut[lvl] = limit;
3274 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
3280 s->block += w4 * h4 * 64 * bytesperpixel;
3281 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3282 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3283 s->eob += 4 * w4 * h4;
3284 s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3285 s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
// Recursively parse and decode one node of the superblock partition
// tree. The partition symbol is entropy-coded with a context derived
// from the above/left partition history; near the right/bottom frame
// edge, partitions whose second half would fall outside the frame are
// implied rather than coded (only a split/no-split bit, or nothing, is
// read). Recurses via decode_sb for splits and calls decode_b on leaves,
// then updates the adaptivity counters.
3289 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3290 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3292 VP9Context *s = ctx->priv_data;
3293 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3294 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
3295 const uint8_t *p = s->keyframe || s->intraonly ? vp9_default_kf_partition_probs[bl][c] :
3296 s->prob.p.partition[bl][c];
3297 enum BlockPartition bp;
3298 ptrdiff_t hbs = 4 >> bl;
3299 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3300 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3301 int bytesperpixel = s->bytesperpixel;
3304 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3305 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3306 } else if (col + hbs < s->cols) { // FIXME why not <=?
3307 if (row + hbs < s->rows) { // FIXME why not <=?
3308 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3310 case PARTITION_NONE:
3311 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3314 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3315 yoff += hbs * 8 * y_stride;
3316 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3317 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3320 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3321 yoff += hbs * 8 * bytesperpixel;
3322 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3323 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3325 case PARTITION_SPLIT:
3326 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3327 decode_sb(ctx, row, col + hbs, lflvl,
3328 yoff + 8 * hbs * bytesperpixel,
3329 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3330 yoff += hbs * 8 * y_stride;
3331 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3332 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3333 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3334 yoff + 8 * hbs * bytesperpixel,
3335 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3340 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3341 bp = PARTITION_SPLIT;
3342 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3343 decode_sb(ctx, row, col + hbs, lflvl,
3344 yoff + 8 * hbs * bytesperpixel,
3345 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3348 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3350 } else if (row + hbs < s->rows) { // FIXME why not <=?
3351 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3352 bp = PARTITION_SPLIT;
3353 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3354 yoff += hbs * 8 * y_stride;
3355 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3356 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3359 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3362 bp = PARTITION_SPLIT;
3363 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3365 s->counts.partition[bl][c][bp]++;
// Second-pass variant of decode_sb(): instead of parsing partition bits
// from the bitstream, replay the block level/partition (b->bl / b->bp)
// stored during the first pass, walking the same recursion to redo the
// per-block reconstruction.
3368 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3369 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3371 VP9Context *s = ctx->priv_data;
// hbs: half the block size at this level, in 8x8-block units
3373 ptrdiff_t hbs = 4 >> bl;
3374 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3375 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3376 int bytesperpixel = s->bytesperpixel;
// leaf at the smallest level must have been stored as an 8x8 block
3379 av_assert2(b->bl == BL_8X8);
3380 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
// block was coded at exactly this level: decode it, plus its H/V sibling
// when the second half lies inside the frame
3381 } else if (s->b->bl == bl) {
3382 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3383 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3384 yoff += hbs * 8 * y_stride;
3385 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3386 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3387 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3388 yoff += hbs * 8 * bytesperpixel;
3389 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3390 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// stored level is smaller: recurse into the quadrants that lie inside the frame
3393 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3394 if (col + hbs < s->cols) { // FIXME why not <=?
3395 if (row + hbs < s->rows) {
3396 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
3397 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3398 yoff += hbs * 8 * y_stride;
3399 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3400 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3401 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3402 yoff + 8 * hbs * bytesperpixel,
3403 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// only the right quadrant fits
3405 yoff += hbs * 8 * bytesperpixel;
3406 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3407 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
// only the bottom quadrant fits
3409 } else if (row + hbs < s->rows) {
3410 yoff += hbs * 8 * y_stride;
3411 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3412 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
// Loop-filter the vertical edges (edges between horizontally adjacent
// blocks) of one 64x64 superblock for a single plane. lvl holds the
// per-8x8-block filter levels; mask[] holds per-edge bitmasks, indexed by
// transform size (entries 0..2 for 16/8/4-wide edges, entry 3 for the
// inner 4px edges). Each set bit selects an 8-pixel-tall edge segment;
// vertically adjacent segments are fused into loop_filter_16 or dual
// (mix2) calls where the masks allow.
3417 static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
3418 uint8_t *lvl, uint8_t (*mask)[4],
3419 uint8_t *dst, ptrdiff_t ls)
3421 int y, x, bytesperpixel = s->bytesperpixel;
3423 // filter edges between columns (e.g. block1 | block2)
3424 for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
// hmask1: mask row for this 8px band; hmask2: the band 8px below
3425 uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
3426 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3427 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3428 unsigned hm = hm1 | hm2 | hm13 | hm23;
// walk the set bits left to right, one bit per 8px-wide column
3430 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
// E = edge limit, I = interior limit, H = high-edge-variance threshold
3433 int L = *l, H = L >> 4;
3434 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
// 16-wide edge in this band: try to fuse with the band below into one 16px call
3436 if (hmask1[0] & x) {
3437 if (hmask2[0] & x) {
3438 av_assert2(l[8 << ss_v] == L);
3439 s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
3441 s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
// this band + band below active with possibly different levels:
// pack both threshold sets into E/I/H and use the dual (mix2) filter
3443 } else if (hm2 & x) {
3446 E |= s->filter.mblim_lut[L] << 8;
3447 I |= s->filter.lim_lut[L] << 8;
3448 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3450 [0](ptr, ls, E, I, H);
3452 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3453 [0](ptr, ls, E, I, H);
// only the band 8px below is active at this position
3455 } else if (hm2 & x) {
3456 int L = l[8 << ss_v], H = L >> 4;
3457 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3459 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3460 [0](ptr + 8 * ls, ls, E, I, H);
// inner 4px edges (mask entry 3), offset 4px into the 8px column
3468 int L = *l, H = L >> 4;
3469 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3474 E |= s->filter.mblim_lut[L] << 8;
3475 I |= s->filter.lim_lut[L] << 8;
3476 s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3478 s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3480 } else if (hm23 & x) {
3481 int L = l[8 << ss_v], H = L >> 4;
3482 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3484 s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
// Loop-filter the horizontal edges (edges between vertically adjacent
// blocks) of one 64x64 superblock for a single plane. Mirror image of
// filter_plane_cols(): mask bits select 8-pixel-wide edge segments, and
// horizontally adjacent segments are fused into loop_filter_16 or dual
// (mix2) calls where possible.
3492 static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
3493 uint8_t *lvl, uint8_t (*mask)[4],
3494 uint8_t *dst, ptrdiff_t ls)
3496 int y, x, bytesperpixel = s->bytesperpixel;
3499 // filter edges between rows (e.g. ------)
3501 for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
3502 uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
3503 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
// step two mask bits (one 16px span) per iteration, skipping already-handled bits
3505 for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
// E = edge limit, I = interior limit, H = high-edge-variance threshold
3508 int L = *l, H = L >> 4;
3509 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
// 16-wide edge: fuse this 8px segment with its right neighbour
3512 if (vmask[0] & (x << (1 + ss_h))) {
3513 av_assert2(l[1 + ss_h] == L);
3514 s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
3516 s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
// left + right segment both active: pack two threshold sets and use mix2
3518 } else if (vm & (x << (1 + ss_h))) {
3521 E |= s->filter.mblim_lut[L] << 8;
3522 I |= s->filter.lim_lut[L] << 8;
3523 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3524 [!!(vmask[1] & (x << (1 + ss_h)))]
3525 [1](ptr, ls, E, I, H);
3527 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3528 [1](ptr, ls, E, I, H);
// only the right-hand 8px segment is active
3530 } else if (vm & (x << (1 + ss_h))) {
3531 int L = l[1 + ss_h], H = L >> 4;
3532 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3534 s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
3535 [1](ptr + 8 * bytesperpixel, ls, E, I, H);
// inner 4px edges (mask entry 3), 4 rows into the block
3540 int L = *l, H = L >> 4;
3541 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3543 if (vm3 & (x << (1 + ss_h))) {
3546 E |= s->filter.mblim_lut[L] << 8;
3547 I |= s->filter.lim_lut[L] << 8;
3548 s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
3550 s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
3552 } else if (vm3 & (x << (1 + ss_h))) {
3553 int L = l[1 + ss_h], H = L >> 4;
3554 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3556 s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
// Apply the in-loop deblocking filter to one 64x64 superblock: luma plane
// first (column edges, then row edges), then both chroma planes using the
// masks appropriate for the current chroma subsampling.
3569 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3570 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3572 VP9Context *s = ctx->priv_data;
3573 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3574 uint8_t *dst = f->data[0] + yoff;
3575 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
// chroma mask set: index 0 for 4:4:4, 1 when either direction is subsampled
3576 uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
3579 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3580 // if you think of them as acting on a 8x8 block max, we can interleave
3581 // each v/h within the single x loop, but that only works if we work on
3582 // 8 pixel blocks, and we won't always do that (we want at least 16px
3583 // to use SSE2 optimizations, perhaps 32 for AVX2)
3585 filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
3586 filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);
// both chroma planes share the same levels, masks and stride
3588 for (p = 0; p < 2; p++) {
3589 dst = f->data[1 + p] + uvoff;
3590 filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
3591 filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
/* Compute the pixel span [*start, *end) covered by tile number idx when n
 * superblocks are divided into 2^log2_n tiles. Superblock indices are
 * clamped to n and converted to pixels in 8-pixel block units (<< 3). */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first_sb = (idx * n) >> log2_n;
    int last_sb  = ((idx + 1) * n) >> log2_n;

    if (first_sb > n)
        first_sb = n;
    if (last_sb > n)
        last_sb = n;
    *start = first_sb << 3;
    *end   = last_sb << 3;
}
3603 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3604 int max_count, int update_factor)
3606 unsigned ct = ct0 + ct1, p2, p1;
3612 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3613 p2 = av_clip(p2, 1, 255);
3614 ct = FFMIN(ct, max_count);
3615 update_factor = FASTDIV(update_factor * ct, max_count);
3617 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3618 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
// Backward probability adaptation: after a frame has been decoded, blend the
// per-frame symbol counts (s->counts) into the stored probability context
// s->prob_ctx[s->framectxid] via adapt_prob(), so the next frame that uses
// this context starts from adapted probabilities.
3621 static void adapt_probs(VP9Context *s)
3624 prob_context *p = &s->prob_ctx[s->framectxid].p;
// coefficients adapt faster (uf 112) after key/intra/error-resilient frames
3625 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
// coefficient and eob probabilities, per tx size/plane/inter/band/context
3628 for (i = 0; i < 4; i++)
3629 for (j = 0; j < 2; j++)
3630 for (k = 0; k < 2; k++)
3631 for (l = 0; l < 6; l++)
3632 for (m = 0; m < 6; m++) {
3633 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3634 unsigned *e = s->counts.eob[i][j][k][l][m];
3635 unsigned *c = s->counts.coef[i][j][k][l][m];
3637 if (l == 0 && m >= 3) // dc only has 3 pt
3640 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3641 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3642 adapt_prob(&pp[2], c[1], c[2], 24, uf);
// key/intra-only frames only adapt coefficients; keep skip/tx as-is
3645 if (s->keyframe || s->intraonly) {
3646 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3647 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3648 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3649 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
// skip flag
3654 for (i = 0; i < 3; i++)
3655 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
// intra/inter flag
3658 for (i = 0; i < 4; i++)
3659 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
// compound prediction flag (only meaningful when switchable per block)
3662 if (s->comppredmode == PRED_SWITCHABLE) {
3663 for (i = 0; i < 5; i++)
3664 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
// compound reference selection
3668 if (s->comppredmode != PRED_SINGLEREF) {
3669 for (i = 0; i < 5; i++)
3670 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3671 s->counts.comp_ref[i][1], 20, 128);
// single reference selection (two-level tree)
3674 if (s->comppredmode != PRED_COMPREF) {
3675 for (i = 0; i < 5; i++) {
3676 uint8_t *pp = p->single_ref[i];
3677 unsigned (*c)[2] = s->counts.single_ref[i];
3679 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3680 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3684 // block partitioning
3685 for (i = 0; i < 4; i++)
3686 for (j = 0; j < 4; j++) {
3687 uint8_t *pp = p->partition[i][j];
3688 unsigned *c = s->counts.partition[i][j];
3690 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3691 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3692 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// tx size selection (only adapted when per-block switchable)
3696 if (s->txfmmode == TX_SWITCHABLE) {
3697 for (i = 0; i < 2; i++) {
3698 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3700 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3701 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3702 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3703 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3704 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3705 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3709 // interpolation filter
3710 if (s->filtermode == FILTER_SWITCHABLE) {
3711 for (i = 0; i < 4; i++) {
3712 uint8_t *pp = p->filter[i];
3713 unsigned *c = s->counts.filter[i];
3715 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3716 adapt_prob(&pp[1], c[1], c[2], 20, 128);
// inter prediction modes (zeromv/nearestmv/nearmv/newmv tree)
3721 for (i = 0; i < 7; i++) {
3722 uint8_t *pp = p->mv_mode[i];
3723 unsigned *c = s->counts.mv_mode[i];
3725 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3726 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3727 adapt_prob(&pp[2], c[1], c[3], 20, 128);
// mv joint distribution (which of x/y components are non-zero)
3732 uint8_t *pp = p->mv_joint;
3733 unsigned *c = s->counts.mv_joint;
3735 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3736 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3737 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// per-component (x then y) mv probabilities: sign, class tree, bits, fractional
3741 for (i = 0; i < 2; i++) {
3743 unsigned *c, (*c2)[2], sum;
3745 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3746 s->counts.mv_comp[i].sign[1], 20, 128);
// magnitude class tree: each step removes the classes already decided
3748 pp = p->mv_comp[i].classes;
3749 c = s->counts.mv_comp[i].classes;
3750 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3751 adapt_prob(&pp[0], c[0], sum, 20, 128);
3753 adapt_prob(&pp[1], c[1], sum, 20, 128);
3755 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3756 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3758 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3759 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3761 adapt_prob(&pp[6], c[6], sum, 20, 128);
3762 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3763 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3764 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3766 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3767 s->counts.mv_comp[i].class0[1], 20, 128);
// per-bit probabilities for the integer magnitude
3768 pp = p->mv_comp[i].bits;
3769 c2 = s->counts.mv_comp[i].bits;
3770 for (j = 0; j < 10; j++)
3771 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
// fractional-pel trees (class0 and general)
3773 for (j = 0; j < 2; j++) {
3774 pp = p->mv_comp[i].class0_fp[j];
3775 c = s->counts.mv_comp[i].class0_fp[j];
3776 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3777 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3778 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3780 pp = p->mv_comp[i].fp;
3781 c = s->counts.mv_comp[i].fp;
3782 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3783 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3784 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// eighth-pel bits only adapt when high-precision mvs are enabled
3786 if (s->highprecisionmvs) {
3787 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3788 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3789 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3790 s->counts.mv_comp[i].hp[1], 20, 128);
// y intra prediction modes: walk the mode tree, removing decided modes from sum
3795 for (i = 0; i < 4; i++) {
3796 uint8_t *pp = p->y_mode[i];
3797 unsigned *c = s->counts.y_mode[i], sum, s2;
3799 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3800 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3801 sum -= c[TM_VP8_PRED];
3802 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3803 sum -= c[VERT_PRED];
3804 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3805 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3807 adapt_prob(&pp[3], s2, sum, 20, 128);
3809 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3810 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3811 sum -= c[DIAG_DOWN_LEFT_PRED];
3812 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3813 sum -= c[VERT_LEFT_PRED];
3814 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3815 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// uv intra prediction modes, conditioned on the y mode (same tree walk)
3819 for (i = 0; i < 10; i++) {
3820 uint8_t *pp = p->uv_mode[i];
3821 unsigned *c = s->counts.uv_mode[i], sum, s2;
3823 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3824 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3825 sum -= c[TM_VP8_PRED];
3826 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3827 sum -= c[VERT_PRED];
3828 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3829 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3831 adapt_prob(&pp[3], s2, sum, 20, 128);
3833 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3834 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3835 sum -= c[DIAG_DOWN_LEFT_PRED];
3836 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3837 sum -= c[VERT_LEFT_PRED];
3838 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3839 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// Release the per-frame decode scratch buffers: the saved intra-prediction
// edge pixels, the per-block struct array, and the coefficient block base.
3843 static void free_buffers(VP9Context *s)
3845 av_freep(&s->intra_pred_data[0]);
3846 av_freep(&s->b_base);
3847 av_freep(&s->block_base);
// Codec close: release the three internal frames and all current/next
// reference-frame slots, freeing the AVFrame shells allocated in
// init_frames().
3850 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3852 VP9Context *s = ctx->priv_data;
3855 for (i = 0; i < 3; i++) {
3856 if (s->frames[i].tf.f->data[0])
3857 vp9_unref_frame(ctx, &s->frames[i]);
3858 av_frame_free(&s->frames[i].tf.f);
3860 for (i = 0; i < 8; i++) {
3861 if (s->refs[i].f->data[0])
3862 ff_thread_release_buffer(ctx, &s->refs[i]);
3863 av_frame_free(&s->refs[i].f);
3864 if (s->next_refs[i].f->data[0])
3865 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3866 av_frame_free(&s->next_refs[i].f);
// Decode one VP9 frame packet: parse the frame header, set up the current
// frame and the segmentation/mv reference frames, run the (possibly
// two-pass) tile/superblock decode loop with per-row loop filtering and
// frame-threading progress reports, then rotate the reference slots and
// output the frame if it is visible.
3876 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3877 int *got_frame, AVPacket *pkt)
3879 const uint8_t *data = pkt->data;
3880 int size = pkt->size;
3881 VP9Context *s = ctx->priv_data;
3882 int res, tile_row, tile_col, i, ref, row, col;
// keep the previous segmentation map when the header doesn't update it
3883 int retain_segmap_ref = s->segmentation.enabled && !s->segmentation.update_map;
3884 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
3888 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
// res == 0: "show existing frame" — output reference `ref` directly,
// no pixel data to decode
3890 } else if (res == 0) {
3891 if (!s->refs[ref].f->data[0]) {
3892 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3893 return AVERROR_INVALIDDATA;
3895 if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
3897 ((AVFrame *)frame)->pkt_pts = pkt->pts;
3898 ((AVFrame *)frame)->pkt_dts = pkt->dts;
// keep the reference set unchanged for the next frame
3899 for (i = 0; i < 8; i++) {
3900 if (s->next_refs[i].f->data[0])
3901 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3902 if (s->refs[i].f->data[0] &&
3903 (res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i])) < 0)
// stash the previous frame as segmentation-map reference (unless retained)
3912 if (!retain_segmap_ref) {
3913 if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
3914 vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
3915 if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
3916 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
// stash the previous frame as mv-pair reference for temporal mv prediction
3919 if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
3920 vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);
3921 if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
3922 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
// allocate the new current frame
3924 if (s->frames[CUR_FRAME].tf.f->data[0])
3925 vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
3926 if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
3928 f = s->frames[CUR_FRAME].tf.f;
3929 f->key_frame = s->keyframe;
3930 f->pict_type = (s->keyframe || s->intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3931 ls_y = f->linesize[0];
3932 ls_uv =f->linesize[1];
// build the next reference set: refreshed slots point at the new frame
3935 for (i = 0; i < 8; i++) {
3936 if (s->next_refs[i].f->data[0])
3937 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3938 if (s->refreshrefmask & (1 << i)) {
3939 res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
3940 } else if (s->refs[i].f->data[0]) {
3941 res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
3947 // main tile decode loop
// reset the "above" context rows for the whole frame width
3948 bytesperpixel = s->bytesperpixel;
3949 memset(s->above_partition_ctx, 0, s->cols);
3950 memset(s->above_skip_ctx, 0, s->cols);
3951 if (s->keyframe || s->intraonly) {
3952 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
3954 memset(s->above_mode_ctx, NEARESTMV, s->cols);
3956 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
3957 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
3958 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
3959 memset(s->above_segpred_ctx, 0, s->cols);
// two-pass decode is used with frame threads when contexts must adapt
3960 s->pass = s->frames[CUR_FRAME].uses_2pass =
3961 ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
3962 if ((res = update_block_buffers(ctx)) < 0) {
3963 av_log(ctx, AV_LOG_ERROR,
3964 "Failed to allocate block buffers\n");
// parallel mode: commit the forward-updated probabilities immediately so
// dependent frame threads can proceed
3967 if (s->refreshctx && s->parallelmode) {
3970 for (i = 0; i < 4; i++) {
3971 for (j = 0; j < 2; j++)
3972 for (k = 0; k < 2; k++)
3973 for (l = 0; l < 6; l++)
3974 for (m = 0; m < 6; m++)
3975 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
3976 s->prob.coef[i][j][k][l][m], 3);
3977 if (s->txfmmode == i)
3980 s->prob_ctx[s->framectxid].p = s->prob.p;
3981 ff_thread_finish_setup(ctx);
3982 } else if (!s->refreshctx) {
3983 ff_thread_finish_setup(ctx);
// rewind the coefficient/eob buffers for this pass
3989 s->block = s->block_base;
3990 s->uvblock[0] = s->uvblock_base[0];
3991 s->uvblock[1] = s->uvblock_base[1];
3992 s->eob = s->eob_base;
3993 s->uveob[0] = s->uveob_base[0];
3994 s->uveob[1] = s->uveob_base[1];
// per tile row: locate each tile's payload and init its range decoder
3996 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
3997 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
3998 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
4000 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
// the last tile has no explicit size field; it spans the remaining data
4003 if (tile_col == s->tiling.tile_cols - 1 &&
4004 tile_row == s->tiling.tile_rows - 1) {
4007 tile_size = AV_RB32(data);
4011 if (tile_size > size) {
4012 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4013 return AVERROR_INVALIDDATA;
4015 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
4016 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
4017 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4018 return AVERROR_INVALIDDATA;
// decode one superblock (64px) row at a time across all tile columns
4025 for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
4026 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
4027 struct VP9Filter *lflvl_ptr = s->lflvl;
4028 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
4030 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4031 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
4032 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
// reset the "left" contexts at each tile-column boundary
4035 memset(s->left_partition_ctx, 0, 8);
4036 memset(s->left_skip_ctx, 0, 8);
4037 if (s->keyframe || s->intraonly) {
4038 memset(s->left_mode_ctx, DC_PRED, 16);
4040 memset(s->left_mode_ctx, NEARESTMV, 8);
4042 memset(s->left_y_nnz_ctx, 0, 16);
4043 memset(s->left_uv_nnz_ctx, 0, 32);
4044 memset(s->left_segpred_ctx, 0, 8);
// switch the active range decoder to this tile's stream
4046 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
4049 for (col = s->tiling.tile_col_start;
4050 col < s->tiling.tile_col_end;
4051 col += 8, yoff2 += 64 * bytesperpixel,
4052 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4053 // FIXME integrate with lf code (i.e. zero after each
4054 // use, similar to invtxfm coefficients, or similar)
4056 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
// pass 2 replays stored block structure; pass 0/1 parses the bitstream
4060 decode_sb_mem(ctx, row, col, lflvl_ptr,
4061 yoff2, uvoff2, BL_64X64);
4063 decode_sb(ctx, row, col, lflvl_ptr,
4064 yoff2, uvoff2, BL_64X64);
// save this tile's range decoder state for the next superblock row
4068 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
4076 // backup pre-loopfilter reconstruction data for intra
4077 // prediction of next row of sb64s
4078 if (row + 8 < s->rows) {
4079 memcpy(s->intra_pred_data[0],
4080 f->data[0] + yoff + 63 * ls_y,
4081 8 * s->cols * bytesperpixel);
4082 memcpy(s->intra_pred_data[1],
4083 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4084 8 * s->cols * bytesperpixel >> s->ss_h);
4085 memcpy(s->intra_pred_data[2],
4086 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4087 8 * s->cols * bytesperpixel >> s->ss_h);
4090 // loopfilter one row
4091 if (s->filter.level) {
4094 lflvl_ptr = s->lflvl;
4095 for (col = 0; col < s->cols;
4096 col += 8, yoff2 += 64 * bytesperpixel,
4097 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4098 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
4102 // FIXME maybe we can make this more finegrained by running the
4103 // loopfilter per-block instead of after each sbrow
4104 // In fact that would also make intra pred left preparation easier?
4105 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
// after pass 1 of a two-pass decode: adapt and publish the probabilities
4109 if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
4111 ff_thread_finish_setup(ctx);
4113 } while (s->pass++ == 1);
4114 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
// rotate: next_refs becomes the reference set for the following frame
4117 for (i = 0; i < 8; i++) {
4118 if (s->refs[i].f->data[0])
4119 ff_thread_release_buffer(ctx, &s->refs[i]);
4120 ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
// only visible frames are returned to the caller
4123 if (!s->invisible) {
4124 if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
// Flush callback (e.g. on seek): drop the internal frames and all
// reference-frame buffers; the AVFrame shells themselves stay allocated.
4132 static void vp9_decode_flush(AVCodecContext *ctx)
4134 VP9Context *s = ctx->priv_data;
4137 for (i = 0; i < 3; i++)
4138 vp9_unref_frame(ctx, &s->frames[i]);
4139 for (i = 0; i < 8; i++)
4140 ff_thread_release_buffer(ctx, &s->refs[i]);
// Allocate the AVFrame shells for the 3 internal frames (current,
// segmentation-map ref, mv-pair ref) and the 8 current/next reference
// slots. On any allocation failure everything already allocated is torn
// down via vp9_decode_free() and AVERROR(ENOMEM) is returned.
4143 static int init_frames(AVCodecContext *ctx)
4145 VP9Context *s = ctx->priv_data;
4148 for (i = 0; i < 3; i++) {
4149 s->frames[i].tf.f = av_frame_alloc();
4150 if (!s->frames[i].tf.f) {
4151 vp9_decode_free(ctx);
4152 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4153 return AVERROR(ENOMEM);
4156 for (i = 0; i < 8; i++) {
4157 s->refs[i].f = av_frame_alloc();
4158 s->next_refs[i].f = av_frame_alloc();
4159 if (!s->refs[i].f || !s->next_refs[i].f) {
4160 vp9_decode_free(ctx);
4161 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4162 return AVERROR(ENOMEM);
// Codec init: enable per-frame progress allocation for frame threading,
// mark the loop-filter sharpness as "not yet set" (-1; presumably so the
// first frame header triggers the limit-LUT rebuild — confirm against
// decode_frame_header), and allocate the frame shells.
4169 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4171 VP9Context *s = ctx->priv_data;
4173 ctx->internal->allocate_progress = 1;
4175 s->filter.sharpness = -1;
4177 return init_frames(ctx);
// Frame-threading worker init: each thread copy only needs its own frame
// shells; all other state is synced via update_thread_context().
4180 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4182 return init_frames(avctx);
// Frame-threading sync: copy the decode state a dependent thread needs from
// the source context — internal frames, the (next_refs ->) refs set, and the
// scalar/probability state carried across frames.
4185 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4188 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4190 // detect size changes in other threads
4191 if (s->intra_pred_data[0] &&
4192 (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
// re-reference the source thread's internal frames
4196 for (i = 0; i < 3; i++) {
4197 if (s->frames[i].tf.f->data[0])
4198 vp9_unref_frame(dst, &s->frames[i]);
4199 if (ssrc->frames[i].tf.f->data[0]) {
4200 if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
// the source's *next* refs become this thread's current refs
4204 for (i = 0; i < 8; i++) {
4205 if (s->refs[i].f->data[0])
4206 ff_thread_release_buffer(dst, &s->refs[i]);
4207 if (ssrc->next_refs[i].f->data[0]) {
4208 if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
// copy the scalar state and probability contexts carried across frames
4213 s->invisible = ssrc->invisible;
4214 s->keyframe = ssrc->keyframe;
4215 s->ss_v = ssrc->ss_v;
4216 s->ss_h = ssrc->ss_h;
4217 s->segmentation.enabled = ssrc->segmentation.enabled;
4218 s->segmentation.update_map = ssrc->segmentation.update_map;
4219 s->bytesperpixel = ssrc->bytesperpixel;
4221 s->bpp_index = ssrc->bpp_index;
4222 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4223 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
4224 if (ssrc->segmentation.enabled) {
4225 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4226 sizeof(s->segmentation.feat));
// Supported VP9 profiles (0-3), terminated by FF_PROFILE_UNKNOWN; exported
// through the codec's .profiles field below.
4232 static const AVProfile profiles[] = {
4233 { FF_PROFILE_VP9_0, "Profile 0" },
4234 { FF_PROFILE_VP9_1, "Profile 1" },
4235 { FF_PROFILE_VP9_2, "Profile 2" },
4236 { FF_PROFILE_VP9_3, "Profile 3" },
4237 { FF_PROFILE_UNKNOWN },
4240 AVCodec ff_vp9_decoder = {
4242 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4243 .type = AVMEDIA_TYPE_VIDEO,
4244 .id = AV_CODEC_ID_VP9,
4245 .priv_data_size = sizeof(VP9Context),
4246 .init = vp9_decode_init,
4247 .close = vp9_decode_free,
4248 .decode = vp9_decode_frame,
4249 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4250 .flush = vp9_decode_flush,
4251 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4252 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
4253 .profiles = NULL_IF_CONFIG_SMALL(profiles),