2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
34 #include "libavutil/pixdesc.h"
36 #define VP9_SYNCCODE 0x498342
/* Per-frame state: the decoded picture plus refcounted side data that must
 * stay alive as long as the frame is referenced. */
73 typedef struct VP9Frame {
/* backing buffer for segmentation_map and mv below (one allocation) */
75 AVBufferRef *extradata;
/* one segment id per 8x8 block; points into extradata->data */
76 uint8_t *segmentation_map;
/* per-8x8-block motion vector / reference pair; follows the map in extradata */
77 struct VP9mvrefPair *mv;
/* NOTE(review): the two lines below belong to a loopfilter struct whose
 * surrounding lines are missing from this view — confirm against full file. */
83 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
84 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
/* Per-block decode state for the block currently being reconstructed. */
87 typedef struct VP9Block {
88 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
89 enum FilterMode filter;
/* one motion vector per sub-block index and per reference */
90 VP56mv mv[4 /* b_idx */][2 /* ref */];
/* transform sizes for luma and chroma */
92 enum TxfmMode tx, uvtx;
94 enum BlockPartition bp;
/* Main decoder context. NOTE(review): many member lines (and the headers of
 * the nested structs holding the probability/counts tables) are missing from
 * this view; comments below only describe what the visible code shows. */
97 typedef struct VP9Context {
/* block state: b_base is the allocation, b the cursor into it */
104 VP9Block *b_base, *b;
/* current block position in 8x8 units; row7/col7 = low 3 bits (&7) */
106 int row, row7, col, col7;
108 ptrdiff_t y_stride, uv_stride;
/* bitstream header fields */
111 uint8_t keyframe, last_keyframe;
112 uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
114 uint8_t use_last_frame_mvs;
119 uint8_t refreshrefmask;
120 uint8_t highprecisionmvs;
121 enum FilterMode filtermode;
122 uint8_t allowcompinter;
125 uint8_t parallelmode;
/* the two variable compound-prediction references (see decode_frame_header) */
129 uint8_t varcompref[2];
/* 8 reference slots as mandated by the VP9 bitstream */
130 ThreadFrame refs[8], next_refs[8];
132 #define REF_FRAME_MVPAIR 1
133 #define REF_FRAME_SEGMAP 2
/* loopfilter limit LUT, reset when sharpness changes */
140 uint8_t mblim_lut[64];
148 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
150 #define MAX_SEGMENT 8
154 uint8_t absolute_vals;
156 uint8_t ignore_refmap;
161 uint8_t skip_enabled;
/* tiling layout derived from the frame header */
170 unsigned log2_tile_cols, log2_tile_rows;
171 unsigned tile_cols, tile_rows;
172 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
/* frame size in 64x64 superblocks and in 8x8 blocks */
174 unsigned sb_cols, sb_rows, rows, cols;
/* coefficient probabilities: 3-entry form is the stored/model form,
 * 11-entry form presumably the expanded per-token table — confirm */
177 uint8_t coef[4][2][2][6][6][3];
181 uint8_t coef[4][2][2][6][6][11];
/* adaptation counters, zeroed per frame and used for backward updates */
186 unsigned y_mode[4][10];
187 unsigned uv_mode[10][10];
188 unsigned filter[4][3];
189 unsigned mv_mode[7][4];
190 unsigned intra[4][2];
192 unsigned single_ref[5][2][2];
193 unsigned comp_ref[5][2];
194 unsigned tx32p[2][4];
195 unsigned tx16p[2][3];
198 unsigned mv_joint[4];
201 unsigned classes[11];
203 unsigned bits[10][2];
204 unsigned class0_fp[2][4];
206 unsigned class0_hp[2];
209 unsigned partition[4][4][4];
210 unsigned coef[4][2][2][6][6][3];
211 unsigned eob[4][2][2][6][6][2];
213 enum TxfmMode txfmmode;
214 enum CompPredMode comppredmode;
216 // contextual (left/above) cache
217 DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
218 DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
219 DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
220 DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
221 DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
222 DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
223 DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
224 DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
225 DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
226 DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
227 DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
228 DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
/* "above" context rows: pointers into the single buffer carved up by
 * update_size() via the assign() macro */
229 uint8_t *above_partition_ctx;
230 uint8_t *above_mode_ctx;
231 // FIXME maybe merge some of the below in a flags field?
232 uint8_t *above_y_nnz_ctx;
233 uint8_t *above_uv_nnz_ctx[2];
234 uint8_t *above_skip_ctx; // 1bit
235 uint8_t *above_txfm_ctx; // 2bit
236 uint8_t *above_segpred_ctx; // 1bit
237 uint8_t *above_intra_ctx; // 1bit
238 uint8_t *above_comp_ctx; // 1bit
239 uint8_t *above_ref_ctx; // 2bit
240 uint8_t *above_filter_ctx;
241 VP56mv (*above_mv_ctx)[2];
/* whole-frame cache */
244 uint8_t *intra_pred_data[3];
245 struct VP9Filter *lflvl;
246 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2];
248 // block reconstruction intermediates
249 int block_alloc_using_2pass;
250 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
251 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
252 struct { int x, y; } min_mv, max_mv;
253 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
254 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
/* per-reference MV scaling (14-bit fixed point) for scaled references */
255 uint16_t mvscale[3][2];
256 uint8_t mvstep[3][2];
/* Block {width, height} per block size, in two unit scales.
 * NOTE(review): from the values (64x64 -> {16,16} resp. {8,8}) row [0]
 * appears to be in 4-sample units and row [1] in 8-sample units — confirm. */
259 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
261 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
262 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
264 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
265 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
/* Allocate the picture buffer plus per-frame side data (segmentation map and
 * mv array) for f. Returns 0 on success, a negative AVERROR on failure. */
269 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
271 VP9Context *s = ctx->priv_data;
274 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
/* one byte per 8x8 block: 64 blocks per 64x64 superblock */
276 sz = 64 * s->sb_cols * s->sb_rows;
/* single allocation: segmentation map first, mv pairs after it */
277 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
/* undo the buffer grab so the frame is left fully unreferenced */
278 ff_thread_release_buffer(ctx, &f->tf);
279 return AVERROR(ENOMEM);
282 f->segmentation_map = f->extradata->data;
283 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
/* Release both the picture buffer and the refcounted side data of f. */
288 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
290 ff_thread_release_buffer(ctx, &f->tf);
291 av_buffer_unref(&f->extradata);
/* Make dst a new reference to src: ref the picture, then ref the extradata.
 * On extradata failure the partially-referenced dst is unreferenced again.
 * Returns 0 on success, a negative AVERROR on failure. */
294 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
298 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
300 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
301 vp9_unref_frame(ctx, dst);
302 return AVERROR(ENOMEM);
/* plain pointer copies: they alias into the shared extradata buffer */
305 dst->segmentation_map = src->segmentation_map;
307 dst->uses_2pass = src->uses_2pass;
/* (Re)initialize all size-dependent decoder state for a w x h frame in the
 * given pixel format. A no-op when nothing changed; otherwise recomputes the
 * superblock/block grid and carves the per-column "above" context arrays out
 * of one allocation. Returns 0 on success or a negative AVERROR. */
312 static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
314 VP9Context *s = ctx->priv_data;
316 int bytesperpixel = s->bytesperpixel;
318 av_assert0(w > 0 && h > 0);
/* fast path: same dimensions and format as the already-initialized state */
320 if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)
/* grid sizes: 64x64 superblocks and 8x8 blocks, rounded up */
326 s->sb_cols = (w + 63) >> 6;
327 s->sb_rows = (h + 63) >> 6;
328 s->cols = (w + 7) >> 3;
329 s->rows = (h + 7) >> 3;
/* carve sb_cols * n elements for each array out of the single buffer p */
331 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
332 av_freep(&s->intra_pred_data[0]);
333 // FIXME we slightly over-allocate here for subsampled chroma, but a little
334 // bit of padding shouldn't affect performance...
335 p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
336 sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
338 return AVERROR(ENOMEM);
339 assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
340 assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
341 assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
342 assign(s->above_y_nnz_ctx, uint8_t *, 16);
343 assign(s->above_mode_ctx, uint8_t *, 16);
344 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
345 assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
346 assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
347 assign(s->above_partition_ctx, uint8_t *, 8);
348 assign(s->above_skip_ctx, uint8_t *, 8);
349 assign(s->above_txfm_ctx, uint8_t *, 8);
350 assign(s->above_segpred_ctx, uint8_t *, 8);
351 assign(s->above_intra_ctx, uint8_t *, 8);
352 assign(s->above_comp_ctx, uint8_t *, 8);
353 assign(s->above_ref_ctx, uint8_t *, 8);
354 assign(s->above_filter_ctx, uint8_t *, 8);
355 assign(s->lflvl, struct VP9Filter *, 1);
358 // these will be re-allocated a little later
359 av_freep(&s->b_base);
360 av_freep(&s->block_base);
/* bit depth changed: the DSP function tables depend on bpp */
362 if (s->bpp != s->last_bpp) {
363 ff_vp9dsp_init(&s->dsp, s->bpp);
364 ff_videodsp_init(&s->vdsp, s->bpp);
365 s->last_bpp = s->bpp;
/* (Re)allocate the per-block coefficient and EOB buffers. In 2-pass (frame-
 * threaded) mode every superblock gets its own storage so pass 2 can re-read
 * pass 1's output; otherwise one block's worth is reused for the whole frame.
 * Returns 0 on success or AVERROR(ENOMEM). */
371 static int update_block_buffers(AVCodecContext *ctx)
373 VP9Context *s = ctx->priv_data;
374 int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
/* fast path: buffers exist and the 2-pass mode they were sized for matches */
376 if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
380 av_free(s->block_base);
/* chroma sizes scale with the subsampling factors */
381 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
382 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
383 if (s->frames[CUR_FRAME].uses_2pass) {
384 int sbs = s->sb_cols * s->sb_rows;
386 s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
/* one contiguous slab: luma coefs, 2x chroma coefs, luma EOBs, 2x chroma EOBs */
387 s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
388 16 * 16 + 2 * chroma_eobs) * sbs);
389 if (!s->b_base || !s->block_base)
390 return AVERROR(ENOMEM);
391 s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
392 s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
393 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
394 s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
395 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
/* single-pass: same layout, sized for one superblock only */
397 s->b_base = av_malloc(sizeof(VP9Block));
398 s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
399 16 * 16 + 2 * chroma_eobs);
400 if (!s->b_base || !s->block_base)
401 return AVERROR(ENOMEM);
402 s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
403 s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
404 s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
405 s->uveob_base[0] = s->eob_base + 16 * 16;
406 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
/* remember which mode the buffers were sized for (checked on entry) */
408 s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
413 // for some reason the sign bit is at the end, not the start, of a bit sequence
414 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
416 int v = get_bits(gb, n);
417 return get_bits1(gb) ? -v : v;
/**
 * Inverse of the "recenter" mapping used by the differential probability
 * update: map the non-negative code word v back to a value centered on m.
 * Small v decodes to values near m (odd below, even above); v beyond the
 * symmetric range 2*m passes through unchanged.
 */
static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    if (v > 2 * m)
        return v;
    if (v & 1)
        return m - ((v + 1) >> 1);
    return m + (v >> 1);
}
425 // differential forward probability updates
/* Decode a forward probability update for current probability p (in [1,255])
 * and return the new probability. The delta is read as a short VLC (4/4/5/7
 * bits in increasing ranges), mapped through inv_map_table[] and re-centered
 * around p. NOTE(review): the tail of inv_map_table[] (entries after 251) is
 * missing from this view — confirm against the full file. */
426 static int update_prob(VP56RangeCoder *c, int p)
428 static const int inv_map_table[254] = {
429 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
430 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
431 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
432 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
433 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
434 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
435 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
436 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
437 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
438 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
439 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
440 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
441 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
442 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
443 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
444 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
445 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
446 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
451 /* This code is trying to do a differential probability update. For a
452 * current probability A in the range [1, 255], the difference to a new
453 * probability of any value can be expressed differentially as 1-A,255-A
454 * where some part of this (absolute range) exists both in positive as
455 * well as the negative part, whereas another part only exists in one
456 * half. We're trying to code this shared part differentially, i.e.
457 * times two where the value of the lowest bit specifies the sign, and
458 * the single part is then coded on top of this. This absolute difference
459 * then again has a value of [0,254], but a bigger value in this range
460 * indicates that we're further away from the original value A, so we
461 * can code this as a VLC code, since higher values are increasingly
462 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
463 * updates vs. the 'fine, exact' updates further down the range, which
464 * adds one extra dimension to this differential update model. */
/* VLC: ranges [0,16), [16,32), [32,64), then 7+1 bits for the rest */
466 if (!vp8_rac_get(c)) {
467 d = vp8_rac_get_uint(c, 4) + 0;
468 } else if (!vp8_rac_get(c)) {
469 d = vp8_rac_get_uint(c, 4) + 16;
470 } else if (!vp8_rac_get(c)) {
471 d = vp8_rac_get_uint(c, 5) + 32;
473 d = vp8_rac_get_uint(c, 7);
475 d = (d << 1) - 65 + vp8_rac_get(c);
/* re-center around p, mirroring for p > 128 to stay inside [1,255] */
479 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
480 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
/* Parse the bit depth / colorspace / subsampling fields of the uncompressed
 * header and derive the pixel format. Also sets s->bpp, s->bytesperpixel,
 * s->ss_h/ss_v and the AVCodecContext colorspace/color_range. Returns the
 * pixel format or a negative AVERROR on invalid combinations. */
483 static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
485 static const enum AVColorSpace colorspaces[8] = {
486 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
487 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
489 VP9Context *s = ctx->priv_data;
490 enum AVPixelFormat res;
/* profiles 0/1 are always 8-bit; 2/3 signal 10 or 12 bit with one bit */
491 int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
494 s->bpp = 8 + bits * 2;
495 s->bytesperpixel = (7 + s->bpp) >> 3;
496 ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
497 if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
498 static const enum AVPixelFormat pix_fmt_rgb[3] = {
499 AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
/* RGB is only valid in odd profiles (1 and 3) */
501 if (ctx->profile & 1) {
502 s->ss_h = s->ss_v = 1;
503 res = pix_fmt_rgb[bits];
504 ctx->color_range = AVCOL_RANGE_JPEG;
506 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
508 return AVERROR_INVALIDDATA;
/* YUV path: pick format by bit depth and the two subsampling flags */
511 static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
512 { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
513 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
514 { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
515 { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
516 { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
517 { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
519 ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
/* odd profiles carry explicit subsampling bits; 4:2:0 is then forbidden */
520 if (ctx->profile & 1) {
521 s->ss_h = get_bits1(&s->gb);
522 s->ss_v = get_bits1(&s->gb);
523 if ((res = pix_fmt_for_ss[bits][s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
524 av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
526 return AVERROR_INVALIDDATA;
527 } else if (get_bits1(&s->gb)) {
528 av_log(ctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
530 return AVERROR_INVALIDDATA;
/* even profiles are implicitly 4:2:0 */
533 s->ss_h = s->ss_v = 1;
534 res = pix_fmt_for_ss[bits][1][1];
/* Parse the full VP9 frame header: the uncompressed part (read with the
 * GetBitContext) followed by the arithmetic-coded probability updates (read
 * with the range coder). Fills in frame dimensions, reference setup, loop-
 * filter/quantizer/segmentation state and the per-frame probability tables.
 * Returns the total header size in bytes, or a negative AVERROR.
 * NOTE(review): this view of the function is missing interior lines (error
 * returns, else branches, closing braces) — comments describe only what is
 * visible here. */
541 static int decode_frame_header(AVCodecContext *ctx,
542 const uint8_t *data, int size, int *ref)
544 VP9Context *s = ctx->priv_data;
545 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
546 enum AVPixelFormat fmt = ctx->pix_fmt;
548 const uint8_t *data2;
/* ---- general header ---- */
551 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
552 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
555 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
556 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
557 return AVERROR_INVALIDDATA;
/* profile is 2 bits, plus one reserved bit when both are set */
559 ctx->profile = get_bits1(&s->gb);
560 ctx->profile |= get_bits1(&s->gb) << 1;
561 if (ctx->profile == 3) ctx->profile += get_bits1(&s->gb);
562 if (ctx->profile > 3) {
563 av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
564 return AVERROR_INVALIDDATA;
/* show_existing_frame: directly display reference slot *ref */
566 if (get_bits1(&s->gb)) {
567 *ref = get_bits(&s->gb, 3);
570 s->last_keyframe = s->keyframe;
571 s->keyframe = !get_bits1(&s->gb);
572 last_invisible = s->invisible;
573 s->invisible = !get_bits1(&s->gb);
574 s->errorres = get_bits1(&s->gb);
575 s->use_last_frame_mvs = !s->errorres && !last_invisible;
/* ---- keyframe path ---- */
577 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
578 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
579 return AVERROR_INVALIDDATA;
581 if ((fmt = read_colorspace_details(ctx)) < 0)
583 // for profile 1, here follows the subsampling bits
584 s->refreshrefmask = 0xff;
585 w = get_bits(&s->gb, 16) + 1;
586 h = get_bits(&s->gb, 16) + 1;
587 if (get_bits1(&s->gb)) // display size
588 skip_bits(&s->gb, 32);
/* ---- inter frame path ---- */
590 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
591 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
/* intra-only frames repeat the sync code and color info */
593 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
594 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
595 return AVERROR_INVALIDDATA;
597 if (ctx->profile == 1) {
598 if ((fmt = read_colorspace_details(ctx)) < 0)
601 s->ss_h = s->ss_v = 1;
/* profile 0 intra-only: format is fixed 8-bit 4:2:0 */
604 s->bytesperpixel = 1;
605 fmt = AV_PIX_FMT_YUV420P;
606 ctx->colorspace = AVCOL_SPC_BT470BG;
607 ctx->color_range = AVCOL_RANGE_JPEG;
609 s->refreshrefmask = get_bits(&s->gb, 8);
610 w = get_bits(&s->gb, 16) + 1;
611 h = get_bits(&s->gb, 16) + 1;
612 if (get_bits1(&s->gb)) // display size
613 skip_bits(&s->gb, 32);
/* regular inter frame: three reference slots with sign biases */
615 s->refreshrefmask = get_bits(&s->gb, 8);
616 s->refidx[0] = get_bits(&s->gb, 3);
617 s->signbias[0] = get_bits1(&s->gb);
618 s->refidx[1] = get_bits(&s->gb, 3);
619 s->signbias[1] = get_bits1(&s->gb);
620 s->refidx[2] = get_bits(&s->gb, 3);
621 s->signbias[2] = get_bits1(&s->gb);
622 if (!s->refs[s->refidx[0]].f->data[0] ||
623 !s->refs[s->refidx[1]].f->data[0] ||
624 !s->refs[s->refidx[2]].f->data[0]) {
625 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
626 return AVERROR_INVALIDDATA;
/* frame size: either inherited from one of the refs or coded explicitly */
628 if (get_bits1(&s->gb)) {
629 w = s->refs[s->refidx[0]].f->width;
630 h = s->refs[s->refidx[0]].f->height;
631 } else if (get_bits1(&s->gb)) {
632 w = s->refs[s->refidx[1]].f->width;
633 h = s->refs[s->refidx[1]].f->height;
634 } else if (get_bits1(&s->gb)) {
635 w = s->refs[s->refidx[2]].f->width;
636 h = s->refs[s->refidx[2]].f->height;
638 w = get_bits(&s->gb, 16) + 1;
639 h = get_bits(&s->gb, 16) + 1;
641 // Note that in this code, "CUR_FRAME" is actually before we
642 // have formally allocated a frame, and thus actually represents
/* previous-frame MVs are only reusable if the size did not change */
644 s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
645 s->frames[CUR_FRAME].tf.f->height == h;
646 if (get_bits1(&s->gb)) // display size
647 skip_bits(&s->gb, 32);
648 s->highprecisionmvs = get_bits1(&s->gb);
649 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
/* compound prediction requires two refs pointing in opposite directions */
651 s->allowcompinter = s->signbias[0] != s->signbias[1] ||
652 s->signbias[0] != s->signbias[2];
653 if (s->allowcompinter) {
654 if (s->signbias[0] == s->signbias[1]) {
656 s->varcompref[0] = 0;
657 s->varcompref[1] = 1;
658 } else if (s->signbias[0] == s->signbias[2]) {
660 s->varcompref[0] = 0;
661 s->varcompref[1] = 2;
664 s->varcompref[0] = 1;
665 s->varcompref[1] = 2;
/* per-reference scaling factors (14-bit fixed point) for scaled refs */
669 for (i = 0; i < 3; i++) {
670 AVFrame *ref = s->refs[s->refidx[i]].f;
671 int refw = ref->width, refh = ref->height;
673 if (ref->format != fmt) {
674 av_log(ctx, AV_LOG_ERROR,
675 "Ref pixfmt (%s) did not match current frame (%s)",
676 av_get_pix_fmt_name(ref->format),
677 av_get_pix_fmt_name(fmt));
678 return AVERROR_INVALIDDATA;
679 } else if (refw == w && refh == h) {
680 s->mvscale[i][0] = s->mvscale[i][1] = 0;
/* spec limits: ref may be at most 2x larger / 16x smaller than frame */
682 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
683 av_log(ctx, AV_LOG_ERROR,
684 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
686 return AVERROR_INVALIDDATA;
688 s->mvscale[i][0] = (refw << 14) / w;
689 s->mvscale[i][1] = (refh << 14) / h;
690 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
691 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
696 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
697 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
698 s->framectxid = c = get_bits(&s->gb, 2);
700 /* loopfilter header data */
701 s->filter.level = get_bits(&s->gb, 6);
702 sharp = get_bits(&s->gb, 3);
703 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
704 // the old cache values since they are still valid
705 if (s->filter.sharpness != sharp)
706 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
707 s->filter.sharpness = sharp;
708 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
709 if (get_bits1(&s->gb)) {
710 for (i = 0; i < 4; i++)
711 if (get_bits1(&s->gb))
712 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
713 for (i = 0; i < 2; i++)
714 if (get_bits1(&s->gb))
715 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
719 /* quantization header data */
720 s->yac_qi = get_bits(&s->gb, 8);
721 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
722 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
723 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
724 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
725 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
727 /* segmentation header info */
728 s->segmentation.ignore_refmap = 0;
729 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
730 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
731 for (i = 0; i < 7; i++)
732 s->prob.seg[i] = get_bits1(&s->gb) ?
733 get_bits(&s->gb, 8) : 255;
734 if ((s->segmentation.temporal = get_bits1(&s->gb))) {
735 for (i = 0; i < 3; i++)
736 s->prob.segpred[i] = get_bits1(&s->gb) ?
737 get_bits(&s->gb, 8) : 255;
/* reusing the previous frame's segmap across a size change is unsafe;
 * tolerate it but ignore the reference map instead of erroring out */
740 if ((!s->segmentation.update_map || s->segmentation.temporal) &&
741 (w != s->frames[CUR_FRAME].tf.f->width ||
742 h != s->frames[CUR_FRAME].tf.f->height)) {
743 av_log(ctx, AV_LOG_WARNING,
744 "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
745 s->segmentation.temporal, s->segmentation.update_map);
746 s->segmentation.ignore_refmap = 1;
747 //return AVERROR_INVALIDDATA;
750 if (get_bits1(&s->gb)) {
751 s->segmentation.absolute_vals = get_bits1(&s->gb);
752 for (i = 0; i < 8; i++) {
753 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
754 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
755 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
756 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
757 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
758 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
759 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
/* segmentation disabled: only feature set 0 is used, all off */
763 s->segmentation.feat[0].q_enabled = 0;
764 s->segmentation.feat[0].lf_enabled = 0;
765 s->segmentation.feat[0].skip_enabled = 0;
766 s->segmentation.feat[0].ref_enabled = 0;
769 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
770 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
771 int qyac, qydc, quvac, quvdc, lflvl, sh;
773 if (s->segmentation.feat[i].q_enabled) {
774 if (s->segmentation.absolute_vals)
775 qyac = s->segmentation.feat[i].q_val;
777 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
781 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
782 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
783 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
784 qyac = av_clip_uintp2(qyac, 8);
786 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
787 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
788 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
789 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];
/* loopfilter level per segment; deltas are scaled up for high levels */
791 sh = s->filter.level >= 32;
792 if (s->segmentation.feat[i].lf_enabled) {
793 if (s->segmentation.absolute_vals)
794 lflvl = s->segmentation.feat[i].lf_val;
796 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
798 lflvl = s->filter.level;
800 if (s->lf_delta.enabled) {
801 s->segmentation.feat[i].lflvl[0][0] =
802 s->segmentation.feat[i].lflvl[0][1] =
803 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
804 for (j = 1; j < 4; j++) {
805 s->segmentation.feat[i].lflvl[j][0] =
806 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
807 s->lf_delta.mode[0]) * (1 << sh)), 6);
808 s->segmentation.feat[i].lflvl[j][1] =
809 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
810 s->lf_delta.mode[1]) * (1 << sh)), 6);
813 memset(s->segmentation.feat[i].lflvl, lflvl,
814 sizeof(s->segmentation.feat[i].lflvl));
/* ---- (re)allocate size-dependent state and tiling ---- */
819 if ((res = update_size(ctx, w, h, fmt)) < 0) {
820 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);
823 for (s->tiling.log2_tile_cols = 0;
824 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
825 s->tiling.log2_tile_cols++) ;
826 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
827 max = FFMAX(0, max - 1);
828 while (max > s->tiling.log2_tile_cols) {
829 if (get_bits1(&s->gb))
830 s->tiling.log2_tile_cols++;
834 s->tiling.log2_tile_rows = decode012(&s->gb);
835 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
/* tile column count changed: grow the per-tile range coder array */
836 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
837 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
838 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
839 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
841 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
842 return AVERROR(ENOMEM);
/* reset all probability contexts to defaults on keyframe/error/intra-only */
846 if (s->keyframe || s->errorres || s->intraonly) {
847 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
848 s->prob_ctx[3].p = vp9_default_probs;
849 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
850 sizeof(vp9_default_coef_probs));
851 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
852 sizeof(vp9_default_coef_probs));
853 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
854 sizeof(vp9_default_coef_probs));
855 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
856 sizeof(vp9_default_coef_probs));
859 // next 16 bits is size of the rest of the header (arith-coded)
860 size2 = get_bits(&s->gb, 16);
861 data2 = align_get_bits(&s->gb);
862 if (size2 > size - (data2 - data)) {
863 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
864 return AVERROR_INVALIDDATA;
866 ff_vp56_init_range_decoder(&s->c, data2, size2);
867 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
868 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
869 return AVERROR_INVALIDDATA;
/* zero adaptation counters (keyframes only need coef/eob, which are
 * laid out adjacently — see the counts struct) */
872 if (s->keyframe || s->intraonly) {
873 memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
875 memset(&s->counts, 0, sizeof(s->counts));
877 // FIXME is it faster to not copy here, but do it down in the fw updates
878 // as explicit copies if the fw update is missing (and skip the copy upon
880 s->prob.p = s->prob_ctx[c].p;
/* ---- compressed header: txfm mode and probability updates ---- */
884 s->txfmmode = TX_4X4;
886 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
887 if (s->txfmmode == 3)
888 s->txfmmode += vp8_rac_get(&s->c);
890 if (s->txfmmode == TX_SWITCHABLE) {
891 for (i = 0; i < 2; i++)
892 if (vp56_rac_get_prob_branchy(&s->c, 252))
893 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
894 for (i = 0; i < 2; i++)
895 for (j = 0; j < 2; j++)
896 if (vp56_rac_get_prob_branchy(&s->c, 252))
897 s->prob.p.tx16p[i][j] =
898 update_prob(&s->c, s->prob.p.tx16p[i][j]);
899 for (i = 0; i < 2; i++)
900 for (j = 0; j < 3; j++)
901 if (vp56_rac_get_prob_branchy(&s->c, 252))
902 s->prob.p.tx32p[i][j] =
903 update_prob(&s->c, s->prob.p.tx32p[i][j]);
/* coefficient probability updates, per transform size */
908 for (i = 0; i < 4; i++) {
909 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
910 if (vp8_rac_get(&s->c)) {
911 for (j = 0; j < 2; j++)
912 for (k = 0; k < 2; k++)
913 for (l = 0; l < 6; l++)
914 for (m = 0; m < 6; m++) {
915 uint8_t *p = s->prob.coef[i][j][k][l][m];
916 uint8_t *r = ref[j][k][l][m];
917 if (m >= 3 && l == 0) // dc only has 3 pt
919 for (n = 0; n < 3; n++) {
920 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
921 p[n] = update_prob(&s->c, r[n]);
/* no update coded: copy the context's probabilities unchanged */
929 for (j = 0; j < 2; j++)
930 for (k = 0; k < 2; k++)
931 for (l = 0; l < 6; l++)
932 for (m = 0; m < 6; m++) {
933 uint8_t *p = s->prob.coef[i][j][k][l][m];
934 uint8_t *r = ref[j][k][l][m];
935 if (m > 3 && l == 0) // dc only has 3 pt
941 if (s->txfmmode == i)
/* skip-flag probabilities */
946 for (i = 0; i < 3; i++)
947 if (vp56_rac_get_prob_branchy(&s->c, 252))
948 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
/* inter-frame-only probability updates */
949 if (!s->keyframe && !s->intraonly) {
950 for (i = 0; i < 7; i++)
951 for (j = 0; j < 3; j++)
952 if (vp56_rac_get_prob_branchy(&s->c, 252))
953 s->prob.p.mv_mode[i][j] =
954 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
956 if (s->filtermode == FILTER_SWITCHABLE)
957 for (i = 0; i < 4; i++)
958 for (j = 0; j < 2; j++)
959 if (vp56_rac_get_prob_branchy(&s->c, 252))
960 s->prob.p.filter[i][j] =
961 update_prob(&s->c, s->prob.p.filter[i][j]);
963 for (i = 0; i < 4; i++)
964 if (vp56_rac_get_prob_branchy(&s->c, 252))
965 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
967 if (s->allowcompinter) {
968 s->comppredmode = vp8_rac_get(&s->c);
970 s->comppredmode += vp8_rac_get(&s->c);
971 if (s->comppredmode == PRED_SWITCHABLE)
972 for (i = 0; i < 5; i++)
973 if (vp56_rac_get_prob_branchy(&s->c, 252))
975 update_prob(&s->c, s->prob.p.comp[i]);
977 s->comppredmode = PRED_SINGLEREF;
980 if (s->comppredmode != PRED_COMPREF) {
981 for (i = 0; i < 5; i++) {
982 if (vp56_rac_get_prob_branchy(&s->c, 252))
983 s->prob.p.single_ref[i][0] =
984 update_prob(&s->c, s->prob.p.single_ref[i][0]);
985 if (vp56_rac_get_prob_branchy(&s->c, 252))
986 s->prob.p.single_ref[i][1] =
987 update_prob(&s->c, s->prob.p.single_ref[i][1]);
991 if (s->comppredmode != PRED_SINGLEREF) {
992 for (i = 0; i < 5; i++)
993 if (vp56_rac_get_prob_branchy(&s->c, 252))
994 s->prob.p.comp_ref[i] =
995 update_prob(&s->c, s->prob.p.comp_ref[i]);
998 for (i = 0; i < 4; i++)
999 for (j = 0; j < 9; j++)
1000 if (vp56_rac_get_prob_branchy(&s->c, 252))
1001 s->prob.p.y_mode[i][j] =
1002 update_prob(&s->c, s->prob.p.y_mode[i][j]);
1004 for (i = 0; i < 4; i++)
1005 for (j = 0; j < 4; j++)
1006 for (k = 0; k < 3; k++)
1007 if (vp56_rac_get_prob_branchy(&s->c, 252))
1008 s->prob.p.partition[3 - i][j][k] =
1009 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
1011 // mv fields don't use the update_prob subexp model for some reason
1012 for (i = 0; i < 3; i++)
1013 if (vp56_rac_get_prob_branchy(&s->c, 252))
1014 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1016 for (i = 0; i < 2; i++) {
1017 if (vp56_rac_get_prob_branchy(&s->c, 252))
1018 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1020 for (j = 0; j < 10; j++)
1021 if (vp56_rac_get_prob_branchy(&s->c, 252))
1022 s->prob.p.mv_comp[i].classes[j] =
1023 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1025 if (vp56_rac_get_prob_branchy(&s->c, 252))
1026 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1028 for (j = 0; j < 10; j++)
1029 if (vp56_rac_get_prob_branchy(&s->c, 252))
1030 s->prob.p.mv_comp[i].bits[j] =
1031 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1034 for (i = 0; i < 2; i++) {
1035 for (j = 0; j < 2; j++)
1036 for (k = 0; k < 3; k++)
1037 if (vp56_rac_get_prob_branchy(&s->c, 252))
1038 s->prob.p.mv_comp[i].class0_fp[j][k] =
1039 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1041 for (j = 0; j < 3; j++)
1042 if (vp56_rac_get_prob_branchy(&s->c, 252))
1043 s->prob.p.mv_comp[i].fp[j] =
1044 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
/* high-precision MV probabilities only when enabled for this frame */
1047 if (s->highprecisionmvs) {
1048 for (i = 0; i < 2; i++) {
1049 if (vp56_rac_get_prob_branchy(&s->c, 252))
1050 s->prob.p.mv_comp[i].class0_hp =
1051 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1053 if (vp56_rac_get_prob_branchy(&s->c, 252))
1054 s->prob.p.mv_comp[i].hp =
1055 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
/* total header size: uncompressed part + compressed part */
1060 return (data2 - data) + size2;
/* Clamp src into the per-block valid MV range (s->min_mv / s->max_mv) and
 * store the result in dst.
 * NOTE(review): the rest of the parameter list is missing from this view;
 * the RETURN_MV caller below passes a VP9Context *s as the third argument —
 * confirm against the full file. */
1063 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
1066 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
1067 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
/* Select a predicted motion vector (*pmv) for reference frame 'ref' of
 * the current block, scanning candidates in VP9 order: already-decoded
 * sub-block MVs, the above/left neighbour MVs, spatial neighbours using
 * the same reference, the temporally co-located MV, and finally the
 * same candidates again with sign flipping for differing references.
 * z = MV slot within a mvref pair, idx selects nearest vs. near,
 * sb = sub-block index (or -1 for NEWMV whole-block prediction).
 * NOTE(review): several lines (macro bodies, braces, #if guards) are
 * missing from this listing — confirm control flow against the full
 * file before relying on it. */
1070 static void find_ref_mvs(VP9Context *s,
1071 VP56mv *pmv, int ref, int z, int idx, int sb)
/* Up to 8 (col,row) neighbour offsets to probe, per block size. */
1073 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
1074 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
1075 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
1076 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
1077 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
1078 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
1079 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
1080 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
1081 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1082 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
1083 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1084 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
1085 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
1086 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
1087 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1088 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
1089 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
1090 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1091 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1092 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1093 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1094 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1095 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1096 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1097 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1098 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1099 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1102 int row = s->row, col = s->col, row7 = s->row7;
1103 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
/* Sentinel: 0x8000,0x8000 cannot occur as a clamped MV pair, so it
 * marks "no candidate found yet" in 'mem'. */
1104 #define INVALID_MV 0x80008000U
1105 uint32_t mem = INVALID_MV;
/* Return an already-decoded sub-block MV verbatim (no clamping);
 * second distinct candidate also terminates the scan. */
1108 #define RETURN_DIRECT_MV(mv) \
1110 uint32_t m = AV_RN32A(&mv); \
1114 } else if (mem == INVALID_MV) { \
1116 } else if (m != mem) { \
/* sub-blocks 1..3 first consult MVs decoded earlier in this block */
1123 if (sb == 2 || sb == 1) {
1124 RETURN_DIRECT_MV(b->mv[0][z]);
1125 } else if (sb == 3) {
1126 RETURN_DIRECT_MV(b->mv[2][z]);
1127 RETURN_DIRECT_MV(b->mv[1][z]);
1128 RETURN_DIRECT_MV(b->mv[0][z]);
/* Same as above, but clamps the candidate to the valid range first. */
1131 #define RETURN_MV(mv) \
1136 clamp_mv(&tmp, &mv, s); \
1137 m = AV_RN32A(&tmp); \
1141 } else if (mem == INVALID_MV) { \
1143 } else if (m != mem) { \
1148 uint32_t m = AV_RN32A(&mv); \
1150 clamp_mv(pmv, &mv, s); \
1152 } else if (mem == INVALID_MV) { \
1154 } else if (m != mem) { \
1155 clamp_mv(pmv, &mv, s); \
1162 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1163 if (mv->ref[0] == ref) {
1164 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1165 } else if (mv->ref[1] == ref) {
1166 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
/* left neighbour — only valid within the current tile column */
1169 if (col > s->tiling.tile_col_start) {
1170 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1171 if (mv->ref[0] == ref) {
1172 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1173 } else if (mv->ref[1] == ref) {
1174 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1182 // previously coded MVs in this neighbourhood, using same reference frame
1183 for (; i < 8; i++) {
1184 int c = p[i][0] + col, r = p[i][1] + row;
1186 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1187 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1189 if (mv->ref[0] == ref) {
1190 RETURN_MV(mv->mv[0]);
1191 } else if (mv->ref[1] == ref) {
1192 RETURN_MV(mv->mv[1]);
1197 // MV at this position in previous frame, using same reference frame
1198 if (s->use_last_frame_mvs) {
1199 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
/* frame-threading: wait until the previous frame has decoded this row */
1201 if (!s->frames[REF_FRAME_MVPAIR].uses_2pass)
1202 ff_thread_await_progress(&s->frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
1203 if (mv->ref[0] == ref) {
1204 RETURN_MV(mv->mv[0]);
1205 } else if (mv->ref[1] == ref) {
1206 RETURN_MV(mv->mv[1]);
/* When the candidate uses a reference with the opposite sign bias,
 * return the negated MV ('scale' selects the flip). */
1210 #define RETURN_SCALE_MV(mv, scale) \
1213 VP56mv mv_temp = { -mv.x, -mv.y }; \
1214 RETURN_MV(mv_temp); \
1220 // previously coded MVs in this neighbourhood, using different reference frame
1221 for (i = 0; i < 8; i++) {
1222 int c = p[i][0] + col, r = p[i][1] + row;
1224 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1225 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1227 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1228 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1230 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1231 // BUG - libvpx has this condition regardless of whether
1232 // we used the first ref MV and pre-scaling
1233 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1234 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1239 // MV at this position in previous frame, using different reference frame
1240 if (s->use_last_frame_mvs) {
1241 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1243 // no need to await_progress, because we already did that above
1244 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1245 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1247 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1248 // BUG - libvpx has this condition regardless of whether
1249 // we used the first ref MV and pre-scaling
1250 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1251 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1258 #undef RETURN_SCALE_MV
/* Decode one MV component delta from the range coder using the
 * per-component probability set s->prob.p.mv_comp[idx] (idx: 0=y, 1=x),
 * updating the matching s->counts for backward adaptation.
 * Layout per VP9 spec: sign, magnitude class, then either the class-0
 * (integer + fractional + optional high-precision) path or the
 * general-class bit string. 'hp' enables the high-precision (1/8-pel)
 * bit. Returns the signed component value.
 * NOTE(review): listing gaps hide the class!=0 / class==0 branch
 * structure and the final shift of 'n' — confirm in the full file. */
1261 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1263 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1264 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1265 s->prob.p.mv_comp[idx].classes);
1267 s->counts.mv_comp[idx].sign[sign]++;
1268 s->counts.mv_comp[idx].classes[c]++;
/* general class: c integer magnitude bits, MSB-first accumulation */
1272 for (n = 0, m = 0; m < c; m++) {
1273 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1275 s->counts.mv_comp[idx].bits[m][bit]++;
/* fractional (1/4-pel) part */
1278 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1280 s->counts.mv_comp[idx].fp[bit]++;
/* optional high-precision (1/8-pel) bit */
1282 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1283 s->counts.mv_comp[idx].hp[bit]++;
1287 // bug in libvpx - we count for bw entropy purposes even if the
1289 s->counts.mv_comp[idx].hp[1]++;
/* class 0: single integer bit + fractional tree + optional hp bit */
1293 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1294 s->counts.mv_comp[idx].class0[n]++;
1295 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1296 s->prob.p.mv_comp[idx].class0_fp[n]);
1297 s->counts.mv_comp[idx].class0_fp[n][bit]++;
1298 n = (n << 3) | (bit << 1);
1300 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1301 s->counts.mv_comp[idx].class0_hp[bit]++;
1305 // bug in libvpx - we count for bw entropy purposes even if the
1307 s->counts.mv_comp[idx].class0_hp[1]++;
/* magnitude is stored biased by 1; apply decoded sign */
1311 return sign ? -(n + 1) : (n + 1);
/* Fill mv[0] (and, for compound prediction, mv[1]) for the current
 * (sub-)block: ZEROMV is handled up front; otherwise a reference MV is
 * predicted via find_ref_mvs() and, for NEWMV, a decoded delta is added
 * per component. 'hp' (set in the gapped branches below) enables
 * high-precision MVs only when allowed by the header AND the predictor
 * is small enough (|x|,|y| < 64), matching libvpx.
 * NOTE(review): listing gaps hide the ZEROMV body, hp rounding code and
 * enclosing braces — confirm in the full file. */
1314 static void fill_mv(VP9Context *s,
1315 VP56mv *mv, int mode, int sb)
1319 if (mode == ZEROMV) {
1324 // FIXME cache this value and reuse for other subblocks
1325 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1326 mode == NEWMV ? -1 : sb);
1327 // FIXME maybe move this code into find_ref_mvs()
1328 if ((mode == NEWMV || sb == -1) &&
1329 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1343 if (mode == NEWMV) {
1344 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1345 s->prob.p.mv_joint);
1347 s->counts.mv_joint[j]++;
/* joint code selects which components carry a delta: y first, then x */
1348 if (j >= MV_JOINT_V)
1349 mv[0].y += read_mv_component(s, 0, hp);
1351 mv[0].x += read_mv_component(s, 1, hp);
/* second reference (compound prediction) — mirrors the first-ref path */
1355 // FIXME cache this value and reuse for other subblocks
1356 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1357 mode == NEWMV ? -1 : sb);
1358 if ((mode == NEWMV || sb == -1) &&
1359 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1373 if (mode == NEWMV) {
1374 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1375 s->prob.p.mv_joint);
1377 s->counts.mv_joint[j]++;
1378 if (j >= MV_JOINT_V)
1379 mv[1].y += read_mv_component(s, 0, hp);
1381 mv[1].x += read_mv_component(s, 1, hp);
/* Splat byte value 'v' over a w x h region of a 2D byte array with the
 * given stride, using the widest aligned store available per width.
 * The 0x0101... multiplications replicate the byte across 16/32/64-bit
 * words. NOTE(review): this listing shows only the per-width constants
 * and one store — the switch over 'w' and the row loops are missing;
 * confirm against the full file. */
1387 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1388 ptrdiff_t stride, int v)
1398 int v16 = v * 0x0101;
1406 uint32_t v32 = v * 0x01010101;
1415 uint64_t v64 = v * 0x0101010101010101ULL;
1421 uint32_t v32 = v * 0x01010101;
1424 AV_WN32A(ptr + 4, v32);
/* Decode all per-block mode information for the current block:
 * segment id, skip flag, intra/inter flag, transform size, intra
 * prediction modes or inter reference frames + modes + filter + MVs,
 * then propagate everything into the above/left context arrays and the
 * frame-wide segmentation/MV-reference buffers used by later blocks
 * and frames. Decoding order is dictated by the bitstream and must not
 * be reordered. NOTE(review): this listing has gaps (braces, #if
 * blocks, some statements) — structural comments below are based on
 * the visible lines only. */
1433 static void decode_mode(AVCodecContext *ctx)
1435 static const uint8_t left_ctx[N_BS_SIZES] = {
1436 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1438 static const uint8_t above_ctx[N_BS_SIZES] = {
1439 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
/* largest transform size permitted for each block size */
1441 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1442 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1443 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1445 VP9Context *s = ctx->priv_data;
1447 int row = s->row, col = s->col, row7 = s->row7;
1448 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
/* w4/h4: block extent in 8x8 units, clipped to the frame edge */
1449 int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1450 int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
1451 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1452 int vref, filter_id;
/* --- 1. segment id (explicit, temporally predicted, or default 0) */
1454 if (!s->segmentation.enabled) {
1456 } else if (s->keyframe || s->intraonly) {
1457 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
1458 } else if (!s->segmentation.update_map ||
1459 (s->segmentation.temporal &&
1460 vp56_rac_get_prob_branchy(&s->c,
1461 s->prob.segpred[s->above_segpred_ctx[col] +
1462 s->left_segpred_ctx[row7]]))) {
1463 if (!s->errorres && !s->segmentation.ignore_refmap) {
1465 uint8_t *refsegmap = s->frames[REF_FRAME_SEGMAP].segmentation_map;
1467 if (!s->frames[REF_FRAME_SEGMAP].uses_2pass)
1468 ff_thread_await_progress(&s->frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
/* predicted seg id = minimum over the co-located reference-map area */
1469 for (y = 0; y < h4; y++) {
1470 int idx_base = (y + row) * 8 * s->sb_cols + col;
1471 for (x = 0; x < w4; x++)
1472 pred = FFMIN(pred, refsegmap[idx_base + x]);
1474 av_assert1(pred < 8);
1480 memset(&s->above_segpred_ctx[col], 1, w4);
1481 memset(&s->left_segpred_ctx[row7], 1, h4);
1483 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1486 memset(&s->above_segpred_ctx[col], 0, w4);
1487 memset(&s->left_segpred_ctx[row7], 0, h4);
1489 if (s->segmentation.enabled &&
1490 (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1491 setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1492 bw4, bh4, 8 * s->sb_cols, b->seg_id);
/* --- 2. skip flag (forced by segment feature, or coded) */
1495 b->skip = s->segmentation.enabled &&
1496 s->segmentation.feat[b->seg_id].skip_enabled;
1498 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1499 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1500 s->counts.skip[c][b->skip]++;
/* --- 3. intra/inter flag (forced by segment ref feature, or coded) */
1503 if (s->keyframe || s->intraonly) {
1505 } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1506 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1510 if (have_a && have_l) {
1511 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1514 c = have_a ? 2 * s->above_intra_ctx[col] :
1515 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1517 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1518 s->counts.intra[c][bit]++;
/* --- 4. transform size (coded only for TX_SWITCHABLE, else capped) */
1522 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1526 c = (s->above_skip_ctx[col] ? max_tx :
1527 s->above_txfm_ctx[col]) +
1528 (s->left_skip_ctx[row7] ? max_tx :
1529 s->left_txfm_ctx[row7]) > max_tx;
1531 c = s->above_skip_ctx[col] ? 1 :
1532 (s->above_txfm_ctx[col] * 2 > max_tx);
1534 } else if (have_l) {
1535 c = s->left_skip_ctx[row7] ? 1 :
1536 (s->left_txfm_ctx[row7] * 2 > max_tx);
1542 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1544 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1546 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1548 s->counts.tx32p[c][b->tx]++;
1551 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1553 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1554 s->counts.tx16p[c][b->tx]++;
1557 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1558 s->counts.tx8p[c][b->tx]++;
1565 b->tx = FFMIN(max_tx, s->txfmmode);
/* --- 5a. keyframe/intraonly: intra modes from fixed kf probabilities,
 * contexted on above/left modes (per 4x4 sub-block for bs > 8x8) */
1568 if (s->keyframe || s->intraonly) {
1569 uint8_t *a = &s->above_mode_ctx[col * 2];
1570 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1573 if (b->bs > BS_8x8) {
1574 // FIXME the memory storage intermediates here aren't really
1575 // necessary, they're just there to make the code slightly
1577 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1578 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1579 if (b->bs != BS_8x4) {
1580 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1581 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1582 l[0] = a[1] = b->mode[1];
1584 l[0] = a[1] = b->mode[1] = b->mode[0];
1586 if (b->bs != BS_4x8) {
1587 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1588 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1589 if (b->bs != BS_8x4) {
1590 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1591 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1592 l[1] = a[1] = b->mode[3];
1594 l[1] = a[1] = b->mode[3] = b->mode[2];
1597 b->mode[2] = b->mode[0];
1598 l[1] = a[1] = b->mode[3] = b->mode[1];
1601 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1602 vp9_default_kf_ymode_probs[*a][*l]);
1603 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1604 // FIXME this can probably be optimized
1605 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1606 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1608 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1609 vp9_default_kf_uvmode_probs[b->mode[3]]);
/* --- 5b. inter frame, intra block: adaptive y_mode probabilities */
1610 } else if (b->intra) {
1612 if (b->bs > BS_8x8) {
1613 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1614 s->prob.p.y_mode[0]);
1615 s->counts.y_mode[0][b->mode[0]]++;
1616 if (b->bs != BS_8x4) {
1617 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1618 s->prob.p.y_mode[0]);
1619 s->counts.y_mode[0][b->mode[1]]++;
1621 b->mode[1] = b->mode[0];
1623 if (b->bs != BS_4x8) {
1624 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1625 s->prob.p.y_mode[0]);
1626 s->counts.y_mode[0][b->mode[2]]++;
1627 if (b->bs != BS_8x4) {
1628 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1629 s->prob.p.y_mode[0]);
1630 s->counts.y_mode[0][b->mode[3]]++;
1632 b->mode[3] = b->mode[2];
1635 b->mode[2] = b->mode[0];
1636 b->mode[3] = b->mode[1];
1639 static const uint8_t size_group[10] = {
1640 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1642 int sz = size_group[b->bs];
1644 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1645 s->prob.p.y_mode[sz]);
1646 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1647 s->counts.y_mode[sz][b->mode[3]]++;
1649 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1650 s->prob.p.uv_mode[b->mode[3]]);
1651 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
/* --- 5c. inter block: context LUT mapping above/left modes to the
 * inter-mode probability context */
1653 static const uint8_t inter_mode_ctx_lut[14][14] = {
1654 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1655 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1656 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1657 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1658 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1659 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1660 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1661 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1662 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1663 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1664 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1665 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1666 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1667 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
/* reference frames: forced by segment feature, or coded below */
1670 if (s->segmentation.feat[b->seg_id].ref_enabled) {
1671 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1673 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1675 // read comp_pred flag
1676 if (s->comppredmode != PRED_SWITCHABLE) {
1677 b->comp = s->comppredmode == PRED_COMPREF;
1681 // FIXME add intra as ref=0xff (or -1) to make these easier?
1684 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1686 } else if (s->above_comp_ctx[col]) {
1687 c = 2 + (s->left_intra_ctx[row7] ||
1688 s->left_ref_ctx[row7] == s->fixcompref);
1689 } else if (s->left_comp_ctx[row7]) {
1690 c = 2 + (s->above_intra_ctx[col] ||
1691 s->above_ref_ctx[col] == s->fixcompref);
1693 c = (!s->above_intra_ctx[col] &&
1694 s->above_ref_ctx[col] == s->fixcompref) ^
1695 (!s->left_intra_ctx[row7] &&
1696 s->left_ref_ctx[row & 7] == s->fixcompref);
1699 c = s->above_comp_ctx[col] ? 3 :
1700 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1702 } else if (have_l) {
1703 c = s->left_comp_ctx[row7] ? 3 :
1704 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1708 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1709 s->counts.comp[c][b->comp]++;
1712 // read actual references
1713 // FIXME probably cache a few variables here to prevent repetitive
1714 // memory accesses below
1715 if (b->comp) /* two references */ {
1716 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1718 b->ref[fix_idx] = s->fixcompref;
1719 // FIXME can this codeblob be replaced by some sort of LUT?
1722 if (s->above_intra_ctx[col]) {
1723 if (s->left_intra_ctx[row7]) {
1726 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1728 } else if (s->left_intra_ctx[row7]) {
1729 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1731 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1733 if (refl == refa && refa == s->varcompref[1]) {
1735 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1736 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1737 (refl == s->fixcompref && refa == s->varcompref[0])) {
1740 c = (refa == refl) ? 3 : 1;
1742 } else if (!s->left_comp_ctx[row7]) {
1743 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1746 c = (refl == s->varcompref[1] &&
1747 refa != s->varcompref[1]) ? 2 : 4;
1749 } else if (!s->above_comp_ctx[col]) {
1750 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1753 c = (refa == s->varcompref[1] &&
1754 refl != s->varcompref[1]) ? 2 : 4;
1757 c = (refl == refa) ? 4 : 2;
1761 if (s->above_intra_ctx[col]) {
1763 } else if (s->above_comp_ctx[col]) {
1764 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1766 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1769 } else if (have_l) {
1770 if (s->left_intra_ctx[row7]) {
1772 } else if (s->left_comp_ctx[row7]) {
1773 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1775 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1780 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1781 b->ref[var_idx] = s->varcompref[bit];
1782 s->counts.comp_ref[c][bit]++;
1783 } else /* single reference */ {
1786 if (have_a && !s->above_intra_ctx[col]) {
1787 if (have_l && !s->left_intra_ctx[row7]) {
1788 if (s->left_comp_ctx[row7]) {
1789 if (s->above_comp_ctx[col]) {
1790 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1791 !s->above_ref_ctx[col]);
1793 c = (3 * !s->above_ref_ctx[col]) +
1794 (!s->fixcompref || !s->left_ref_ctx[row7]);
1796 } else if (s->above_comp_ctx[col]) {
1797 c = (3 * !s->left_ref_ctx[row7]) +
1798 (!s->fixcompref || !s->above_ref_ctx[col]);
1800 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1802 } else if (s->above_intra_ctx[col]) {
1804 } else if (s->above_comp_ctx[col]) {
1805 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1807 c = 4 * (!s->above_ref_ctx[col]);
1809 } else if (have_l && !s->left_intra_ctx[row7]) {
1810 if (s->left_intra_ctx[row7]) {
1812 } else if (s->left_comp_ctx[row7]) {
1813 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1815 c = 4 * (!s->left_ref_ctx[row7]);
1820 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1821 s->counts.single_ref[c][0][bit]++;
1825 // FIXME can this codeblob be replaced by some sort of LUT?
1828 if (s->left_intra_ctx[row7]) {
1829 if (s->above_intra_ctx[col]) {
1831 } else if (s->above_comp_ctx[col]) {
1832 c = 1 + 2 * (s->fixcompref == 1 ||
1833 s->above_ref_ctx[col] == 1);
1834 } else if (!s->above_ref_ctx[col]) {
1837 c = 4 * (s->above_ref_ctx[col] == 1);
1839 } else if (s->above_intra_ctx[col]) {
1840 if (s->left_intra_ctx[row7]) {
1842 } else if (s->left_comp_ctx[row7]) {
1843 c = 1 + 2 * (s->fixcompref == 1 ||
1844 s->left_ref_ctx[row7] == 1);
1845 } else if (!s->left_ref_ctx[row7]) {
1848 c = 4 * (s->left_ref_ctx[row7] == 1);
1850 } else if (s->above_comp_ctx[col]) {
1851 if (s->left_comp_ctx[row7]) {
1852 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1853 c = 3 * (s->fixcompref == 1 ||
1854 s->left_ref_ctx[row7] == 1);
1858 } else if (!s->left_ref_ctx[row7]) {
1859 c = 1 + 2 * (s->fixcompref == 1 ||
1860 s->above_ref_ctx[col] == 1);
1862 c = 3 * (s->left_ref_ctx[row7] == 1) +
1863 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1865 } else if (s->left_comp_ctx[row7]) {
1866 if (!s->above_ref_ctx[col]) {
1867 c = 1 + 2 * (s->fixcompref == 1 ||
1868 s->left_ref_ctx[row7] == 1);
1870 c = 3 * (s->above_ref_ctx[col] == 1) +
1871 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1873 } else if (!s->above_ref_ctx[col]) {
1874 if (!s->left_ref_ctx[row7]) {
1877 c = 4 * (s->left_ref_ctx[row7] == 1);
1879 } else if (!s->left_ref_ctx[row7]) {
1880 c = 4 * (s->above_ref_ctx[col] == 1);
1882 c = 2 * (s->left_ref_ctx[row7] == 1) +
1883 2 * (s->above_ref_ctx[col] == 1);
1886 if (s->above_intra_ctx[col] ||
1887 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1889 } else if (s->above_comp_ctx[col]) {
1890 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1892 c = 4 * (s->above_ref_ctx[col] == 1);
1895 } else if (have_l) {
1896 if (s->left_intra_ctx[row7] ||
1897 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1899 } else if (s->left_comp_ctx[row7]) {
1900 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1902 c = 4 * (s->left_ref_ctx[row7] == 1);
1907 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1908 s->counts.single_ref[c][1][bit]++;
1909 b->ref[0] = 1 + bit;
/* --- 6. inter mode (whole-block sizes read it before the filter) */
1914 if (b->bs <= BS_8x8) {
1915 if (s->segmentation.feat[b->seg_id].skip_enabled) {
1916 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1918 static const uint8_t off[10] = {
1919 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1922 // FIXME this needs to use the LUT tables from find_ref_mvs
1923 // because not all are -1,0/0,-1
1924 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1925 [s->left_mode_ctx[row7 + off[b->bs]]];
1927 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1928 s->prob.p.mv_mode[c]);
1929 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1930 s->counts.mv_mode[c][b->mode[0] - 10]++;
/* --- 7. subpel interpolation filter */
1934 if (s->filtermode == FILTER_SWITCHABLE) {
1937 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1938 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1939 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1940 s->left_filter_ctx[row7] : 3;
1942 c = s->above_filter_ctx[col];
1944 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1945 c = s->left_filter_ctx[row7];
1950 filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1951 s->prob.p.filter[c]);
1952 s->counts.filter[c][filter_id]++;
1953 b->filter = vp9_filter_lut[filter_id];
1955 b->filter = s->filtermode;
/* --- 8. per-sub-block inter modes + MVs for bs > 8x8 */
1958 if (b->bs > BS_8x8) {
1959 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1961 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1962 s->prob.p.mv_mode[c]);
1963 s->counts.mv_mode[c][b->mode[0] - 10]++;
1964 fill_mv(s, b->mv[0], b->mode[0], 0);
1966 if (b->bs != BS_8x4) {
1967 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1968 s->prob.p.mv_mode[c]);
1969 s->counts.mv_mode[c][b->mode[1] - 10]++;
1970 fill_mv(s, b->mv[1], b->mode[1], 1);
1972 b->mode[1] = b->mode[0];
1973 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1974 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1977 if (b->bs != BS_4x8) {
1978 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1979 s->prob.p.mv_mode[c]);
1980 s->counts.mv_mode[c][b->mode[2] - 10]++;
1981 fill_mv(s, b->mv[2], b->mode[2], 2);
1983 if (b->bs != BS_8x4) {
1984 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1985 s->prob.p.mv_mode[c]);
1986 s->counts.mv_mode[c][b->mode[3] - 10]++;
1987 fill_mv(s, b->mv[3], b->mode[3], 3);
1989 b->mode[3] = b->mode[2];
1990 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1991 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1994 b->mode[2] = b->mode[0];
1995 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1996 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1997 b->mode[3] = b->mode[1];
1998 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1999 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
2002 fill_mv(s, b->mv[0], b->mode[0], -1);
2003 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
2004 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2005 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
2006 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2007 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2008 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
2011 vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
/* --- 9. splat decoded syntax into the above/left context arrays;
 * SPLAT_CTX writes 'val' into n consecutive context bytes using the
 * widest aligned store for that width (two variants, presumably
 * selected by a HAVE_FAST_64BIT-style #if that this listing omits —
 * confirm in the full file) */
2015 #define SPLAT_CTX(var, val, n) \
2017 case 1: var = val; break; \
2018 case 2: AV_WN16A(&var, val * 0x0101); break; \
2019 case 4: AV_WN32A(&var, val * 0x01010101); break; \
2020 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
2022 uint64_t v64 = val * 0x0101010101010101ULL; \
2023 AV_WN64A( &var, v64); \
2024 AV_WN64A(&((uint8_t *) &var)[8], v64); \
2029 #define SPLAT_CTX(var, val, n) \
2031 case 1: var = val; break; \
2032 case 2: AV_WN16A(&var, val * 0x0101); break; \
2033 case 4: AV_WN32A(&var, val * 0x01010101); break; \
2035 uint32_t v32 = val * 0x01010101; \
2036 AV_WN32A( &var, v32); \
2037 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2041 uint32_t v32 = val * 0x01010101; \
2042 AV_WN32A( &var, v32); \
2043 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2044 AV_WN32A(&((uint8_t *) &var)[8], v32); \
2045 AV_WN32A(&((uint8_t *) &var)[12], v32); \
2051 switch (bwh_tab[1][b->bs][0]) {
2052 #define SET_CTXS(dir, off, n) \
2054 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
2055 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
2056 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2057 if (!s->keyframe && !s->intraonly) { \
2058 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
2059 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
2060 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
2062 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2063 if (s->filtermode == FILTER_SWITCHABLE) { \
2064 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2069 case 1: SET_CTXS(above, col, 1); break;
2070 case 2: SET_CTXS(above, col, 2); break;
2071 case 4: SET_CTXS(above, col, 4); break;
2072 case 8: SET_CTXS(above, col, 8); break;
2074 switch (bwh_tab[1][b->bs][1]) {
2075 case 1: SET_CTXS(left, row7, 1); break;
2076 case 2: SET_CTXS(left, row7, 2); break;
2077 case 4: SET_CTXS(left, row7, 4); break;
2078 case 8: SET_CTXS(left, row7, 8); break;
/* --- 10. cache this block's MVs in the left/above MV context arrays
 * for MV prediction of subsequent blocks */
2083 if (!s->keyframe && !s->intraonly) {
2084 if (b->bs > BS_8x8) {
2085 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2087 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2088 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2089 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2090 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2091 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2092 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2093 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2094 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2096 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2098 for (n = 0; n < w4 * 2; n++) {
2099 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2100 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2102 for (n = 0; n < h4 * 2; n++) {
2103 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2104 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
/* --- 11. store refs/MVs into the frame-wide mvref buffer, consumed by
 * find_ref_mvs() for this frame and as temporal MVs for the next */
2110 for (y = 0; y < h4; y++) {
2111 int x, o = (row + y) * s->sb_cols * 8 + col;
2112 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
2115 for (x = 0; x < w4; x++) {
2119 } else if (b->comp) {
2120 for (x = 0; x < w4; x++) {
2121 mv[x].ref[0] = b->ref[0];
2122 mv[x].ref[1] = b->ref[1];
2123 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2124 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2127 for (x = 0; x < w4; x++) {
2128 mv[x].ref[0] = b->ref[0];
2130 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
/* Decode one block of transform coefficients from the range coder.
 * Per coefficient position (in 'scan' order): an EOB decision, a
 * zero/nonzero decision, then a token (1, 2..4, cat1..cat6) whose tail
 * bits use fixed probabilities. 'cache' tracks per-position magnitudes
 * so the nonzero context ('nnz') for the next position can be derived
 * from its neighbours 'nb'. Updates cnt/eob counters for backward
 * adaptation; p[3..10] is lazily filled from the pareto model.
 * is_tx32x32 selects the /2 dequant used by the 32x32 transform;
 * is8bitsperpixel/bpp select coefficient storage width and cat6 range.
 * NOTE(review): listing gaps hide the do/while framing, the break/skip
 * labels and the return value — confirm in the full file. */
2136 // FIXME merge cnt/eob arguments?
2137 static av_always_inline int
2138 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2139 int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
2140 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2141 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2142 const int16_t *band_counts, const int16_t *qmul)
2144 int i = 0, band = 0, band_left = band_counts[band];
2145 uint8_t *tp = p[0][nnz];
2146 uint8_t cache[1024];
2151 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2152 eob[band][nnz][val]++;
2157 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2158 cnt[band][nnz][0]++;
2160 band_left = band_counts[++band];
/* context for next position: average of the two scanned neighbours */
2162 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2164 if (++i == n_coeffs)
2165 break; //invalid input; blocks should end with EOB
2170 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2171 cnt[band][nnz][1]++;
2175 // fill in p[3-10] (model fill) - only once per frame for each pos
2177 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2179 cnt[band][nnz][2]++;
2180 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2181 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2182 cache[rc] = val = 2;
2184 val = 3 + vp56_rac_get_prob(c, tp[5]);
2187 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2189 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2190 val = 5 + vp56_rac_get_prob(c, 159);
2192 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2193 val += vp56_rac_get_prob(c, 145);
/* cat3/4: 3- and 4-bit tails with fixed probabilities */
2197 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2198 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2199 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2200 val += (vp56_rac_get_prob(c, 148) << 1);
2201 val += vp56_rac_get_prob(c, 140);
2203 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2204 val += (vp56_rac_get_prob(c, 155) << 2);
2205 val += (vp56_rac_get_prob(c, 140) << 1);
2206 val += vp56_rac_get_prob(c, 135);
2208 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2209 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2210 val += (vp56_rac_get_prob(c, 157) << 3);
2211 val += (vp56_rac_get_prob(c, 141) << 2);
2212 val += (vp56_rac_get_prob(c, 134) << 1);
2213 val += vp56_rac_get_prob(c, 130);
/* cat6: extra high bits only for >8bpp content */
2216 if (!is8bitsperpixel) {
2218 val += vp56_rac_get_prob(c, 255) << 17;
2219 val += vp56_rac_get_prob(c, 255) << 16;
2221 val += (vp56_rac_get_prob(c, 255) << 15);
2222 val += (vp56_rac_get_prob(c, 255) << 14);
2224 val += (vp56_rac_get_prob(c, 254) << 13);
2225 val += (vp56_rac_get_prob(c, 254) << 12);
2226 val += (vp56_rac_get_prob(c, 254) << 11);
2227 val += (vp56_rac_get_prob(c, 252) << 10);
2228 val += (vp56_rac_get_prob(c, 249) << 9);
2229 val += (vp56_rac_get_prob(c, 243) << 8);
2230 val += (vp56_rac_get_prob(c, 230) << 7);
2231 val += (vp56_rac_get_prob(c, 196) << 6);
2232 val += (vp56_rac_get_prob(c, 177) << 5);
2233 val += (vp56_rac_get_prob(c, 153) << 4);
2234 val += (vp56_rac_get_prob(c, 140) << 3);
2235 val += (vp56_rac_get_prob(c, 133) << 2);
2236 val += (vp56_rac_get_prob(c, 130) << 1);
2237 val += vp56_rac_get_prob(c, 129);
/* store as int16 for 8bpp, int32 (two int16 slots) otherwise */
2241 #define STORE_COEF(c, i, v) do { \
2242 if (is8bitsperpixel) { \
2245 AV_WN32A(&c[i * 2], v); \
2249 band_left = band_counts[++band];
2251 STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
2253 STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
2254 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2256 } while (++i < n_coeffs);
/* 8bpp, non-32x32-transform coefficient decode: thin wrapper that
 * fixes is_tx32x32=0, is8bitsperpixel=1, bpp=8 for the generic path. */
2261 static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2262 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2263 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2264 const int16_t (*nb)[2], const int16_t *band_counts,
2265 const int16_t *qmul)
2267 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
2268 nnz, scan, nb, band_counts, qmul);
/* 8bpp, 32x32-transform coefficient decode: wrapper fixing
 * is_tx32x32=1 (enables the /2 dequant), is8bitsperpixel=1, bpp=8. */
2271 static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2272 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2273 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2274 const int16_t (*nb)[2], const int16_t *band_counts,
2275 const int16_t *qmul)
2277 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
2278 nnz, scan, nb, band_counts, qmul);
/* High-bit-depth, non-32x32-transform coefficient decode: wrapper
 * fixing is_tx32x32=0, is8bitsperpixel=0, passing the stream's bpp. */
2281 static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2282 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2283 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2284 const int16_t (*nb)[2], const int16_t *band_counts,
2285 const int16_t *qmul)
2287 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
2288 nnz, scan, nb, band_counts, qmul);
2291 static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2292 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2293 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2294 const int16_t (*nb)[2], const int16_t *band_counts,
2295 const int16_t *qmul)
2297 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
2298 nnz, scan, nb, band_counts, qmul);
/* Decode the residual coefficients for the current block: luma plane first,
 * then both chroma planes. Templated on bit depth via is8bitsperpixel (which
 * selects the _8bpp vs _16bpp per-sub-block decoders and the coefficient
 * stride). Returns nonzero iff any coefficient in the whole block is nonzero.
 * NOTE(review): this extract is missing lines (original line numbers are
 * fused at the start of each line and some are absent); comments below only
 * describe what the visible code shows. */
2301 static av_always_inline int decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
2303 VP9Context *s = ctx->priv_data;
2305 int row = s->row, col = s->col;
/* Probability/count tables are first indexed for the luma plane; they are
 * re-pointed at the chroma tables further down (lines 2433-2435). */
2306 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2307 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2308 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2309 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
/* Clip the coefficient loops to the visible picture area. */
2310 int end_x = FFMIN(2 * (s->cols - col), w4);
2311 int end_y = FFMIN(2 * (s->rows - row), h4);
2312 int n, pl, x, y, res;
2313 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
2314 int tx = 4 * s->lossless + b->tx;
2315 const int16_t * const *yscans = vp9_scans[tx];
2316 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2317 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2318 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
/* Above/left non-zero-coefficient context rows for the luma plane. */
2319 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2320 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
/* Coefficient counts per probability band, per transform size (4x4..32x32). */
2321 static const int16_t band_counts[4][8] = {
2322 { 1, 2, 3, 4, 3, 16 - 13 },
2323 { 1, 2, 3, 4, 11, 64 - 21 },
2324 { 1, 2, 3, 4, 11, 256 - 21 },
2325 { 1, 2, 3, 4, 11, 1024 - 21 },
2327 const int16_t *y_band_counts = band_counts[b->tx];
2328 const int16_t *uv_band_counts = band_counts[b->uvtx];
2329 int bytesperpixel = is8bitsperpixel ? 1 : 2;
2330 int total_coeff = 0;
/* MERGE/MERGE_CTX: collapse several per-4x4 nnz context entries into one
 * boolean per transform unit before decoding at a larger transform size. */
2332 #define MERGE(la, end, step, rd) \
2333 for (n = 0; n < end; n += step) \
2334 la[n] = !!rd(&la[n])
2335 #define MERGE_CTX(step, rd) \
2337 MERGE(l, end_y, step, rd); \
2338 MERGE(a, end_x, step, rd); \
/* Per-transform-unit luma decode loop; `v` selects the plain vs `32` variant
 * of decode_coeffs_b, `mode_index` the sub-block prediction mode. */
2341 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2342 for (n = 0, y = 0; y < end_y; y += step) { \
2343 for (x = 0; x < end_x; x += step, n += step * step) { \
2344 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2345 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2346 (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
2347 c, e, p, a[x] + l[y], yscans[txtp], \
2348 ynbs[txtp], y_band_counts, qmul[0]); \
2349 a[x] = l[y] = !!res; \
2350 total_coeff |= !!res; \
2352 AV_WN16A(&s->eob[n], res); \
/* SPLAT/SPLAT_CTX: after decoding, replicate the single per-unit nnz flag
 * back across all covered 4x4 context entries (word-sized stores on the
 * aligned fast path, memset on the clipped tail). */
2359 #define SPLAT(la, end, step, cond) \
2361 for (n = 1; n < end; n += step) \
2362 la[n] = la[n - 1]; \
2363 } else if (step == 4) { \
2365 for (n = 0; n < end; n += step) \
2366 AV_WN32A(&la[n], la[n] * 0x01010101); \
2368 for (n = 0; n < end; n += step) \
2369 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2371 } else /* step == 8 */ { \
2373 if (HAVE_FAST_64BIT) { \
2374 for (n = 0; n < end; n += step) \
2375 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2377 for (n = 0; n < end; n += step) { \
2378 uint32_t v32 = la[n] * 0x01010101; \
2379 AV_WN32A(&la[n], v32); \
2380 AV_WN32A(&la[n + 4], v32); \
2384 for (n = 0; n < end; n += step) \
2385 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2388 #define SPLAT_CTX(step) \
2390 SPLAT(a, end_x, step, end_x == w4); \
2391 SPLAT(l, end_y, step, end_y == h4); \
/* Luma: dispatch on b->tx (TX_4X4..TX_32X32); per-sub-block modes are only
 * used below 8x8 blocks (mode_index = n), otherwise mode 0. */
2397 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2400 MERGE_CTX(2, AV_RN16A);
2401 DECODE_Y_COEF_LOOP(2, 0,);
2405 MERGE_CTX(4, AV_RN32A);
2406 DECODE_Y_COEF_LOOP(4, 0,);
2410 MERGE_CTX(8, AV_RN64A);
2411 DECODE_Y_COEF_LOOP(8, 0, 32);
/* Chroma counterpart of DECODE_Y_COEF_LOOP: fixed DCT_DCT scan, writes to
 * s->uvblock[pl] / s->uveob[pl]. */
2416 #define DECODE_UV_COEF_LOOP(step, v) \
2417 for (n = 0, y = 0; y < end_y; y += step) { \
2418 for (x = 0; x < end_x; x += step, n += step * step) { \
2419 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2420 (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
2421 16 * step * step, c, e, p, a[x] + l[y], \
2422 uvscan, uvnb, uv_band_counts, qmul[1]); \
2423 a[x] = l[y] = !!res; \
2424 total_coeff |= !!res; \
2426 AV_WN16A(&s->uveob[pl][n], res); \
2428 s->uveob[pl][n] = res; \
/* Switch probability/count tables over to the chroma plane. */
2433 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2434 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2435 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2440 for (pl = 0; pl < 2; pl++) {
2441 a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2442 l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2445 DECODE_UV_COEF_LOOP(1,);
2448 MERGE_CTX(2, AV_RN16A);
2449 DECODE_UV_COEF_LOOP(2,);
2453 MERGE_CTX(4, AV_RN32A);
2454 DECODE_UV_COEF_LOOP(4,);
2458 MERGE_CTX(8, AV_RN64A);
2459 DECODE_UV_COEF_LOOP(8, 32);
2468 static int decode_coeffs_8bpp(AVCodecContext *ctx)
2470 return decode_coeffs(ctx, 1);
2473 static int decode_coeffs_16bpp(AVCodecContext *ctx)
2475 return decode_coeffs(ctx, 0);
/* Prepare the above (*a) and left (l) pixel edge buffers for one intra
 * prediction unit, substituting DC-style fallback modes when neighbouring
 * pixels are unavailable (picture/tile border), and edge-extending when only
 * part of the needed pixels exist. Returns the possibly-remapped mode.
 * NOTE(review): this extract is missing lines (fused original line numbers,
 * some absent); comments describe only what is visible. */
2478 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2479 uint8_t *dst_edge, ptrdiff_t stride_edge,
2480 uint8_t *dst_inner, ptrdiff_t stride_inner,
2481 uint8_t *l, int col, int x, int w,
2482 int row, int y, enum TxfmMode tx,
2483 int p, int ss_h, int ss_v, int bytesperpixel)
/* Neighbour availability: top exists unless this is the very first row of
 * the picture; left exists unless we are at the tile's left column. */
2485 int have_top = row > 0 || y > 0;
2486 int have_left = col > s->tiling.tile_col_start || x > 0;
2487 int have_right = x < w - 1;
/* Mode remap table: replaces modes whose source pixels are missing with a
 * DC variant (DC_127/DC_128/DC_129, LEFT_DC, TOP_DC). */
2489 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2490 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2491 { DC_127_PRED, VERT_PRED } },
2492 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2493 { HOR_PRED, HOR_PRED } },
2494 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2495 { LEFT_DC_PRED, DC_PRED } },
2496 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2497 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2498 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2499 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2500 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2501 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2502 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2503 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2504 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2505 { DC_127_PRED, VERT_LEFT_PRED } },
2506 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2507 { HOR_UP_PRED, HOR_UP_PRED } },
2508 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2509 { HOR_PRED, TM_VP8_PRED } },
/* Per-mode description of which neighbour pixels the DSP predictor reads. */
2511 static const struct {
2512 uint8_t needs_left:1;
2513 uint8_t needs_top:1;
2514 uint8_t needs_topleft:1;
2515 uint8_t needs_topright:1;
2516 uint8_t invert_left:1;
2517 } edges[N_INTRA_PRED_MODES] = {
2518 [VERT_PRED] = { .needs_top = 1 },
2519 [HOR_PRED] = { .needs_left = 1 },
2520 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2521 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2522 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2523 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2524 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2525 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2526 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2527 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2528 [LEFT_DC_PRED] = { .needs_left = 1 },
2529 [TOP_DC_PRED] = { .needs_top = 1 },
2530 [DC_128_PRED] = { 0 },
2531 [DC_127_PRED] = { 0 },
2532 [DC_129_PRED] = { 0 }
2535 av_assert2(mode >= 0 && mode < 10);
2536 mode = mode_conv[mode][have_left][have_top];
2537 if (edges[mode].needs_top) {
2538 uint8_t *top, *topleft;
2539 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2540 int n_px_need_tr = 0;
2542 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2545 // if top of sb64-row, use s->intra_pred_data[] instead of
2546 // dst[-stride] for intra prediction (it contains pre- instead of
2547 // post-loopfilter data)
2549 top = !(row & 7) && !y ?
2550 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2551 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2553 topleft = !(row & 7) && !y ?
2554 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2555 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2556 &dst_inner[-stride_inner];
/* Fast path: all required top (and top-right) pixels exist in one row. */
2560 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2561 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2562 n_px_need + n_px_need_tr <= n_px_have {
2566 if (n_px_need <= n_px_have) {
2567 memcpy(*a, top, n_px_need * bytesperpixel);
/* memset_bpp: replicate one pixel value across `num` pixels, handling
 * both 1- and 2-byte-per-pixel layouts. */
2569 #define STORE_COEF_memset_bpp_placeholder
2569 #define memset_bpp(c, i1, v, i2, num) do { \
2570 if (bytesperpixel == 1) { \
2571 memset(&(c)[(i1)], (v)[(i2)], (num)); \
2573 int n, val = AV_RN16A(&(v)[(i2) * 2]); \
2574 for (n = 0; n < (num); n++) { \
2575 AV_WN16A(&(c)[((i1) + n) * 2], val); \
/* Partial top row: copy what exists, edge-extend the last pixel. */
2579 memcpy(*a, top, n_px_have * bytesperpixel);
2580 memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
/* memset_val: fill with a constant sample value (bit-depth aware). */
2583 #define memset_val(c, val, num) do { \
2584 if (bytesperpixel == 1) { \
2585 memset((c), (val), (num)); \
2588 for (n = 0; n < (num); n++) { \
2589 AV_WN16A(&(c)[n * 2], (val)); \
2593 memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
2595 if (edges[mode].needs_topleft) {
2596 if (have_left && have_top) {
/* assign_bpp: copy a single pixel (1 or 2 bytes). */
2597 #define assign_bpp(c, i1, v, i2) do { \
2598 if (bytesperpixel == 1) { \
2599 (c)[(i1)] = (v)[(i2)]; \
2601 AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
2604 assign_bpp(*a, -1, topleft, -1);
/* assign_val: store a constant sample at one pixel position. */
2606 #define assign_val(c, i, v) do { \
2607 if (bytesperpixel == 1) { \
2610 AV_WN16A(&(c)[(i) * 2], (v)); \
2613 assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
2616 if (tx == TX_4X4 && edges[mode].needs_topright) {
2617 if (have_top && have_right &&
2618 n_px_need + n_px_need_tr <= n_px_have) {
2619 memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
2621 memset_bpp(*a, 4, *a, 3, 4);
2626 if (edges[mode].needs_left) {
2628 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2629 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2630 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
/* HOR_UP reads the left column bottom-to-top (invert_left); everything
 * else top-to-bottom. Missing pixels are edge-extended. */
2632 if (edges[mode].invert_left) {
2633 if (n_px_need <= n_px_have) {
2634 for (i = 0; i < n_px_need; i++)
2635 assign_bpp(l, i, &dst[i * stride], -1);
2637 for (i = 0; i < n_px_have; i++)
2638 assign_bpp(l, i, &dst[i * stride], -1);
2639 memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
2642 if (n_px_need <= n_px_have) {
2643 for (i = 0; i < n_px_need; i++)
2644 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2646 for (i = 0; i < n_px_have; i++)
2647 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2648 memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
2652 memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
/* Intra reconstruction for the current block: for each transform unit, build
 * prediction edges (check_intra_mode), run the DSP intra predictor, then add
 * the inverse-transformed residual. Luma first, then both chroma planes.
 * Templated on bytesperpixel (1 = 8bpp, 2 = high bit depth).
 * NOTE(review): this extract is missing lines; comments describe only the
 * visible code. */
2659 static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
2660 ptrdiff_t uv_off, int bytesperpixel)
2662 VP9Context *s = ctx->priv_data;
2664 int row = s->row, col = s->col;
2665 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2666 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2667 int end_x = FFMIN(2 * (s->cols - col), w4);
2668 int end_y = FFMIN(2 * (s->rows - row), h4);
2669 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2670 int uvstep1d = 1 << b->uvtx, p;
/* dst is the (possibly temporary) working surface; dst_r points into the
 * actual reference frame, used as the prediction-edge source. */
2671 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2672 LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
2673 LOCAL_ALIGNED_32(uint8_t, l, [64]);
2675 for (n = 0, y = 0; y < end_y; y += step1d) {
2676 uint8_t *ptr = dst, *ptr_r = dst_r;
2677 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
2678 ptr_r += 4 * step1d * bytesperpixel, n += step) {
/* Sub-8x8 blocks carry one mode per 4x4 sub-block. */
2679 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2681 uint8_t *a = &a_buf[32];
2682 enum TxfmType txtp = vp9_intra_txfm_type[mode];
/* eob of 0 (skip) means no residual to add for this unit. */
2683 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2685 mode = check_intra_mode(s, mode, &a, ptr_r,
2686 s->frames[CUR_FRAME].tf.f->linesize[0],
2687 ptr, s->y_stride, l,
2688 col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
2689 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2691 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2692 s->block + 16 * n * bytesperpixel, eob);
2694 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2695 dst += 4 * step1d * s->y_stride;
/* Chroma: same structure, fixed DCT_DCT transform type, b->uvmode. */
2702 step = 1 << (b->uvtx * 2);
2703 for (p = 0; p < 2; p++) {
2704 dst = s->dst[1 + p];
2705 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2706 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2707 uint8_t *ptr = dst, *ptr_r = dst_r;
2708 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
2709 ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
2710 int mode = b->uvmode;
2711 uint8_t *a = &a_buf[32];
2712 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2714 mode = check_intra_mode(s, mode, &a, ptr_r,
2715 s->frames[CUR_FRAME].tf.f->linesize[1],
2716 ptr, s->uv_stride, l, col, x, w4, row, y,
2717 b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
2718 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2720 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2721 s->uvblock[p] + 16 * n * bytesperpixel, eob);
2723 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2724 dst += 4 * uvstep1d * s->uv_stride;
2729 static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2731 intra_recon(ctx, y_off, uv_off, 1);
2734 static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2736 intra_recon(ctx, y_off, uv_off, 2);
/* Luma motion compensation when the reference frame has a different size
 * than the current frame (scaled prediction). Clips the MV, scales position
 * and MV by the per-ref scale factors, waits for the reference row to be
 * decoded (frame-threading), and falls back to emulated_edge_mc when the
 * filter would read outside the reference.
 * NOTE(review): this extract is missing lines; comments describe only the
 * visible code. */
2739 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2740 uint8_t *dst, ptrdiff_t dst_stride,
2741 const uint8_t *ref, ptrdiff_t ref_stride,
2742 ThreadFrame *ref_frame,
2743 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2744 int bw, int bh, int w, int h, int bytesperpixel,
2745 const uint16_t *scale, const uint8_t *step)
/* 14-bit fixed-point scaling of a coordinate/MV component. */
2747 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2749 int refbw_m1, refbh_m1;
/* Clip the MV so the prediction stays within a bounded distance of the
 * visible area (units: 1/8 pel, hence << 3). */
2753 mv.x = av_clip(in_mv->x, -(x + bw + 4) << 3, (s->cols * 8 - x + 3) << 3);
2754 mv.y = av_clip(in_mv->y, -(y + bh + 4) << 3, (s->rows * 8 - y + 3) << 3);
2755 // BUG libvpx seems to scale the two components separately. This introduces
2756 // rounding errors but we have to reproduce them to be exactly compatible
2757 // with the output from libvpx...
2758 mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
2759 my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
2763 ref += y * ref_stride + x * bytesperpixel;
/* Last reference row/col the scaled filter will touch, minus one. */
2766 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2767 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2768 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2769 // we use +7 because the last 7 pixels of each sbrow can be changed in
2770 // the longest loopfilter of the next sbrow
2771 th = (y + refbh_m1 + 4 + 7) >> 6;
2772 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
/* 8-tap filter reads 3 pixels before and 4 after; pad via edge emulation
 * if the read window leaves the reference frame. */
2773 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2774 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2775 ref - 3 * ref_stride - 3 * bytesperpixel,
2777 refbw_m1 + 8, refbh_m1 + 8,
2778 x - 3, y - 3, w, h);
2779 ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2782 smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
/* Chroma counterpart of mc_luma_scaled: handles both chroma planes (U and V
 * share the motion vector) and chroma subsampling (ss_h/ss_v). Reproduces a
 * known libvpx clipping bug for bit-exactness (see BUG comments below).
 * NOTE(review): this extract is missing lines; comments describe only the
 * visible code. */
2785 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2786 uint8_t *dst_u, uint8_t *dst_v,
2787 ptrdiff_t dst_stride,
2788 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2789 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2790 ThreadFrame *ref_frame,
2791 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2792 int bw, int bh, int w, int h, int bytesperpixel,
2793 const uint16_t *scale, const uint8_t *step)
2796 int refbw_m1, refbh_m1;
/* Horizontal: subsampled path clips in 1/16-pel units (<< 4), otherwise in
 * 1/8-pel (<< 3); mirrors libvpx behaviour exactly. */
2801 // BUG https://code.google.com/p/webm/issues/detail?id=820
2802 mv.x = av_clip(in_mv->x, -(x + bw + 4) << 4, (s->cols * 4 - x + 3) << 4);
2803 mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2805 mv.x = av_clip(in_mv->x, -(x + bw + 4) << 3, (s->cols * 8 - x + 3) << 3);
2806 mx = scale_mv(mv.x << 1, 0) + scale_mv(x * 16, 0);
/* Vertical: same scheme as horizontal. */
2809 // BUG https://code.google.com/p/webm/issues/detail?id=820
2810 mv.y = av_clip(in_mv->y, -(y + bh + 4) << 4, (s->rows * 4 - y + 3) << 4);
2811 my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2813 mv.y = av_clip(in_mv->y, -(y + bh + 4) << 3, (s->rows * 8 - y + 3) << 3);
2814 my = scale_mv(mv.y << 1, 1) + scale_mv(y * 16, 1);
2819 ref_u += y * src_stride_u + x * bytesperpixel;
2820 ref_v += y * src_stride_v + x * bytesperpixel;
2823 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2824 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2825 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2826 // we use +7 because the last 7 pixels of each sbrow can be changed in
2827 // the longest loopfilter of the next sbrow
2828 th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2829 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
/* Edge emulation path: both planes are padded through the same scratch
 * buffer (288-byte rows), one after the other. */
2830 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2831 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2832 ref_u - 3 * src_stride_u - 3 * bytesperpixel,
2834 refbw_m1 + 8, refbh_m1 + 8,
2835 x - 3, y - 3, w, h);
2836 ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2837 smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
2839 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2840 ref_v - 3 * src_stride_v - 3 * bytesperpixel,
2842 refbw_m1 + 8, refbh_m1 + 8,
2843 x - 3, y - 3, w, h);
2844 ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2845 smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
2847 smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2848 smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
/* Scaled inter prediction entry points: mc_luma_dir/mc_chroma_dir adapt the
 * generic calls in vp9_mc_template.c to the *_scaled MC helpers above, with
 * per-reference scale/step tables. The template is included twice to stamp
 * out 8bpp and 16bpp variants (FN name suffix, BYTES_PER_PIXEL). */
2852 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2853 mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
2854 mv, bw, bh, w, h, bytesperpixel, \
2855 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2856 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2857 row, col, mv, bw, bh, w, h, i) \
2858 mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2859 row, col, mv, bw, bh, w, h, bytesperpixel, \
2860 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2862 #define FN(x) x##_scaled_8bpp
2863 #define BYTES_PER_PIXEL 1
2864 #include "vp9_mc_template.c"
2866 #undef BYTES_PER_PIXEL
2867 #define FN(x) x##_scaled_16bpp
2868 #define BYTES_PER_PIXEL 2
2869 #include "vp9_mc_template.c"
/* Tear the adapter macros back down after the second instantiation. */
2871 #undef mc_chroma_dir
2873 #undef BYTES_PER_PIXEL
/* Luma motion compensation when reference and current frame are the same
 * size (the common case). Waits for reference-row decode progress, then
 * either calls the MC function directly or pads via emulated_edge_mc when
 * the subpel filter window (3 before / 4 after, only when the corresponding
 * MV component is fractional) leaves the reference frame.
 * NOTE(review): this extract is missing lines; comments describe only the
 * visible code. */
2876 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2877 uint8_t *dst, ptrdiff_t dst_stride,
2878 const uint8_t *ref, ptrdiff_t ref_stride,
2879 ThreadFrame *ref_frame,
2880 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2881 int bw, int bh, int w, int h, int bytesperpixel)
2883 int mx = mv->x, my = mv->y, th;
2887 ref += y * ref_stride + x * bytesperpixel;
2890 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2891 // we use +7 because the last 7 pixels of each sbrow can be changed in
2892 // the longest loopfilter of the next sbrow
2893 th = (y + bh + 4 * !!my + 7) >> 6;
2894 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
/* !!mx / !!my: padding is only needed in a dimension whose MV component is
 * non-zero (fractional filtering reads extra pixels there). */
2895 if (x < !!mx * 3 || y < !!my * 3 ||
2896 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2897 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2898 ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
2900 bw + !!mx * 7, bh + !!my * 7,
2901 x - !!mx * 3, y - !!my * 3, w, h);
2902 ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
/* MC function is picked by fractional-ness of each MV component; MV is in
 * 1/8 pel here, DSP expects 1/16 pel (hence << 1). */
2905 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
/* Chroma counterpart of mc_luma_unscaled: both chroma planes share the MV,
 * which is shifted up by the subsampling factor (ss_h/ss_v) to convert luma
 * MV units into chroma pel units.
 * NOTE(review): this extract is missing lines; comments describe only the
 * visible code. */
2908 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2909 uint8_t *dst_u, uint8_t *dst_v,
2910 ptrdiff_t dst_stride,
2911 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2912 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2913 ThreadFrame *ref_frame,
2914 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2915 int bw, int bh, int w, int h, int bytesperpixel)
2917 int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
2921 ref_u += y * src_stride_u + x * bytesperpixel;
2922 ref_v += y * src_stride_v + x * bytesperpixel;
2925 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2926 // we use +7 because the last 7 pixels of each sbrow can be changed in
2927 // the longest loopfilter of the next sbrow
2928 th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2929 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
/* Edge emulation: each plane is padded through the shared scratch buffer
 * (160-byte rows) and filtered immediately, U then V. */
2930 if (x < !!mx * 3 || y < !!my * 3 ||
2931 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2932 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2933 ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
2935 bw + !!mx * 7, bh + !!my * 7,
2936 x - !!mx * 3, y - !!my * 3, w, h);
2937 ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2938 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
2940 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2941 ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
2943 bw + !!mx * 7, bh + !!my * 7,
2944 x - !!mx * 3, y - !!my * 3, w, h);
2945 ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2946 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
2948 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2949 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
/* Unscaled inter prediction entry points: mc_luma_dir/mc_chroma_dir adapt
 * the generic calls in vp9_mc_template.c to the *_unscaled MC helpers above.
 * The template is included twice to stamp out 8bpp and 16bpp variants. */
2953 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2954 mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2955 mv, bw, bh, w, h, bytesperpixel)
2956 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2957 row, col, mv, bw, bh, w, h, i) \
2958 mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2959 row, col, mv, bw, bh, w, h, bytesperpixel)
2961 #define FN(x) x##_8bpp
2962 #define BYTES_PER_PIXEL 1
2963 #include "vp9_mc_template.c"
2965 #undef BYTES_PER_PIXEL
2966 #define FN(x) x##_16bpp
2967 #define BYTES_PER_PIXEL 2
2968 #include "vp9_mc_template.c"
/* Fixed: the previous "#undef mc_luma_dir_dir"/"#undef mc_chroma_dir_dir"
 * were typos — no such macros exist (a stray #undef is a silent no-op), so
 * the real adapter macros leaked past this section. Undef the real names. */
2969 #undef mc_luma_dir
2970 #undef mc_chroma_dir
2972 #undef BYTES_PER_PIXEL
/* Inter reconstruction for the current block: run motion-compensated
 * prediction (scaled variant if either used reference has a non-unity scale
 * factor), then add the inverse-transformed residual for luma and chroma.
 * Templated on bytesperpixel (1 = 8bpp, 2 = high bit depth).
 * NOTE(review): this extract is missing lines; comments describe only the
 * visible code. */
2975 static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
2977 VP9Context *s = ctx->priv_data;
2979 int row = s->row, col = s->col;
/* mvscale[ref][0] != 0 marks a reference with a different resolution. */
2981 if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
2982 if (bytesperpixel == 1) {
2983 inter_pred_scaled_8bpp(ctx);
2985 inter_pred_scaled_16bpp(ctx);
2988 if (bytesperpixel == 1) {
2989 inter_pred_8bpp(ctx);
2991 inter_pred_16bpp(ctx);
2995 /* mostly copied intra_recon() */
2997 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2998 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2999 int end_x = FFMIN(2 * (s->cols - col), w4);
3000 int end_y = FFMIN(2 * (s->rows - row), h4);
3001 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
3002 int uvstep1d = 1 << b->uvtx, p;
3003 uint8_t *dst = s->dst[0];
/* Luma residual: inter blocks always use DCT_DCT. */
3006 for (n = 0, y = 0; y < end_y; y += step1d) {
3008 for (x = 0; x < end_x; x += step1d,
3009 ptr += 4 * step1d * bytesperpixel, n += step) {
3010 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
3013 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
3014 s->block + 16 * n * bytesperpixel, eob);
3016 dst += 4 * s->y_stride * step1d;
/* Chroma residual, both planes. */
3022 step = 1 << (b->uvtx * 2);
3023 for (p = 0; p < 2; p++) {
3024 dst = s->dst[p + 1];
3025 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
3027 for (x = 0; x < end_x; x += uvstep1d,
3028 ptr += 4 * uvstep1d * bytesperpixel, n += step) {
3029 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
3032 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
3033 s->uvblock[p] + 16 * n * bytesperpixel, eob);
3035 dst += 4 * uvstep1d * s->uv_stride;
3041 static void inter_recon_8bpp(AVCodecContext *ctx)
3043 inter_recon(ctx, 1);
3046 static void inter_recon_16bpp(AVCodecContext *ctx)
3048 inter_recon(ctx, 2);
/* Build the loop-filter edge masks for one block: for each of the 8 rows of
 * the enclosing superblock, record which 8-pixel columns need the 16-, 8- or
 * 4-wide filter (mask[0] = vertical/col edges, mask[1] = horizontal/row
 * edges; last index 0..3 = filter size class, see VP9Filter.mask layout).
 * NOTE(review): this extract is missing lines (several branch headers are
 * absent); comments describe only the visible code. */
3051 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
3052 int row_and_7, int col_and_7,
3053 int w, int h, int col_end, int row_end,
3054 enum TxfmMode tx, int skip_inter)
3056 static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
3057 static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
3059 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
3060 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
3061 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
3062 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
3064 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
3065 // edges. This means that for UV, we work on two subsampled blocks at
3066 // a time, and we only use the topleft block's mode information to set
3067 // things like block strength. Thus, for any block size smaller than
3068 // 16x16, ignore the odd portion of the block.
3069 if (tx == TX_4X4 && (ss_v | ss_h)) {
/* TX_4X4, non-skipped inter: every internal 4px edge is filtered. */
3084 if (tx == TX_4X4 && !skip_inter) {
3085 int t = 1 << col_and_7, m_col = (t << w) - t, y;
3086 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
3087 int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
3089 for (y = row_and_7; y < h + row_and_7; y++) {
3090 int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
3092 mask[0][y][1] |= m_row_8;
3093 mask[0][y][2] |= m_row_4;
3094 // for odd lines, if the odd col is not being filtered,
3095 // skip odd row also:
3102 // if a/c are even row/col and b/d are odd, and d is skipped,
3103 // e.g. right edge of size-66x66.webm, then skip b also (bug)
3104 if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
3105 mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
3107 mask[1][y][col_mask_id] |= m_col;
3110 mask[0][y][3] |= m_col;
3112 mask[1][y][3] |= m_col;
/* Larger transforms / skipped blocks: only the block's outer edges. */
3115 int y, t = 1 << col_and_7, m_col = (t << w) - t;
3118 int mask_id = (tx == TX_8X8);
3119 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
3120 int l2 = tx + ss_h - 1, step1d;
3121 int m_row = m_col & masks[l2];
3123 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
3124 // 8wd loopfilter to prevent going off the visible edge.
3125 if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
3126 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
3127 int m_row_8 = m_row - m_row_16;
3129 for (y = row_and_7; y < h + row_and_7; y++) {
3130 mask[0][y][0] |= m_row_16;
3131 mask[0][y][1] |= m_row_8;
3134 for (y = row_and_7; y < h + row_and_7; y++)
3135 mask[0][y][mask_id] |= m_row;
/* Same odd-edge handling vertically. */
3140 if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
3141 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
3142 mask[1][y][0] |= m_col;
3143 if (y - row_and_7 == h - 1)
3144 mask[1][y][1] |= m_col;
3146 for (y = row_and_7; y < h + row_and_7; y += step1d)
3147 mask[1][y][mask_id] |= m_col;
3149 } else if (tx != TX_4X4) {
3152 mask_id = (tx == TX_8X8) || (h == ss_v);
3153 mask[1][row_and_7][mask_id] |= m_col;
3154 mask_id = (tx == TX_8X8) || (w == ss_h);
3155 for (y = row_and_7; y < h + row_and_7; y++)
3156 mask[0][y][mask_id] |= t;
/* Remaining TX_4X4 case: single-column masks. */
3158 int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
3160 for (y = row_and_7; y < h + row_and_7; y++) {
3161 mask[0][y][2] |= t4;
3162 mask[0][y][1] |= t8;
3164 mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
/* Decode and reconstruct one block (leaf of the partition tree): parse
 * modes/MVs, decode coefficients, reconstruct (intra or inter, with a
 * temporary-buffer path when the block overhangs the frame edge), and set
 * up the loop-filter masks and level LUTs for this block.
 * NOTE(review): this extract is missing many lines (branch headers, mode
 * parsing); comments describe only the visible code. */
3169 static void decode_b(AVCodecContext *ctx, int row, int col,
3170 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
3171 enum BlockLevel bl, enum BlockPartition bp)
3173 VP9Context *s = ctx->priv_data;
3175 enum BlockSize bs = bl * 3 + bp;
3176 int bytesperpixel = s->bytesperpixel;
3177 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
3179 AVFrame *f = s->frames[CUR_FRAME].tf.f;
/* MV bounds for this block, in 1/8-pel units relative to the block. */
3185 s->min_mv.x = -(128 + col * 64);
3186 s->min_mv.y = -(128 + row * 64);
3187 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
3188 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
/* Chroma transform size is reduced when subsampling makes the luma tx
 * larger than the chroma block. */
3194 b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
3195 (s->ss_v && h4 * 2 == (1 << b->tx)));
3200 if (bytesperpixel == 1) {
3201 has_coeffs = decode_coeffs_8bpp(ctx);
3203 has_coeffs = decode_coeffs_16bpp(ctx);
/* All-zero inter sub-8x8 block: mark it skipped for the filter contexts. */
3205 if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
3207 memset(&s->above_skip_ctx[col], 1, w4);
3208 memset(&s->left_skip_ctx[s->row7], 1, h4);
/* Zero the above/left nnz contexts with the widest store that fits. */
3213 #define SPLAT_ZERO_CTX(v, n) \
3215 case 1: v = 0; break; \
3216 case 2: AV_ZERO16(&v); break; \
3217 case 4: AV_ZERO32(&v); break; \
3218 case 8: AV_ZERO64(&v); break; \
3219 case 16: AV_ZERO128(&v); break; \
3221 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3223 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3224 if (s->ss_##dir2) { \
3225 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3226 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3228 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3229 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3234 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
3235 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
3236 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
3237 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
3240 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
3241 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
3242 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
3243 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
/* Advance the per-block coefficient/eob buffers past this block. */
3248 s->block += w4 * h4 * 64 * bytesperpixel;
3249 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3250 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3251 s->eob += 4 * w4 * h4;
3252 s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3253 s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3259 // emulated overhangs if the stride of the target buffer can't hold. This
3260 // allows to support emu-edge and so on even if we have large block
3262 emu[0] = (col + w4) * 8 > f->linesize[0] ||
3263 (row + h4) > s->rows;
3264 emu[1] = (col + w4) * 4 > f->linesize[1] ||
3265 (row + h4) > s->rows;
3267 s->dst[0] = s->tmp_y;
3270 s->dst[0] = f->data[0] + yoff;
3271 s->y_stride = f->linesize[0];
3274 s->dst[1] = s->tmp_uv[0];
3275 s->dst[2] = s->tmp_uv[1];
3278 s->dst[1] = f->data[1] + uvoff;
3279 s->dst[2] = f->data[2] + uvoff;
3280 s->uv_stride = f->linesize[1];
3284 intra_recon_16bpp(ctx, yoff, uvoff);
3286 intra_recon_8bpp(ctx, yoff, uvoff);
3290 inter_recon_16bpp(ctx);
3292 inter_recon_8bpp(ctx);
/* Copy back from the temporary (emu) buffers into the frame, in the
 * widest block sizes that fit the remaining width. */
3296 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3298 for (n = 0; o < w; n++) {
3303 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3304 s->tmp_y + o, 128, h, 0, 0);
3305 o += bw * bytesperpixel;
3310 int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
3311 int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
3313 for (n = 1; o < w; n++) {
3318 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3319 s->tmp_uv[0] + o, 128, h, 0, 0);
3320 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3321 s->tmp_uv[1] + o, 128, h, 0, 0);
3322 o += bw * bytesperpixel;
3327 // pick filter level and find edges to apply filter to
3328 if (s->filter.level &&
3329 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3330 [b->mode[3] != ZEROMV]) > 0) {
3331 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3332 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3334 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3335 mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3336 if (s->ss_h || s->ss_v)
3337 mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
3338 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3339 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3340 b->uvtx, skip_inter);
/* Lazily fill the per-level limit LUTs used by the loop filter. */
3342 if (!s->filter.lim_lut[lvl]) {
3343 int sharp = s->filter.sharpness;
3347 limit >>= (sharp + 3) >> 2;
3348 limit = FFMIN(limit, 9 - sharp);
3350 limit = FFMAX(limit, 1);
3352 s->filter.lim_lut[lvl] = limit;
3353 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
/* Advance the coefficient/eob buffers (non-skip path). */
3359 s->block += w4 * h4 * 64 * bytesperpixel;
3360 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3361 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3362 s->eob += 4 * w4 * h4;
3363 s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3364 s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
// Recursively decode one partition of a 64x64 superblock (first pass).
// Reads the partition decision for the block at (row, col) at level bl from
// the range coder, dispatches to decode_b() for leaf blocks, and recurses
// with bl + 1 for PARTITION_SPLIT. Counts each decision for later
// probability adaptation.
// NOTE(review): several control-flow lines (the leading BL_8X8 check, the
// switch header and some case labels) appear elided in this excerpt —
// confirm against the full file.
3368 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3369                       ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3371     VP9Context *s = ctx->priv_data;
    // Partition context: bit 0 from the above neighbor, bit 1 from the left
    // neighbor, each taken at the depth corresponding to this block level.
3372     int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3373             (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    // Keyframes/intra-only frames use the fixed default partition
    // probabilities; inter frames use the adapted per-frame ones.
3374     const uint8_t *p = s->keyframe || s->intraonly ? vp9_default_kf_partition_probs[bl][c] :
3375                                                      s->prob.p.partition[bl][c];
3376     enum BlockPartition bp;
    // Half block size in 8-pixel units: 4 at BL_64X64 down to 0 at BL_8X8.
3377     ptrdiff_t hbs = 4 >> bl;
3378     AVFrame *f = s->frames[CUR_FRAME].tf.f;
3379     ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3380     int bytesperpixel = s->bytesperpixel;
3383         bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3384         decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3385     } else if (col + hbs < s->cols) { // FIXME why not <=?
3386         if (row + hbs < s->rows) { // FIXME why not <=?
3387             bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3389             case PARTITION_NONE:
3390                 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                // horizontal split: top half, then bottom half one hbs down
3393                 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3394                 yoff  += hbs * 8 * y_stride;
3395                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3396                 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
                // vertical split: left half, then right half one hbs across
3399                 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3400                 yoff  += hbs * 8 * bytesperpixel;
3401                 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3402                 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3404             case PARTITION_SPLIT:
                // recurse into the four quadrants at the next block level
3405                 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3406                 decode_sb(ctx, row, col + hbs, lflvl,
3407                           yoff + 8 * hbs * bytesperpixel,
3408                           uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3409                 yoff  += hbs * 8 * y_stride;
3410                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3411                 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3412                 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3413                           yoff + 8 * hbs * bytesperpixel,
3414                           uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
        // bottom half is off-frame: only NONE vs SPLIT is coded (prob p[1])
3419     } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3420         bp = PARTITION_SPLIT;
3421         decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3422         decode_sb(ctx, row, col + hbs, lflvl,
3423                   yoff + 8 * hbs * bytesperpixel,
3424                   uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3427         decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
    // right half is off-frame: only NONE vs SPLIT is coded (prob p[2])
3429     } else if (row + hbs < s->rows) { // FIXME why not <=?
3430         if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3431             bp = PARTITION_SPLIT;
3432             decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3433             yoff  += hbs * 8 * y_stride;
3434             uvoff += hbs * 8 * uv_stride >> s->ss_v;
3435             decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3438             decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
    // both halves off-frame: partition is an implicit split, nothing coded
3441         bp = PARTITION_SPLIT;
3442         decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
    // tally the decision for end-of-frame probability adaptation
3444     s->counts.partition[bl][c][bp]++;
// Second-pass variant of decode_sb(): replays the partition decisions that
// the first pass stored in the per-block structure (b->bl / b->bp) instead
// of reading them from the range coder, walking the same recursion shape.
3447 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3448                           ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3450     VP9Context *s = ctx->priv_data;
    // Half block size in 8-pixel units for this level.
3452     ptrdiff_t hbs = 4 >> bl;
3453     AVFrame *f = s->frames[CUR_FRAME].tf.f;
3454     ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3455     int bytesperpixel = s->bytesperpixel;
    // Leaf at the smallest level: must have been stored as BL_8X8.
3458         av_assert2(b->bl == BL_8X8);
3459         decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3460     } else if (s->b->bl == bl) {
3461         decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
        // For H/V partitions, decode the second half only if it is in-frame.
3462         if (b->bp == PARTITION_H && row + hbs < s->rows) {
3463             yoff  += hbs * 8 * y_stride;
3464             uvoff += hbs * 8 * uv_stride >> s->ss_v;
3465             decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3466         } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3467             yoff  += hbs * 8 * bytesperpixel;
3468             uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3469             decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
    // Stored level is deeper than bl: recurse into the quadrants that fall
    // inside the visible frame, mirroring decode_sb()'s split handling.
3472         decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3473         if (col + hbs < s->cols) { // FIXME why not <=?
3474             if (row + hbs < s->rows) {
3475                 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
3476                               uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3477                 yoff  += hbs * 8 * y_stride;
3478                 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3479                 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3480                 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3481                               yoff + 8 * hbs * bytesperpixel,
3482                               uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
            // bottom half off-frame: only the right quadrant remains
3484                 yoff  += hbs * 8 * bytesperpixel;
3485                 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3486                 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3488         } else if (row + hbs < s->rows) {
3489             yoff  += hbs * 8 * y_stride;
3490             uvoff += hbs * 8 * uv_stride >> s->ss_v;
3491             decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
// Apply the in-loop deblocking filter to vertical edges (edges between
// horizontally adjacent blocks) of one plane of a 64x64 superblock.
// lvl holds per-8x8 filter levels, mask[] the per-row bitmasks built by
// mask_edges(): indices 0..2 select 16/8/4-px edge filters, index 3 the
// inner 4px edges. ss_h/ss_v are the plane's chroma subsampling shifts.
3496 static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
3497                                                uint8_t *lvl, uint8_t (*mask)[4],
3498                                                uint8_t *dst, ptrdiff_t ls)
3500     int y, x, bytesperpixel = s->bytesperpixel;
3502     // filter edges between columns (e.g. block1 | block2)
3503     for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
        // hmask1/hmask2 are the masks of the two 8px rows handled together;
        // combining them lets a single 16-wide filter call cover both.
3504         uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
3505         unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3506         unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3507         unsigned hm = hm1 | hm2 | hm13 | hm23;
        // Walk edge positions left to right; stop once no mask bit at or
        // beyond the current position remains set.
3509         for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
                // L packs the filter level; H (high bit nibble), E (edge
                // limit) and I (interior limit) come from the level LUTs.
3512                 int L = *l, H = L >> 4;
3513                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3515                 if (hmask1[0] & x) {
3516                     if (hmask2[0] & x) {
                        // both rows use the widest filter at the same level:
                        // one 16px call covers them
3517                         av_assert2(l[8 << ss_v] == L);
3518                         s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
3520                         s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
3522                 } else if (hm2 & x) {
                    // rows differ: pack both levels' limits into one call
                    // for the paired (mix2) filter
3525                     E |= s->filter.mblim_lut[L] << 8;
3526                     I |= s->filter.lim_lut[L] << 8;
3527                     s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3529                                            [0](ptr, ls, E, I, H);
3531                     s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3532                                         [0](ptr, ls, E, I, H);
3534             } else if (hm2 & x) {
                // only the second 8px row has an edge here
3535                 int L = l[8 << ss_v], H = L >> 4;
3536                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3538                 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3539                                     [0](ptr + 8 * ls, ls, E, I, H);
            // inner 4px edges (mask index 3), offset 4 pixels into the block
3547                 int L = *l, H = L >> 4;
3548                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3553                     E |= s->filter.mblim_lut[L] << 8;
3554                     I |= s->filter.lim_lut[L] << 8;
3555                     s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3557                     s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3559             } else if (hm23 & x) {
3560                 int L = l[8 << ss_v], H = L >> 4;
3561                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3563                 s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
// Apply the in-loop deblocking filter to horizontal edges (edges between
// vertically adjacent blocks) of one plane of a 64x64 superblock.
// Mirrors filter_plane_cols(): vmask[0..2] select 16/8/4-px edge widths,
// vmask[3] the inner 4px edges; two horizontally adjacent edges can be
// merged into one wider or paired (mix2) filter call.
3571 static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
3572                                                uint8_t *lvl, uint8_t (*mask)[4],
3573                                                uint8_t *dst, ptrdiff_t ls)
3575     int y, x, bytesperpixel = s->bytesperpixel;
3578     // filter edges between rows (e.g. ------)
3580     for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
3581         uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
3582         unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
        // step two (subsampled: four) mask bits / 16 pixels at a time so a
        // pair of adjacent 8px edges can be handled with one call
3584         for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
                // L: packed filter level; H/E/I derived via the level LUTs
3587                 int L = *l, H = L >> 4;
3588                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3591                 if (vmask[0] & (x << (1 + ss_h))) {
                    // neighboring edge uses the same widest filter level:
                    // one 16px call covers both
3592                     av_assert2(l[1 + ss_h] == L);
3593                     s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
3595                     s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
3597             } else if (vm & (x << (1 + ss_h))) {
                // neighboring edge has a different level: pack both levels'
                // limits and use the paired (mix2) filter
3600                 E |= s->filter.mblim_lut[L] << 8;
3601                 I |= s->filter.lim_lut[L] << 8;
3602                 s->dsp.loop_filter_mix2[!!(vmask[1] &  x)]
3603                                        [!!(vmask[1] & (x << (1 + ss_h)))]
3604                                        [1](ptr, ls, E, I, H);
3606                 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3607                                     [1](ptr, ls, E, I, H);
3609             } else if (vm & (x << (1 + ss_h))) {
                // only the right-hand neighbor has an edge at this position
3610                 int L = l[1 + ss_h], H = L >> 4;
3611                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3613                 s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
3614                                     [1](ptr + 8 * bytesperpixel, ls, E, I, H);
            // inner 4px edges (vmask[3]), 4 lines into the block
3619                 int L = *l, H = L >> 4;
3620                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3622                 if (vm3 & (x << (1 + ss_h))) {
3625                     E |= s->filter.mblim_lut[L] << 8;
3626                     I |= s->filter.lim_lut[L] << 8;
3627                     s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
3629                     s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
3631             } else if (vm3 & (x << (1 + ss_h))) {
3632                 int L = l[1 + ss_h], H = L >> 4;
3633                 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3635                 s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
// Run the deblocking loop filter over one 64x64 superblock: first the luma
// plane (column then row edges), then both chroma planes using the shared
// chroma mask set (index s->ss_h | s->ss_v picks subsampled masks when
// either direction is subsampled).
3648 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3649                           int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3651     VP9Context *s = ctx->priv_data;
3652     AVFrame *f = s->frames[CUR_FRAME].tf.f;
3653     uint8_t *dst = f->data[0] + yoff;
3654     ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
3655     uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
3658     // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3659     // if you think of them as acting on a 8x8 block max, we can interleave
3660     // each v/h within the single x loop, but that only works if we work on
3661     // 8 pixel blocks, and we won't always do that (we want at least 16px
3662     // to use SSE2 optimizations, perhaps 32 for AVX2)
    // luma: mask[0][0] = column edges, mask[0][1] = row edges
3664     filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
3665     filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);
    // chroma: both planes share the same levels and (possibly subsampled) masks
3667     for (p = 0; p < 2; p++) {
3668         dst = f->data[1 + p] + uvoff;
3669         filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
3670         filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
/**
 * Compute the pixel range [*start, *end) covered by tile number idx.
 *
 * The frame's n superblock rows/cols are divided into 1 << log2_n tiles of
 * (as near as possible) equal size; the superblock boundaries are clamped
 * to n and converted to pixels (1 superblock unit = 8 pixels).
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_first = (idx       * n) >> log2_n;
    int sb_last  = ((idx + 1) * n) >> log2_n;

    /* clamp to the frame's superblock count, then scale to pixels */
    if (sb_first > n)
        sb_first = n;
    if (sb_last > n)
        sb_last = n;
    *start = sb_first << 3;
    *end   = sb_last  << 3;
}
// Adapt one binary probability towards the observed counts ct0/ct1.
// p2 is the maximum-likelihood probability from the counts (rounded, clipped
// to [1,255]); the stored probability is moved towards it by a fraction
// update_factor/256, scaled down further when fewer than max_count events
// were seen.
// NOTE(review): p1 is used below but not assigned in the visible lines;
// upstream vp9 loads p1 = *p and returns early when ct == 0 — confirm the
// elided lines in the full file.
3682 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3683                                         int max_count, int update_factor)
3685     unsigned ct = ct0 + ct1, p2, p1;
    // ML estimate of P(bit==0) in 1/256 units, rounded to nearest
3691     p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3692     p2 = av_clip(p2, 1, 255);
    // weight the update by how much evidence we have, capped at max_count
3693     ct = FFMIN(ct, max_count);
3694     update_factor = FASTDIV(update_factor * ct, max_count);
3696     // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3697     *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
// End-of-frame backward probability adaptation: fold the symbol counts
// gathered while decoding this frame (s->counts) into the stored frame
// context s->prob_ctx[s->framectxid], per the VP9 spec. Coefficient
// probabilities are always adapted; mode/mv/partition probabilities only
// for inter frames (keyframes copy the frame's values instead).
3700 static void adapt_probs(VP9Context *s)
3703     prob_context *p = &s->prob_ctx[s->framectxid].p;
    // coefficient update factor: 112 after key/intra frames, 128 otherwise
3704     int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
    // coefficient probabilities, indexed [txsize][plane][inter][band][ctx]
3707     for (i = 0; i < 4; i++)
3708         for (j = 0; j < 2; j++)
3709             for (k = 0; k < 2; k++)
3710                 for (l = 0; l < 6; l++)
3711                     for (m = 0; m < 6; m++) {
3712                         uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3713                         unsigned *e = s->counts.eob[i][j][k][l][m];
3714                         unsigned *c = s->counts.coef[i][j][k][l][m];
3716                         if (l == 0 && m >= 3) // dc only has 3 pt
3719                         adapt_prob(&pp[0], e[0], e[1], 24, uf);
3720                         adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3721                         adapt_prob(&pp[2], c[1], c[2], 24, uf);
    // key/intra-only frames: no inter symbols were coded, just save the
    // frame's own skip/tx probabilities and stop here
3724     if (s->keyframe || s->intraonly) {
3725         memcpy(p->skip,  s->prob.p.skip,  sizeof(p->skip));
3726         memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3727         memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3728         memcpy(p->tx8p,  s->prob.p.tx8p,  sizeof(p->tx8p));
    // skip flag
3733     for (i = 0; i < 3; i++)
3734         adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
    // intra/inter flag
3737     for (i = 0; i < 4; i++)
3738         adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
    // compound/single prediction flag (only coded when switchable)
3741     if (s->comppredmode == PRED_SWITCHABLE) {
3742       for (i = 0; i < 5; i++)
3743           adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
    // reference frame selection, compound mode
3747     if (s->comppredmode != PRED_SINGLEREF) {
3748       for (i = 0; i < 5; i++)
3749           adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3750                      s->counts.comp_ref[i][1], 20, 128);
    // reference frame selection, single mode
3753     if (s->comppredmode != PRED_COMPREF) {
3754       for (i = 0; i < 5; i++) {
3755           uint8_t *pp = p->single_ref[i];
3756           unsigned (*c)[2] = s->counts.single_ref[i];
3758           adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3759           adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3763     // block partitioning
3764     for (i = 0; i < 4; i++)
3765         for (j = 0; j < 4; j++) {
3766             uint8_t *pp = p->partition[i][j];
3767             unsigned *c = s->counts.partition[i][j];
3769             adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3770             adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3771             adapt_prob(&pp[2], c[2], c[3], 20, 128);
    // transform size tree (only coded when switchable)
3775     if (s->txfmmode == TX_SWITCHABLE) {
3776       for (i = 0; i < 2; i++) {
3777           unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3779           adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3780           adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3781           adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3782           adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3783           adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3784           adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3788     // interpolation filter
3789     if (s->filtermode == FILTER_SWITCHABLE) {
3790         for (i = 0; i < 4; i++) {
3791             uint8_t *pp = p->filter[i];
3792             unsigned *c = s->counts.filter[i];
3794             adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3795             adapt_prob(&pp[1], c[1], c[2], 20, 128);
    // inter prediction mode tree (ZEROMV/NEARESTMV/NEARMV/NEWMV)
3800     for (i = 0; i < 7; i++) {
3801         uint8_t *pp = p->mv_mode[i];
3802         unsigned *c = s->counts.mv_mode[i];
3804         adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3805         adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3806         adapt_prob(&pp[2], c[1], c[3], 20, 128);
    // mv joint (which of the two mv components are non-zero)
3811         uint8_t *pp = p->mv_joint;
3812         unsigned *c = s->counts.mv_joint;
3814         adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3815         adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3816         adapt_prob(&pp[2], c[2], c[3], 20, 128);
    // per-component mv probabilities (i = 0 row, 1 col)
3820     for (i = 0; i < 2; i++) {
3822         unsigned *c, (*c2)[2], sum;
3824         adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3825                    s->counts.mv_comp[i].sign[1], 20, 128);
        // magnitude class tree: sum shrinks as each class is peeled off
3827         pp = p->mv_comp[i].classes;
3828         c = s->counts.mv_comp[i].classes;
3829         sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3830         adapt_prob(&pp[0], c[0], sum, 20, 128);
3832         adapt_prob(&pp[1], c[1], sum, 20, 128);
3834         adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3835         adapt_prob(&pp[3], c[2], c[3], 20, 128);
3837         adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3838         adapt_prob(&pp[5], c[4], c[5], 20, 128);
3840         adapt_prob(&pp[6], c[6], sum, 20, 128);
3841         adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3842         adapt_prob(&pp[8], c[7], c[8], 20, 128);
3843         adapt_prob(&pp[9], c[9], c[10], 20, 128);
3845         adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3846                    s->counts.mv_comp[i].class0[1], 20, 128);
        // integer magnitude bits
3847         pp = p->mv_comp[i].bits;
3848         c2 = s->counts.mv_comp[i].bits;
3849         for (j = 0; j < 10; j++)
3850             adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
        // fractional-pel trees (class0 and general)
3852         for (j = 0; j < 2; j++) {
3853             pp = p->mv_comp[i].class0_fp[j];
3854             c = s->counts.mv_comp[i].class0_fp[j];
3855             adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3856             adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3857             adapt_prob(&pp[2], c[2], c[3], 20, 128);
3859         pp = p->mv_comp[i].fp;
3860         c = s->counts.mv_comp[i].fp;
3861         adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3862         adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3863         adapt_prob(&pp[2], c[2], c[3], 20, 128);
        // eighth-pel bits only exist when high-precision mvs are enabled
3865         if (s->highprecisionmvs) {
3866             adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3867                        s->counts.mv_comp[i].class0_hp[1], 20, 128);
3868             adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3869                        s->counts.mv_comp[i].hp[1], 20, 128);
    // luma intra mode tree (y_mode); sum is reduced as modes are peeled off
3874     for (i = 0; i < 4; i++) {
3875         uint8_t *pp = p->y_mode[i];
3876         unsigned *c = s->counts.y_mode[i], sum, s2;
3878         sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3879         adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3880         sum -= c[TM_VP8_PRED];
3881         adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3882         sum -= c[VERT_PRED];
3883         adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3884         s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3886         adapt_prob(&pp[3], s2, sum, 20, 128);
3888         adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3889         adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3890         sum -= c[DIAG_DOWN_LEFT_PRED];
3891         adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3892         sum -= c[VERT_LEFT_PRED];
3893         adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3894         adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
    // chroma intra mode tree (uv_mode): same tree, conditioned on y mode
3898     for (i = 0; i < 10; i++) {
3899         uint8_t *pp = p->uv_mode[i];
3900         unsigned *c = s->counts.uv_mode[i], sum, s2;
3902         sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3903         adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3904         sum -= c[TM_VP8_PRED];
3905         adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3906         sum -= c[VERT_PRED];
3907         adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3908         s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3910         adapt_prob(&pp[3], s2, sum, 20, 128);
3912         adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3913         adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3914         sum -= c[DIAG_DOWN_LEFT_PRED];
3915         adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3916         sum -= c[VERT_LEFT_PRED];
3917         adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3918         adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// Release the per-frame scratch allocations (intra prediction line buffer,
// block struct array, coefficient/eob block storage). Safe to call on
// already-freed state: av_freep() NULLs the pointers.
3922 static void free_buffers(VP9Context *s)
3924     av_freep(&s->intra_pred_data[0]);
3925     av_freep(&s->b_base);
3926     av_freep(&s->block_base);
// AVCodec.close callback: drop the three internal frames (current +
// mv/segmap references) and all 8+8 reference slots, freeing their AVFrame
// wrappers.
3929 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3931     VP9Context *s = ctx->priv_data;
3934     for (i = 0; i < 3; i++) {
        // only unref frames that actually hold a buffer
3935         if (s->frames[i].tf.f->data[0])
3936             vp9_unref_frame(ctx, &s->frames[i]);
3937         av_frame_free(&s->frames[i].tf.f);
3939     for (i = 0; i < 8; i++) {
3940         if (s->refs[i].f->data[0])
3941             ff_thread_release_buffer(ctx, &s->refs[i]);
3942         av_frame_free(&s->refs[i].f);
3943         if (s->next_refs[i].f->data[0])
3944             ff_thread_release_buffer(ctx, &s->next_refs[i]);
3945         av_frame_free(&s->next_refs[i].f);
// AVCodec.decode callback: parse the frame header, manage the current/
// reference frame buffers, then run the main tile decode loop (two passes
// when frame-threading with context refresh), applying the loop filter and
// reporting row progress as superblock rows complete.
3955 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3956                             int *got_frame, AVPacket *pkt)
3958     const uint8_t *data = pkt->data;
3959     int size = pkt->size;
3960     VP9Context *s = ctx->priv_data;
3961     int res, tile_row, tile_col, i, ref, row, col;
    // keep the previous segmentation map when the header doesn't update it
3962     int retain_segmap_ref = s->segmentation.enabled && !s->segmentation.update_map;
3963     ptrdiff_t yoff, uvoff, ls_y, ls_uv;
3967     if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
    // res == 0: "show existing frame" — output reference `ref` directly
3969     } else if (res == 0) {
3970         if (!s->refs[ref].f->data[0]) {
3971             av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3972             return AVERROR_INVALIDDATA;
3974         if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
3976         ((AVFrame *)frame)->pkt_pts = pkt->pts;
3977         ((AVFrame *)frame)->pkt_dts = pkt->dts;
        // carry all reference slots forward unchanged
3978         for (i = 0; i < 8; i++) {
3979             if (s->next_refs[i].f->data[0])
3980                 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3981             if (s->refs[i].f->data[0] &&
3982                 (res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i])) < 0)
    // rotate the previous decoded frame into the segmap/mvpair reference
    // slots (skipped for keyframes/intra-only/error-resilient frames)
3991     if (!retain_segmap_ref) {
3992         if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
3993             vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
3994         if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
3995             (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
3998     if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
3999         vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);
4000     if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4001         (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
4003     if (s->frames[CUR_FRAME].tf.f->data[0])
4004         vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
4005     if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
4007     f = s->frames[CUR_FRAME].tf.f;
4008     f->key_frame = s->keyframe;
4009     f->pict_type = (s->keyframe || s->intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
4010     ls_y = f->linesize[0];
4011     ls_uv =f->linesize[1];
    // build next_refs: slots flagged in refreshrefmask point at the new
    // frame, the rest keep their current reference
4014     for (i = 0; i < 8; i++) {
4015         if (s->next_refs[i].f->data[0])
4016             ff_thread_release_buffer(ctx, &s->next_refs[i]);
4017         if (s->refreshrefmask & (1 << i)) {
4018             res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
4019         } else if (s->refs[i].f->data[0]) {
4020             res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
4026     // main tile decode loop
4027     bytesperpixel = s->bytesperpixel;
    // reset the above-row contexts for the whole frame width
4028     memset(s->above_partition_ctx, 0, s->cols);
4029     memset(s->above_skip_ctx, 0, s->cols);
4030     if (s->keyframe || s->intraonly) {
4031         memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
4033         memset(s->above_mode_ctx, NEARESTMV, s->cols);
4035     memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
4036     memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
4037     memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
4038     memset(s->above_segpred_ctx, 0, s->cols);
    // two-pass mode is used for frame threading unless the frame was coded
    // in parallel mode (probabilities not adapted mid-frame)
4039     s->pass = s->frames[CUR_FRAME].uses_2pass =
4040         ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
4041     if ((res = update_block_buffers(ctx)) < 0) {
4042         av_log(ctx, AV_LOG_ERROR,
4043                "Failed to allocate block buffers\n");
    // parallel mode: commit the frame's probabilities immediately so other
    // threads can start; coef copy is limited by the signalled tx mode
4046     if (s->refreshctx && s->parallelmode) {
4049         for (i = 0; i < 4; i++) {
4050             for (j = 0; j < 2; j++)
4051                 for (k = 0; k < 2; k++)
4052                     for (l = 0; l < 6; l++)
4053                         for (m = 0; m < 6; m++)
4054                             memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
4055                                    s->prob.coef[i][j][k][l][m], 3);
4056             if (s->txfmmode == i)
4059         s->prob_ctx[s->framectxid].p = s->prob.p;
4060         ff_thread_finish_setup(ctx);
4061     } else if (!s->refreshctx) {
4062         ff_thread_finish_setup(ctx);
    // rewind the coefficient/eob scratch pointers for this pass
4068         s->block = s->block_base;
4069         s->uvblock[0] = s->uvblock_base[0];
4070         s->uvblock[1] = s->uvblock_base[1];
4071         s->eob = s->eob_base;
4072         s->uveob[0] = s->uveob_base[0];
4073         s->uveob[1] = s->uveob_base[1];
4075         for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
4076             set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
4077                             tile_row, s->tiling.log2_tile_rows, s->sb_rows);
            // set up one range decoder per tile column; each tile except
            // the last is preceded by a 32-bit size field
4079                 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4082                     if (tile_col == s->tiling.tile_cols - 1 &&
4083                         tile_row == s->tiling.tile_rows - 1) {
4086                         tile_size = AV_RB32(data);
4090                     if (tile_size > size) {
4091                         ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4092                         return AVERROR_INVALIDDATA;
4094                     ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
4095                     if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
4096                         ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4097                         return AVERROR_INVALIDDATA;
            // iterate superblock rows; offsets advance 64 luma lines at a time
4104             for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
4105                  row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
4106                 struct VP9Filter *lflvl_ptr = s->lflvl;
4107                 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
4109                 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4110                     set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
4111                                     tile_col, s->tiling.log2_tile_cols, s->sb_cols);
                    // reset left-edge contexts at each tile boundary
4114                         memset(s->left_partition_ctx, 0, 8);
4115                         memset(s->left_skip_ctx, 0, 8);
4116                         if (s->keyframe || s->intraonly) {
4117                             memset(s->left_mode_ctx, DC_PRED, 16);
4119                             memset(s->left_mode_ctx, NEARESTMV, 8);
4121                         memset(s->left_y_nnz_ctx, 0, 16);
4122                         memset(s->left_uv_nnz_ctx, 0, 32);
4123                         memset(s->left_segpred_ctx, 0, 8);
                        // swap in this tile column's range decoder state
4125                         memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
4128                     for (col = s->tiling.tile_col_start;
4129                          col < s->tiling.tile_col_end;
4130                          col += 8, yoff2 += 64 * bytesperpixel,
4131                          uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4132                         // FIXME integrate with lf code (i.e. zero after each
4133                         // use, similar to invtxfm coefficients, or similar)
4135                             memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
                        // pass 2 replays stored decisions; pass 0/1 parses
4139                             decode_sb_mem(ctx, row, col, lflvl_ptr,
4140                                           yoff2, uvoff2, BL_64X64);
4142                             decode_sb(ctx, row, col, lflvl_ptr,
4143                                       yoff2, uvoff2, BL_64X64);
                    // save the decoder state back for the next sb row
4147                         memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
4155                 // backup pre-loopfilter reconstruction data for intra
4156                 // prediction of next row of sb64s
4157                 if (row + 8 < s->rows) {
4158                     memcpy(s->intra_pred_data[0],
4159                            f->data[0] + yoff + 63 * ls_y,
4160                            8 * s->cols * bytesperpixel);
4161                     memcpy(s->intra_pred_data[1],
4162                            f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4163                            8 * s->cols * bytesperpixel >> s->ss_h);
4164                     memcpy(s->intra_pred_data[2],
4165                            f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4166                            8 * s->cols * bytesperpixel >> s->ss_h);
4169                 // loopfilter one row
4170                 if (s->filter.level) {
4173                     lflvl_ptr = s->lflvl;
4174                     for (col = 0; col < s->cols;
4175                          col += 8, yoff2 += 64 * bytesperpixel,
4176                          uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4177                         loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
4181                 // FIXME maybe we can make this more finegrained by running the
4182                 // loopfilter per-block instead of after each sbrow
4183                 // In fact that would also make intra pred left preparation easier?
4184                 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
        // after pass 1: adapt probabilities and release the other threads
4188         if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
4190             ff_thread_finish_setup(ctx);
4192     } while (s->pass++ == 1);
4193     ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
    // commit next_refs into the visible reference slots
4196     for (i = 0; i < 8; i++) {
4197         if (s->refs[i].f->data[0])
4198             ff_thread_release_buffer(ctx, &s->refs[i]);
4199         ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
4202     if (!s->invisible) {
4203         if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
// AVCodec.flush callback: drop all internal frames and reference buffers
// (e.g. on seek); the AVFrame wrappers themselves stay allocated.
4211 static void vp9_decode_flush(AVCodecContext *ctx)
4213     VP9Context *s = ctx->priv_data;
4216     for (i = 0; i < 3; i++)
4217         vp9_unref_frame(ctx, &s->frames[i]);
4218     for (i = 0; i < 8; i++)
4219         ff_thread_release_buffer(ctx, &s->refs[i]);
// Allocate the AVFrame wrappers for the 3 internal frames and the 8+8
// reference slots. On any allocation failure, vp9_decode_free() unwinds
// everything allocated so far and ENOMEM is returned.
4222 static int init_frames(AVCodecContext *ctx)
4224     VP9Context *s = ctx->priv_data;
4227     for (i = 0; i < 3; i++) {
4228         s->frames[i].tf.f = av_frame_alloc();
4229         if (!s->frames[i].tf.f) {
4230             vp9_decode_free(ctx);
4231             av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4232             return AVERROR(ENOMEM);
4235     for (i = 0; i < 8; i++) {
4236         s->refs[i].f = av_frame_alloc();
4237         s->next_refs[i].f = av_frame_alloc();
4238         if (!s->refs[i].f || !s->next_refs[i].f) {
4239             vp9_decode_free(ctx);
4240             av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4241             return AVERROR(ENOMEM);
// AVCodec.init callback: enable per-frame progress allocation for frame
// threading, mark the loop-filter sharpness as "unset" (-1) so the first
// frame header rebuilds the limit LUTs, and allocate the frame wrappers.
4248 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4250     VP9Context *s = ctx->priv_data;
4252     ctx->internal->allocate_progress = 1;
4254     s->filter.sharpness = -1;
4256     return init_frames(ctx);
// Frame-thread worker init: each thread copy only needs its own frame
// wrappers; all other state is copied via update_thread_context.
4259 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4261     return init_frames(avctx);
// Frame-threading state sync: copy decoding state from the source thread's
// context into this one — frames, reference slots, and the header-derived
// fields the next frame's parsing depends on.
4264 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4267     VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4269     // detect size changes in other threads
4270     if (s->intra_pred_data[0] &&
4271         (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
    // take references to the source thread's frames and next_refs
4275     for (i = 0; i < 3; i++) {
4276         if (s->frames[i].tf.f->data[0])
4277             vp9_unref_frame(dst, &s->frames[i]);
4278         if (ssrc->frames[i].tf.f->data[0]) {
4279             if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
4283     for (i = 0; i < 8; i++) {
4284         if (s->refs[i].f->data[0])
4285             ff_thread_release_buffer(dst, &s->refs[i]);
4286         if (ssrc->next_refs[i].f->data[0]) {
4287             if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
    // scalar state needed to parse the next frame header correctly
4292     s->invisible = ssrc->invisible;
4293     s->keyframe = ssrc->keyframe;
4294     s->ss_v = ssrc->ss_v;
4295     s->ss_h = ssrc->ss_h;
4296     s->segmentation.enabled = ssrc->segmentation.enabled;
4297     s->segmentation.update_map = ssrc->segmentation.update_map;
4298     s->bytesperpixel = ssrc->bytesperpixel;
4300     s->bpp_index = ssrc->bpp_index;
4301     memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4302     memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
    // segment features are only meaningful while segmentation is enabled
4303     if (ssrc->segmentation.enabled) {
4304         memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4305                sizeof(s->segmentation.feat));
// Profiles advertised by the decoder; terminated by FF_PROFILE_UNKNOWN.
4311 static const AVProfile profiles[] = {
4312     { FF_PROFILE_VP9_0, "Profile 0" },
4313     { FF_PROFILE_VP9_1, "Profile 1" },
4314     { FF_PROFILE_VP9_2, "Profile 2" },
4315     { FF_PROFILE_VP9_3, "Profile 3" },
4316     { FF_PROFILE_UNKNOWN },
4319 AVCodec ff_vp9_decoder = {
4321 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4322 .type = AVMEDIA_TYPE_VIDEO,
4323 .id = AV_CODEC_ID_VP9,
4324 .priv_data_size = sizeof(VP9Context),
4325 .init = vp9_decode_init,
4326 .close = vp9_decode_free,
4327 .decode = vp9_decode_frame,
4328 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4329 .flush = vp9_decode_flush,
4330 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4331 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
4332 .profiles = NULL_IF_CONFIG_SMALL(profiles),