2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
34 #include "libavutil/pixdesc.h"
36 #define VP9_SYNCCODE 0x498342
/* Per-frame state: reference-counted side data shared between frame-thread
 * copies of the decoder. segmentation_map and mv both point into the single
 * 'extradata' buffer (see vp9_alloc_frame()). */
typedef struct VP9Frame {
    AVBufferRef *extradata;        // backing buffer for the two arrays below
    uint8_t *segmentation_map;     // per-8x8-block segment ids
    struct VP9mvrefPair *mv;       // per-8x8-block motion-vector pairs
    /* NOTE(review): the member below belongs to struct VP9Filter, whose
     * opening lines are not visible in this chunk of the file. */
    uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
                          [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
/* Per-block mode/motion info produced by block-level parsing. */
typedef struct VP9Block {
    uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
    enum FilterMode filter;
    VP56mv mv[4 /* b_idx */][2 /* ref */];  // up to 4 sub-blocks, 2 refs each
    enum TxfmMode tx, uvtx;                 // luma / chroma transform sizes
    enum BlockPartition bp;
/* Main decoder context.
 * NOTE(review): several members below (e.g. 'absolute_vals', 'ignore_refmap',
 * 'skip_enabled', the three 'coef' arrays and the 'unsigned ...' counters)
 * live inside nested sub-structs (segmentation/prob_ctx/prob/counts/tiling)
 * whose enclosing braces are not visible in this chunk of the file. */
typedef struct VP9Context {
    VP9Block *b_base, *b;          // per-block state (whole frame in 2-pass mode)
    int row, row7, col, col7;      // current position in 8x8 units (+ &7 variants)
    ptrdiff_t y_stride, uv_stride;

    // bitstream header
    uint8_t keyframe, last_keyframe;
    uint8_t last_bpp, bpp, bpp_index, bytesperpixel;
    uint8_t use_last_frame_mvs;
    uint8_t refreshrefmask;        // which of refs[8] to replace after decoding
    uint8_t highprecisionmvs;
    enum FilterMode filtermode;
    uint8_t allowcompinter;        // set when ref sign biases differ
    uint8_t parallelmode;
    uint8_t varcompref[2];         // the two variable compound references
    ThreadFrame refs[8], next_refs[8];
#define REF_FRAME_MVPAIR 1
#define REF_FRAME_SEGMAP 2
    uint8_t mblim_lut[64];
    int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
#define MAX_SEGMENT 8
    uint8_t absolute_vals;
    uint8_t ignore_refmap;
    uint8_t skip_enabled;
    unsigned log2_tile_cols, log2_tile_rows;
    unsigned tile_cols, tile_rows;
    unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    unsigned sb_cols, sb_rows, rows, cols;  // 64x64-superblock / 8x8-block counts
    // NOTE(review): the two uint8_t coef[] arrays belong to the saved
    // (prob_ctx) and current (prob) probability structs respectively;
    // the unsigned arrays below are the symbol counters (counts struct).
    uint8_t coef[4][2][2][6][6][3];
    uint8_t coef[4][2][2][6][6][11];
    unsigned y_mode[4][10];
    unsigned uv_mode[10][10];
    unsigned filter[4][3];
    unsigned mv_mode[7][4];
    unsigned intra[4][2];
    unsigned single_ref[5][2][2];
    unsigned comp_ref[5][2];
    unsigned tx32p[2][4];
    unsigned tx16p[2][3];
    unsigned mv_joint[4];
    unsigned classes[11];
    unsigned bits[10][2];
    unsigned class0_fp[2][4];
    unsigned class0_hp[2];
    unsigned partition[4][4][4];
    unsigned coef[4][2][2][6][6][3];
    unsigned eob[4][2][2][6][6][2];
    enum TxfmMode txfmmode;
    enum CompPredMode comppredmode;

    // contextual (left/above) cache
    DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
    DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
    DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
    DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
    DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
    uint8_t *above_partition_ctx;
    uint8_t *above_mode_ctx;
    // FIXME maybe merge some of the below in a flags field?
    uint8_t *above_y_nnz_ctx;
    uint8_t *above_uv_nnz_ctx[2];
    uint8_t *above_skip_ctx; // 1bit
    uint8_t *above_txfm_ctx; // 2bit
    uint8_t *above_segpred_ctx; // 1bit
    uint8_t *above_intra_ctx; // 1bit
    uint8_t *above_comp_ctx; // 1bit
    uint8_t *above_ref_ctx; // 2bit
    uint8_t *above_filter_ctx;
    VP56mv (*above_mv_ctx)[2];

    // whole-frame cache (arrays carved from one allocation in update_size())
    uint8_t *intra_pred_data[3];
    struct VP9Filter *lflvl;
    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2];

    // block reconstruction intermediates
    int block_alloc_using_2pass;
    int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
    uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
    struct { int x, y; } min_mv, max_mv;
    DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
    DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
    uint16_t mvscale[3][2];  // 14-bit fixed-point ref->cur scale (0 = unscaled)
    uint8_t mvstep[3][2];
/* Block width/height per block size; presumably index [0] is in 4x4 units
 * and [1] in 8x8 units (64x64 -> {16,16} resp. {8,8}) — TODO confirm against
 * the N_BS_SIZES ordering, which is declared elsewhere. */
static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
    { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
    { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
    { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
    { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
/* Allocate the AVFrame plus the shared extradata buffer that holds the
 * segmentation map and mv pairs. 'sz' is the number of 8x8 blocks: each
 * 64x64 superblock contains 64 of them. */
static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
    VP9Context *s = ctx->priv_data;

    if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
    sz = 64 * s->sb_cols * s->sb_rows;
    // one byte per block for the segmentation map + one VP9mvrefPair per block
    if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
        ff_thread_release_buffer(ctx, &f->tf);  // don't leak the frame on OOM
        return AVERROR(ENOMEM);
    f->segmentation_map = f->extradata->data;
    f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
/* Release both the frame buffer and the shared extradata reference. */
static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
    ff_thread_release_buffer(ctx, &f->tf);
    av_buffer_unref(&f->extradata);
/* Make 'dst' reference the same frame + extradata as 'src'. On extradata
 * ref failure, the already-referenced frame is released again so dst is
 * left clean. */
static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
    if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
    } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
        vp9_unref_frame(ctx, dst);
        return AVERROR(ENOMEM);
    // raw pointers can be copied: they point into the now-shared extradata
    dst->segmentation_map = src->segmentation_map;
    dst->uses_2pass = src->uses_2pass;
/* (Re)compute size-dependent state and reallocate the per-superblock-column
 * "above" context arrays (all carved from one allocation via assign());
 * also re-inits the DSP tables when the bit depth changed. */
static int update_size(AVCodecContext *ctx, int w, int h, enum AVPixelFormat fmt)
    VP9Context *s = ctx->priv_data;
    int bytesperpixel = s->bytesperpixel;

    av_assert0(w > 0 && h > 0);

    // nothing to do if geometry and pixel format are unchanged
    if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height && ctx->pix_fmt == fmt)

    s->sb_cols = (w + 63) >> 6;  // 64x64 superblocks
    s->sb_rows = (h + 63) >> 6;
    s->cols = (w + 7) >> 3;      // 8x8 blocks
    s->rows = (h + 7) >> 3;

    // carve the single allocation below into the individual "above" arrays
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    // FIXME we slightly over-allocate here for subsampled chroma, but a little
    // bit of padding shouldn't affect performance...
    p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
                                sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
    assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
    assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
    assign(s->above_y_nnz_ctx, uint8_t *, 16);
    assign(s->above_mode_ctx, uint8_t *, 16);
    assign(s->above_mv_ctx, VP56mv(*)[2], 16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
    assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
    assign(s->above_partition_ctx, uint8_t *, 8);
    assign(s->above_skip_ctx, uint8_t *, 8);
    assign(s->above_txfm_ctx, uint8_t *, 8);
    assign(s->above_segpred_ctx, uint8_t *, 8);
    assign(s->above_intra_ctx, uint8_t *, 8);
    assign(s->above_comp_ctx, uint8_t *, 8);
    assign(s->above_ref_ctx, uint8_t *, 8);
    assign(s->above_filter_ctx, uint8_t *, 8);
    assign(s->lflvl, struct VP9Filter *, 1);

    // these will be re-allocated a little later
    av_freep(&s->b_base);
    av_freep(&s->block_base);

    // bit depth changed: the DSP function tables depend on it
    if (s->bpp != s->last_bpp) {
        ff_vp9dsp_init(&s->dsp, s->bpp);
        ff_videodsp_init(&s->vdsp, s->bpp);
        s->last_bpp = s->bpp;
/* Allocate the block-state and coefficient/eob scratch buffers. In 2-pass
 * (frame-parallel) mode they must cover the whole frame (one entry per
 * superblock / per block), otherwise one superblock's worth suffices. */
static int update_block_buffers(AVCodecContext *ctx)
    VP9Context *s = ctx->priv_data;
    int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;

    // already allocated for the right pass mode?
    if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)

    av_free(s->block_base);
    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);  // chroma samples per sb
    chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);    // chroma 4x4 blocks per sb
    if (s->frames[CUR_FRAME].uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

        s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
        // layout: [y coefs][u coefs][v coefs][y eobs][u eobs][v eobs]
        s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                    16 * 16 + 2 * chroma_eobs) * sbs);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
        s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
        s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
        s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
        // NOTE(review): single-pass branch; the enclosing '} else {' line is
        // not visible in this chunk of the file
        s->b_base = av_malloc(sizeof(VP9Block));
        s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                   16 * 16 + 2 * chroma_eobs);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
        s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
        s->uveob_base[0] = s->eob_base + 16 * 16;
        s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
    s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
413 // for some reason the sign bit is at the end, not the start, of a bit sequence
414 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
416 int v = get_bits(gb, n);
417 return get_bits1(gb) ? -v : v;
static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    /* Undo the "recenter" mapping used by the subexp probability coder:
     * codes larger than 2*m map to themselves; smaller codes alternate
     * below (odd) and above (even) the pivot m. */
    if (v > 2 * m)
        return v;
    if (v & 1)
        return m - ((v + 1) >> 1);
    return m + (v >> 1);
}
// differential forward probability updates
static int update_prob(VP56RangeCoder *c, int p)
    // maps the coded delta index onto the actual absolute difference;
    // the first 20 entries are the "cheap, rough" update values
    static const int inv_map_table[255] = {
        7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
        10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
        25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
        40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
        55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
        70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
        86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A,255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0,254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    // VLC-style classes of increasing width select the delta index d
    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
        d = vp8_rac_get_uint(c, 7);
        // NOTE(review): the surrounding 'else { ... if (d >= 65)' control
        // lines are not visible in this chunk of the file
        d = (d << 1) - 65 + vp8_rac_get(c);
    av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));

    // fold the absolute difference back around the current probability p
    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
/* Parse the bit depth / colorspace / subsampling fields of the frame header
 * and derive the pixel format. Returns the pixel format on success or a
 * negative error code (invalid/reserved combinations). */
static enum AVPixelFormat read_colorspace_details(AVCodecContext *ctx)
    static const enum AVColorSpace colorspaces[8] = {
        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
    VP9Context *s = ctx->priv_data;
    enum AVPixelFormat res;
    // profiles 0/1 are always 8-bit; profiles 2/3 code a 10/12-bit flag
    int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12

    s->bpp = 8 + bits * 2;
    s->bytesperpixel = (7 + s->bpp) >> 3;
    ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
    if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
        static const enum AVPixelFormat pix_fmt_rgb[3] = {
            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
        if (ctx->profile & 1) {
            // RGB is always 4:4:4 full range
            s->ss_h = s->ss_v = 0;
            res = pix_fmt_rgb[bits];
            ctx->color_range = AVCOL_RANGE_JPEG;
            if (get_bits1(&s->gb)) {
                av_log(ctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
                return AVERROR_INVALIDDATA;
            av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
            return AVERROR_INVALIDDATA;
        // YUV: pixel format indexed by [bit depth][v subsample][h subsample]
        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
            { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
              { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
        ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
        if (ctx->profile & 1) {
            // odd profiles carry explicit subsampling bits
            s->ss_h = get_bits1(&s->gb);
            s->ss_v = get_bits1(&s->gb);
            if ((res = pix_fmt_for_ss[bits][s->ss_v][s->ss_h]) == AV_PIX_FMT_YUV420P) {
                av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
                return AVERROR_INVALIDDATA;
            } else if (get_bits1(&s->gb)) {
                av_log(ctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
                return AVERROR_INVALIDDATA;
            // even profiles are implicitly 4:2:0
            s->ss_h = s->ss_v = 1;
            res = pix_fmt_for_ss[bits][1][1];
/**
 * Parse the VP9 uncompressed frame header and the probability-update part
 * of the compressed (range-coded) header.
 *
 * @param data  frame payload
 * @param size  payload size in bytes
 * @param ref   receives the reference slot index for "show existing frame"
 * @return total header size in bytes on success, negative AVERROR on error
 */
static int decode_frame_header(AVCodecContext *ctx,
                               const uint8_t *data, int size, int *ref)
    VP9Context *s = ctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
    enum AVPixelFormat fmt = ctx->pix_fmt;
    const uint8_t *data2;

    /* general header */
    if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    ctx->profile = get_bits1(&s->gb);
    ctx->profile |= get_bits1(&s->gb) << 1;
    if (ctx->profile == 3) ctx->profile += get_bits1(&s->gb);
    if (ctx->profile > 3) {
        av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
        return AVERROR_INVALIDDATA;
    // "show existing frame": just re-display a stored reference frame
    if (get_bits1(&s->gb)) {
        *ref = get_bits(&s->gb, 3);
    s->last_keyframe = s->keyframe;
    s->keyframe = !get_bits1(&s->gb);
    last_invisible = s->invisible;
    s->invisible = !get_bits1(&s->gb);
    s->errorres = get_bits1(&s->gb);
    s->use_last_frame_mvs = !s->errorres && !last_invisible;
    // NOTE(review): keyframe path — the enclosing branch lines are not
    // visible in this chunk of the file
    if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
        av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
        return AVERROR_INVALIDDATA;
    if ((fmt = read_colorspace_details(ctx)) < 0)
    // for profile 1, here follows the subsampling bits
    s->refreshrefmask = 0xff;  // keyframes refresh all reference slots
    w = get_bits(&s->gb, 16) + 1;
    h = get_bits(&s->gb, 16) + 1;
    if (get_bits1(&s->gb)) // display size
        skip_bits(&s->gb, 32);
    // non-keyframe path
    s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
    s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
    // intra-only frames carry their own sync code + color details
    if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
        av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
        return AVERROR_INVALIDDATA;
    if (ctx->profile >= 1) {
        if ((fmt = read_colorspace_details(ctx)) < 0)
        s->ss_h = s->ss_v = 1;
    // profile-0 intra-only frames are fixed 8-bit 4:2:0 BT.470BG full-range
    s->bytesperpixel = 1;
    fmt = AV_PIX_FMT_YUV420P;
    ctx->colorspace = AVCOL_SPC_BT470BG;
    ctx->color_range = AVCOL_RANGE_JPEG;
    s->refreshrefmask = get_bits(&s->gb, 8);
    w = get_bits(&s->gb, 16) + 1;
    h = get_bits(&s->gb, 16) + 1;
    if (get_bits1(&s->gb)) // display size
        skip_bits(&s->gb, 32);
    // regular inter frame: three active references with per-ref sign bias
    s->refreshrefmask = get_bits(&s->gb, 8);
    s->refidx[0] = get_bits(&s->gb, 3);
    s->signbias[0] = get_bits1(&s->gb) && !s->errorres;
    s->refidx[1] = get_bits(&s->gb, 3);
    s->signbias[1] = get_bits1(&s->gb) && !s->errorres;
    s->refidx[2] = get_bits(&s->gb, 3);
    s->signbias[2] = get_bits1(&s->gb) && !s->errorres;
    if (!s->refs[s->refidx[0]].f->data[0] ||
        !s->refs[s->refidx[1]].f->data[0] ||
        !s->refs[s->refidx[2]].f->data[0]) {
        av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
        return AVERROR_INVALIDDATA;
    // frame size: either inherited from one of the refs or coded explicitly
    if (get_bits1(&s->gb)) {
        w = s->refs[s->refidx[0]].f->width;
        h = s->refs[s->refidx[0]].f->height;
    } else if (get_bits1(&s->gb)) {
        w = s->refs[s->refidx[1]].f->width;
        h = s->refs[s->refidx[1]].f->height;
    } else if (get_bits1(&s->gb)) {
        w = s->refs[s->refidx[2]].f->width;
        h = s->refs[s->refidx[2]].f->height;
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
    // Note that in this code, "CUR_FRAME" is actually before we
    // have formally allocated a frame, and thus actually represents
    s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
                             s->frames[CUR_FRAME].tf.f->height == h;
    if (get_bits1(&s->gb)) // display size
        skip_bits(&s->gb, 32);
    s->highprecisionmvs = get_bits1(&s->gb);
    s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
    // compound prediction needs references with mixed sign biases
    s->allowcompinter = (s->signbias[0] != s->signbias[1] ||
                         s->signbias[0] != s->signbias[2]);
    if (s->allowcompinter) {
        if (s->signbias[0] == s->signbias[1]) {
            s->varcompref[0] = 0;
            s->varcompref[1] = 1;
        } else if (s->signbias[0] == s->signbias[2]) {
            s->varcompref[0] = 0;
            s->varcompref[1] = 2;
            s->varcompref[0] = 1;
            s->varcompref[1] = 2;

    // per-reference 14-bit fixed-point scaling factors (0,0 = unscaled)
    for (i = 0; i < 3; i++) {
        AVFrame *ref = s->refs[s->refidx[i]].f;
        int refw = ref->width, refh = ref->height;

        if (ref->format != fmt) {
            av_log(ctx, AV_LOG_ERROR,
                   "Ref pixfmt (%s) did not match current frame (%s)",
                   av_get_pix_fmt_name(ref->format),
                   av_get_pix_fmt_name(fmt));
            return AVERROR_INVALIDDATA;
        } else if (refw == w && refh == h) {
            s->mvscale[i][0] = s->mvscale[i][1] = 0;
            // ref may be at most 2x larger and at most 16x smaller
            if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
                av_log(ctx, AV_LOG_ERROR,
                       "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
                return AVERROR_INVALIDDATA;
            s->mvscale[i][0] = (refw << 14) / w;
            s->mvscale[i][1] = (refh << 14) / h;
            s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
            s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;

    s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
    s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
    s->framectxid = c = get_bits(&s->gb, 2);

    /* loopfilter header data */
    if (s->keyframe || s->errorres || s->intraonly) {
        // reset loopfilter defaults
        s->lf_delta.ref[0] = 1;
        s->lf_delta.ref[1] = 0;
        s->lf_delta.ref[2] = -1;
        s->lf_delta.ref[3] = -1;
        s->lf_delta.mode[0] = 0;
        s->lf_delta.mode[1] = 0;
        memset(s->segmentation.feat, 0, sizeof(s->segmentation.feat));
    s->filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
    // the old cache values since they are still valid
    if (s->filter.sharpness != sharp)
        memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
    s->filter.sharpness = sharp;
    if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
        if (get_bits1(&s->gb)) {
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);

    /* quantization header data */
    s->yac_qi = get_bits(&s->gb, 8);
    s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
                  s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
        ctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;

    /* segmentation header info */
    s->segmentation.ignore_refmap = 0;
    if ((s->segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->prob.seg[i] = get_bits1(&s->gb) ?
                                 get_bits(&s->gb, 8) : 255;
            if ((s->segmentation.temporal = get_bits1(&s->gb))) {
                for (i = 0; i < 3; i++)
                    s->prob.segpred[i] = get_bits1(&s->gb) ?
                                         get_bits(&s->gb, 8) : 255;
        // a reference segmap cannot be reused across a size change
        if ((!s->segmentation.update_map || s->segmentation.temporal) &&
            (w != s->frames[CUR_FRAME].tf.f->width ||
             h != s->frames[CUR_FRAME].tf.f->height)) {
            av_log(ctx, AV_LOG_WARNING,
                   "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
                   s->segmentation.temporal, s->segmentation.update_map);
            s->segmentation.ignore_refmap = 1;
            //return AVERROR_INVALIDDATA;

        // per-segment feature data (quant, loopfilter, ref, skip)
        if (get_bits1(&s->gb)) {
            s->segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
                if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
                if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
        s->segmentation.feat[0].q_enabled = 0;
        s->segmentation.feat[0].lf_enabled = 0;
        s->segmentation.feat[0].skip_enabled = 0;
        s->segmentation.feat[0].ref_enabled = 0;

    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->segmentation.feat[i].q_enabled) {
            if (s->segmentation.absolute_vals)
                qyac = av_clip_uintp2(s->segmentation.feat[i].q_val, 8);
                qyac = av_clip_uintp2(s->yac_qi + s->segmentation.feat[i].q_val, 8);
        qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
        qyac = av_clip_uintp2(qyac, 8);

        s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
        s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
        s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
        s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];

        // deltas are shifted up when the base filter level is >= 32
        sh = s->filter.level >= 32;
        if (s->segmentation.feat[i].lf_enabled) {
            if (s->segmentation.absolute_vals)
                lflvl = av_clip_uintp2(s->segmentation.feat[i].lf_val, 6);
                lflvl = av_clip_uintp2(s->filter.level + s->segmentation.feat[i].lf_val, 6);
            lflvl = s->filter.level;
        if (s->lf_delta.enabled) {
            s->segmentation.feat[i].lflvl[0][0] =
            s->segmentation.feat[i].lflvl[0][1] =
                av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
            for (j = 1; j < 4; j++) {
                s->segmentation.feat[i].lflvl[j][0] =
                    av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                             s->lf_delta.mode[0]) * (1 << sh)), 6);
                s->segmentation.feat[i].lflvl[j][1] =
                    av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                             s->lf_delta.mode[1]) * (1 << sh)), 6);
            memset(s->segmentation.feat[i].lflvl, lflvl,
                   sizeof(s->segmentation.feat[i].lflvl));

    if ((res = update_size(ctx, w, h, fmt)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n", w, h, fmt);

    /* tiling info: tile columns are capped so each tile is >= 4 and the
     * frame has <= 64 superblock columns per tile */
    for (s->tiling.log2_tile_cols = 0;
         (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
         s->tiling.log2_tile_cols++) ;
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->tiling.log2_tile_cols++;
    s->tiling.log2_tile_rows = decode012(&s->gb);
    s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
    if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
        s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
        // one range coder per tile column
        s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
                                 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
            av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
            return AVERROR(ENOMEM);

    // reset probability contexts to defaults where required
    if (s->keyframe || s->errorres || (s->intraonly && s->resetctx == 3)) {
        s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
        s->prob_ctx[3].p = vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
    } else if (s->intraonly && s->resetctx == 2) {
        s->prob_ctx[c].p = vp9_default_probs;
        memcpy(s->prob_ctx[c].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));

    // next 16 bits is size of the rest of the header (arith-coded)
    size2 = get_bits(&s->gb, 16);
    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;

    // reset the relevant symbol counters for this frame type
    if (s->keyframe || s->intraonly) {
        memset(s->counts.coef, 0, sizeof(s->counts.coef));
        memset(s->counts.eob, 0, sizeof(s->counts.eob));
        memset(&s->counts, 0, sizeof(s->counts));
    // FIXME is it faster to not copy here, but do it down in the fw updates
    // as explicit copies if the fw update is missing (and skip the copy upon
    s->prob.p = s->prob_ctx[c].p;

    /* txfm updates */
        s->txfmmode = TX_4X4;
        s->txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->txfmmode == 3)
            s->txfmmode += vp8_rac_get(&s->c);

        if (s->txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);

    /* coef probability updates, one set per transform size */
    for (i = 0; i < 4; i++) {
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252)) {
                                    p[n] = update_prob(&s->c, r[n]);
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m > 3 && l == 0) // dc only has 3 pt
        if (s->txfmmode == i)

    /* mode updates */
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->keyframe && !s->intraonly) {
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);

        if (s->allowcompinter) {
            s->comppredmode = vp8_rac_get(&s->c);
                s->comppredmode += vp8_rac_get(&s->c);
            if (s->comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        update_prob(&s->c, s->prob.p.comp[i]);
            s->comppredmode = PRED_SINGLEREF;

        if (s->comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);

        if (s->comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);

        // mv fields don't use the update_prob subexp model for some reason
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        // high-precision mv probs are only coded when the flag is set
        if (s->highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

    // total bytes consumed: uncompressed header + compressed header
    return (data2 - data) + size2;
1085 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
/* Clamp a motion vector component-wise into the frame's valid MV range
 * (s->min_mv / s->max_mv, set per-block elsewhere). dst and src may alias:
 * find_ref_mvs() calls clamp_mv(pmv, pmv, s).
 * NOTE(review): the parameter list is truncated in this extract; the missing
 * argument is presumably `VP9Context *s` used below — confirm in full file. */
1088 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
1089 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
1092 static void find_ref_mvs(VP9Context *s,
1093 VP56mv *pmv, int ref, int z, int idx, int sb)
/* Derive a candidate motion-vector prediction (*pmv) for reference frame
 * `ref`, scanning spatial neighbours, the co-located MV in the previous
 * frame, and finally neighbours/previous-frame MVs that use a *different*
 * reference (sign-flipped when the reference sign bias differs).
 *
 * z   - which of the two MVs of a compound block to read (0 or 1)
 * idx - which candidate to return (0 = best/nearest, 1 = second/near)
 * sb  - sub-8x8 sub-block index, or a mode-dependent selector for the
 *       RETURN_DIRECT_MV() shortcuts below (callers pass -1 for NEWMV)
 *
 * NOTE(review): the RETURN_* macro bodies are partially elided in this
 * extract; the control flow annotated below is inferred from the visible
 * lines and should be confirmed against the full file. */
1095 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
/* Per-block-size neighbour scan offsets, in (col, row) 8x8-unit deltas,
 * ordered by decreasing priority. */
1096 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
1097 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
1098 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
1099 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
1100 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
1101 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
1102 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
1103 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1104 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
1105 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1106 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
1107 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
1108 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
1109 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1110 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
1111 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
1112 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1113 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1114 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1115 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1116 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1117 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1118 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1119 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1120 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1121 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1124 int row = s->row, col = s->col, row7 = s->row7;
1125 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
1126 #define INVALID_MV 0x80008000U
/* mem / mem_sub8x8 remember the first distinct candidate seen so that the
 * second (idx == 1) candidate can be required to differ from it. */
1127 uint32_t mem = INVALID_MV, mem_sub8x8 = INVALID_MV;
1130 #define RETURN_DIRECT_MV(mv) \
1132 uint32_t m = AV_RN32A(&mv); \
1136 } else if (mem == INVALID_MV) { \
1138 } else if (m != mem) { \
/* Sub-8x8 shortcut: reuse the MVs already decoded for earlier sub-blocks
 * of this same block before scanning spatial neighbours. */
1145 if (sb == 2 || sb == 1) {
1146 RETURN_DIRECT_MV(b->mv[0][z]);
1147 } else if (sb == 3) {
1148 RETURN_DIRECT_MV(b->mv[2][z]);
1149 RETURN_DIRECT_MV(b->mv[1][z]);
1150 RETURN_DIRECT_MV(b->mv[0][z]);
1153 #define RETURN_MV(mv) \
1158 av_assert2(idx == 1); \
1159 av_assert2(mem != INVALID_MV); \
1160 if (mem_sub8x8 == INVALID_MV) { \
1161 clamp_mv(&tmp, &mv, s); \
1162 m = AV_RN32A(&tmp); \
1167 mem_sub8x8 = AV_RN32A(&mv); \
1168 } else if (mem_sub8x8 != AV_RN32A(&mv)) { \
1169 clamp_mv(&tmp, &mv, s); \
1170 m = AV_RN32A(&tmp); \
1174 /* BUG I'm pretty sure this isn't the intention */ \
1180 uint32_t m = AV_RN32A(&mv); \
1182 clamp_mv(pmv, &mv, s); \
1184 } else if (mem == INVALID_MV) { \
1186 } else if (m != mem) { \
1187 clamp_mv(pmv, &mv, s); \
/* Immediate above/left neighbours via the cached above/left MV context
 * rows, using the same reference frame. */
1194 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1195 if (mv->ref[0] == ref) {
1196 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1197 } else if (mv->ref[1] == ref) {
1198 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
1201 if (col > s->tiling.tile_col_start) {
1202 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1203 if (mv->ref[0] == ref) {
1204 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1205 } else if (mv->ref[1] == ref) {
1206 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1214 // previously coded MVs in this neighbourhood, using same reference frame
1215 for (; i < 8; i++) {
1216 int c = p[i][0] + col, r = p[i][1] + row;
/* Stay inside the current tile column and the frame. */
1218 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1219 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1221 if (mv->ref[0] == ref) {
1222 RETURN_MV(mv->mv[0]);
1223 } else if (mv->ref[1] == ref) {
1224 RETURN_MV(mv->mv[1]);
1229 // MV at this position in previous frame, using same reference frame
1230 if (s->use_last_frame_mvs) {
1231 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
/* In frame-threaded decoding, make sure the reference frame's MV row has
 * been produced before reading it (skipped when 2-pass decoding already
 * guarantees availability). */
1233 if (!s->frames[REF_FRAME_MVPAIR].uses_2pass)
1234 ff_thread_await_progress(&s->frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
1235 if (mv->ref[0] == ref) {
1236 RETURN_MV(mv->mv[0]);
1237 } else if (mv->ref[1] == ref) {
1238 RETURN_MV(mv->mv[1]);
/* Candidates using a different reference: negate the MV when the two
 * references have opposite sign bias ("scale" here is sign inversion). */
1242 #define RETURN_SCALE_MV(mv, scale) \
1245 VP56mv mv_temp = { -mv.x, -mv.y }; \
1246 RETURN_MV(mv_temp); \
1252 // previously coded MVs in this neighbourhood, using different reference frame
1253 for (i = 0; i < 8; i++) {
1254 int c = p[i][0] + col, r = p[i][1] + row;
1256 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1257 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1259 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1260 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1262 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1263 // BUG - libvpx has this condition regardless of whether
1264 // we used the first ref MV and pre-scaling
1265 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1266 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1271 // MV at this position in previous frame, using different reference frame
1272 if (s->use_last_frame_mvs) {
1273 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1275 // no need to await_progress, because we already did that above
1276 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1277 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1279 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1280 // BUG - libvpx has this condition regardless of whether
1281 // we used the first ref MV and pre-scaling
1282 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1283 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
/* No candidate found: fall back to clamping whatever *pmv already holds. */
1288 clamp_mv(pmv, pmv, s);
1291 #undef RETURN_SCALE_MV
1294 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
/* Decode one motion-vector component difference from the range coder.
 *
 * idx - 0 for the row (vertical) component probabilities, 1 for column
 * hp  - whether to read the extra high-precision (1/8-pel) bit
 *
 * Returns the signed magnitude (never 0; sign applied at the end).
 * Per-symbol counters in s->counts are updated for backward adaptation.
 * NOTE(review): several branch/brace lines are elided in this extract;
 * the class0 vs. class>0 split annotated below is inferred. */
1296 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1297 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1298 s->prob.p.mv_comp[idx].classes);
1300 s->counts.mv_comp[idx].sign[sign]++;
1301 s->counts.mv_comp[idx].classes[c]++;
/* Class > 0: read `c` integer bits, then fractional and hp bits. */
1305 for (n = 0, m = 0; m < c; m++) {
1306 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1308 s->counts.mv_comp[idx].bits[m][bit]++;
1311 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1313 s->counts.mv_comp[idx].fp[bit]++;
1315 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1316 s->counts.mv_comp[idx].hp[bit]++;
1320 // bug in libvpx - we count for bw entropy purposes even if the
1322 s->counts.mv_comp[idx].hp[1]++;
/* Class 0: single class0 bit plus fractional (and optional hp) bits. */
1326 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1327 s->counts.mv_comp[idx].class0[n]++;
1328 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1329 s->prob.p.mv_comp[idx].class0_fp[n]);
1330 s->counts.mv_comp[idx].class0_fp[n][bit]++;
1331 n = (n << 3) | (bit << 1);
1333 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1334 s->counts.mv_comp[idx].class0_hp[bit]++;
1338 // bug in libvpx - we count for bw entropy purposes even if the
1340 s->counts.mv_comp[idx].class0_hp[1]++;
/* Apply the sign; magnitude is n + 1 so the result is never zero. */
1344 return sign ? -(n + 1) : (n + 1);
1347 static void fill_mv(VP9Context *s,
1348 VP56mv *mv, int mode, int sb)
/* Fill mv[0] (and mv[1] for compound prediction) for one (sub-)block.
 * For non-ZEROMV modes, a reference MV is predicted via find_ref_mvs();
 * for NEWMV an explicitly coded delta (joint + per-component) is added.
 *
 * sb - sub-block index within the block, or -1 for whole-block modes.
 * NOTE(review): the ZEROMV body, the hp-clamping rounding code and the
 * compound-reference branch condition are elided in this extract. */
1352 if (mode == ZEROMV) {
1357 // FIXME cache this value and reuse for other subblocks
1358 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1359 mode == NEWMV ? -1 : sb);
1360 // FIXME maybe move this code into find_ref_mvs()
/* hp is forced off when the predicted MV is large (|comp| >= 64), per
 * the VP9 "use_hp" rule; the elided code then rounds the prediction. */
1361 if ((mode == NEWMV || sb == -1) &&
1362 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1376 if (mode == NEWMV) {
1377 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1378 s->prob.p.mv_joint);
1380 s->counts.mv_joint[j]++;
/* The joint code says which components carry a coded delta. */
1381 if (j >= MV_JOINT_V)
1382 mv[0].y += read_mv_component(s, 0, hp);
1384 mv[0].x += read_mv_component(s, 1, hp);
/* Second MV of a compound block: same procedure with ref[1]. */
1388 // FIXME cache this value and reuse for other subblocks
1389 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1390 mode == NEWMV ? -1 : sb);
1391 if ((mode == NEWMV || sb == -1) &&
1392 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1406 if (mode == NEWMV) {
1407 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1408 s->prob.p.mv_joint);
1410 s->counts.mv_joint[j]++;
1411 if (j >= MV_JOINT_V)
1412 mv[1].y += read_mv_component(s, 0, hp);
1414 mv[1].x += read_mv_component(s, 1, hp);
1420 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1421 ptrdiff_t stride, int v)
/* Fill a w x h byte region at `ptr` (row stride `stride`) with value `v`,
 * using the widest aligned store available for each width.
 * NOTE(review): the switch/loop skeleton is elided in this extract; only
 * the per-width store bodies are visible. */
1431 int v16 = v * 0x0101;
1439 uint32_t v32 = v * 0x01010101;
1448 uint64_t v64 = v * 0x0101010101010101ULL;
/* Widths without a single native store (e.g. 8 bytes on 32-bit-only
 * targets) are written as two 32-bit halves. */
1454 uint32_t v32 = v * 0x01010101;
1457 AV_WN32A(ptr + 4, v32);
1466 static void decode_mode(AVCodecContext *ctx)
/* Decode all per-block mode information for the current block (s->b at
 * s->row/s->col): segment id, skip flag, intra/inter decision, transform
 * size, intra prediction modes or inter references/filter/modes+MVs, then
 * update the above/left context arrays and the per-frame MV/ref storage.
 * Context derivation mirrors libvpx and must not be reordered.
 * NOTE(review): numerous brace/else lines are elided in this extract;
 * section comments below describe the visible structure only. */
1468 static const uint8_t left_ctx[N_BS_SIZES] = {
1469 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1471 static const uint8_t above_ctx[N_BS_SIZES] = {
1472 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
/* Largest transform size permitted for each block size. */
1474 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1475 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1476 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1478 VP9Context *s = ctx->priv_data;
1480 int row = s->row, col = s->col, row7 = s->row7;
1481 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
/* w4/h4 clip the block's 8x8-unit extent to the frame edge. */
1482 int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1483 int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
1484 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1485 int vref, filter_id;
/* --- segment id ------------------------------------------------------ */
1487 if (!s->segmentation.enabled) {
1489 } else if (s->keyframe || s->intraonly) {
1490 b->seg_id = !s->segmentation.update_map ? 0 :
1491 vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
1492 } else if (!s->segmentation.update_map ||
1493 (s->segmentation.temporal &&
1494 vp56_rac_get_prob_branchy(&s->c,
1495 s->prob.segpred[s->above_segpred_ctx[col] +
1496 s->left_segpred_ctx[row7]]))) {
/* Temporally predicted segment id: take the minimum id over the
 * co-located area in the reference segmentation map. */
1497 if (!s->errorres && !s->segmentation.ignore_refmap) {
1499 uint8_t *refsegmap = s->frames[REF_FRAME_SEGMAP].segmentation_map;
1501 if (!s->frames[REF_FRAME_SEGMAP].uses_2pass)
1502 ff_thread_await_progress(&s->frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
1503 for (y = 0; y < h4; y++) {
1504 int idx_base = (y + row) * 8 * s->sb_cols + col;
1505 for (x = 0; x < w4; x++)
1506 pred = FFMIN(pred, refsegmap[idx_base + x]);
1508 av_assert1(pred < 8);
1514 memset(&s->above_segpred_ctx[col], 1, w4);
1515 memset(&s->left_segpred_ctx[row7], 1, h4);
1517 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1520 memset(&s->above_segpred_ctx[col], 0, w4);
1521 memset(&s->left_segpred_ctx[row7], 0, h4);
1523 if (s->segmentation.enabled &&
1524 (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1525 setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1526 bw4, bh4, 8 * s->sb_cols, b->seg_id);
/* --- skip flag ------------------------------------------------------- */
1529 b->skip = s->segmentation.enabled &&
1530 s->segmentation.feat[b->seg_id].skip_enabled;
1532 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1533 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1534 s->counts.skip[c][b->skip]++;
/* --- intra/inter decision -------------------------------------------- */
1537 if (s->keyframe || s->intraonly) {
1539 } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1540 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1544 if (have_a && have_l) {
1545 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1548 c = have_a ? 2 * s->above_intra_ctx[col] :
1549 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1551 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1552 s->counts.intra[c][bit]++;
/* --- transform size -------------------------------------------------- */
1556 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1560 c = (s->above_skip_ctx[col] ? max_tx :
1561 s->above_txfm_ctx[col]) +
1562 (s->left_skip_ctx[row7] ? max_tx :
1563 s->left_txfm_ctx[row7]) > max_tx;
1565 c = s->above_skip_ctx[col] ? 1 :
1566 (s->above_txfm_ctx[col] * 2 > max_tx);
1568 } else if (have_l) {
1569 c = s->left_skip_ctx[row7] ? 1 :
1570 (s->left_txfm_ctx[row7] * 2 > max_tx);
/* Read up to max_tx unary tx32p/tx16p/tx8p bits depending on the
 * maximum transform size for this block size. */
1576 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1578 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1580 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1582 s->counts.tx32p[c][b->tx]++;
1585 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1587 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1588 s->counts.tx16p[c][b->tx]++;
1591 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1592 s->counts.tx8p[c][b->tx]++;
1599 b->tx = FFMIN(max_tx, s->txfmmode);
/* --- intra modes (keyframe / intra-only: default kf probabilities) --- */
1602 if (s->keyframe || s->intraonly) {
1603 uint8_t *a = &s->above_mode_ctx[col * 2];
1604 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1607 if (b->bs > BS_8x8) {
1608 // FIXME the memory storage intermediates here aren't really
1609 // necessary, they're just there to make the code slightly
1611 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1612 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1613 if (b->bs != BS_8x4) {
1614 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1615 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1616 l[0] = a[1] = b->mode[1];
1618 l[0] = a[1] = b->mode[1] = b->mode[0];
1620 if (b->bs != BS_4x8) {
1621 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1622 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1623 if (b->bs != BS_8x4) {
1624 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1625 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1626 l[1] = a[1] = b->mode[3];
1628 l[1] = a[1] = b->mode[3] = b->mode[2];
1631 b->mode[2] = b->mode[0];
1632 l[1] = a[1] = b->mode[3] = b->mode[1];
1635 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1636 vp9_default_kf_ymode_probs[*a][*l]);
1637 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1638 // FIXME this can probably be optimized
1639 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1640 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1642 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1643 vp9_default_kf_uvmode_probs[b->mode[3]]);
/* --- intra modes in inter frames (adaptive y_mode probabilities) ----- */
1644 } else if (b->intra) {
1646 if (b->bs > BS_8x8) {
1647 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1648 s->prob.p.y_mode[0]);
1649 s->counts.y_mode[0][b->mode[0]]++;
1650 if (b->bs != BS_8x4) {
1651 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1652 s->prob.p.y_mode[0]);
1653 s->counts.y_mode[0][b->mode[1]]++;
1655 b->mode[1] = b->mode[0];
1657 if (b->bs != BS_4x8) {
1658 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1659 s->prob.p.y_mode[0]);
1660 s->counts.y_mode[0][b->mode[2]]++;
1661 if (b->bs != BS_8x4) {
1662 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1663 s->prob.p.y_mode[0]);
1664 s->counts.y_mode[0][b->mode[3]]++;
1666 b->mode[3] = b->mode[2];
1669 b->mode[2] = b->mode[0];
1670 b->mode[3] = b->mode[1];
1673 static const uint8_t size_group[10] = {
1674 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1676 int sz = size_group[b->bs];
1678 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1679 s->prob.p.y_mode[sz]);
1680 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1681 s->counts.y_mode[sz][b->mode[3]]++;
1683 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1684 s->prob.p.uv_mode[b->mode[3]]);
1685 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
/* --- inter path ------------------------------------------------------ */
1687 static const uint8_t inter_mode_ctx_lut[14][14] = {
1688 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1689 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1690 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1691 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1692 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1693 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1694 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1695 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1696 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1697 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1698 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1699 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1700 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1701 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
1704 if (s->segmentation.feat[b->seg_id].ref_enabled) {
1705 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1707 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1709 // read comp_pred flag
1710 if (s->comppredmode != PRED_SWITCHABLE) {
1711 b->comp = s->comppredmode == PRED_COMPREF;
1715 // FIXME add intra as ref=0xff (or -1) to make these easier?
1718 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1720 } else if (s->above_comp_ctx[col]) {
1721 c = 2 + (s->left_intra_ctx[row7] ||
1722 s->left_ref_ctx[row7] == s->fixcompref);
1723 } else if (s->left_comp_ctx[row7]) {
1724 c = 2 + (s->above_intra_ctx[col] ||
1725 s->above_ref_ctx[col] == s->fixcompref);
1727 c = (!s->above_intra_ctx[col] &&
1728 s->above_ref_ctx[col] == s->fixcompref) ^
1729 (!s->left_intra_ctx[row7] &&
/* NOTE(review): `row & 7` here where every sibling context read uses
 * `row7` — looks inconsistent; row7 presumably equals row & 7, verify. */
1730 s->left_ref_ctx[row & 7] == s->fixcompref);
1733 c = s->above_comp_ctx[col] ? 3 :
1734 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1736 } else if (have_l) {
1737 c = s->left_comp_ctx[row7] ? 3 :
1738 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1742 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1743 s->counts.comp[c][b->comp]++;
1746 // read actual references
1747 // FIXME probably cache a few variables here to prevent repetitive
1748 // memory accesses below
1749 if (b->comp) /* two references */ {
1750 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1752 b->ref[fix_idx] = s->fixcompref;
1753 // FIXME can this codeblob be replaced by some sort of LUT?
1756 if (s->above_intra_ctx[col]) {
1757 if (s->left_intra_ctx[row7]) {
1760 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1762 } else if (s->left_intra_ctx[row7]) {
1763 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1765 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1767 if (refl == refa && refa == s->varcompref[1]) {
1769 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1770 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1771 (refl == s->fixcompref && refa == s->varcompref[0])) {
1774 c = (refa == refl) ? 3 : 1;
1776 } else if (!s->left_comp_ctx[row7]) {
1777 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1780 c = (refl == s->varcompref[1] &&
1781 refa != s->varcompref[1]) ? 2 : 4;
1783 } else if (!s->above_comp_ctx[col]) {
1784 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1787 c = (refa == s->varcompref[1] &&
1788 refl != s->varcompref[1]) ? 2 : 4;
1791 c = (refl == refa) ? 4 : 2;
1795 if (s->above_intra_ctx[col]) {
1797 } else if (s->above_comp_ctx[col]) {
1798 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1800 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1803 } else if (have_l) {
1804 if (s->left_intra_ctx[row7]) {
1806 } else if (s->left_comp_ctx[row7]) {
1807 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1809 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1814 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1815 b->ref[var_idx] = s->varcompref[bit];
1816 s->counts.comp_ref[c][bit]++;
1817 } else /* single reference */ {
/* First single_ref bit: LAST vs. GOLDEN/ALTREF. */
1820 if (have_a && !s->above_intra_ctx[col]) {
1821 if (have_l && !s->left_intra_ctx[row7]) {
1822 if (s->left_comp_ctx[row7]) {
1823 if (s->above_comp_ctx[col]) {
1824 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1825 !s->above_ref_ctx[col]);
1827 c = (3 * !s->above_ref_ctx[col]) +
1828 (!s->fixcompref || !s->left_ref_ctx[row7]);
1830 } else if (s->above_comp_ctx[col]) {
1831 c = (3 * !s->left_ref_ctx[row7]) +
1832 (!s->fixcompref || !s->above_ref_ctx[col]);
1834 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1836 } else if (s->above_intra_ctx[col]) {
1838 } else if (s->above_comp_ctx[col]) {
1839 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1841 c = 4 * (!s->above_ref_ctx[col]);
1843 } else if (have_l && !s->left_intra_ctx[row7]) {
/* NOTE(review): this branch is only reached when left is NOT intra,
 * yet re-tests s->left_intra_ctx[row7] — dead condition, verify. */
1844 if (s->left_intra_ctx[row7]) {
1846 } else if (s->left_comp_ctx[row7]) {
1847 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1849 c = 4 * (!s->left_ref_ctx[row7]);
1854 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1855 s->counts.single_ref[c][0][bit]++;
/* Second single_ref bit: GOLDEN vs. ALTREF. */
1859 // FIXME can this codeblob be replaced by some sort of LUT?
1862 if (s->left_intra_ctx[row7]) {
1863 if (s->above_intra_ctx[col]) {
1865 } else if (s->above_comp_ctx[col]) {
1866 c = 1 + 2 * (s->fixcompref == 1 ||
1867 s->above_ref_ctx[col] == 1);
1868 } else if (!s->above_ref_ctx[col]) {
1871 c = 4 * (s->above_ref_ctx[col] == 1);
1873 } else if (s->above_intra_ctx[col]) {
1874 if (s->left_intra_ctx[row7]) {
1876 } else if (s->left_comp_ctx[row7]) {
1877 c = 1 + 2 * (s->fixcompref == 1 ||
1878 s->left_ref_ctx[row7] == 1);
1879 } else if (!s->left_ref_ctx[row7]) {
1882 c = 4 * (s->left_ref_ctx[row7] == 1);
1884 } else if (s->above_comp_ctx[col]) {
1885 if (s->left_comp_ctx[row7]) {
1886 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1887 c = 3 * (s->fixcompref == 1 ||
1888 s->left_ref_ctx[row7] == 1);
1892 } else if (!s->left_ref_ctx[row7]) {
1893 c = 1 + 2 * (s->fixcompref == 1 ||
1894 s->above_ref_ctx[col] == 1);
1896 c = 3 * (s->left_ref_ctx[row7] == 1) +
1897 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1899 } else if (s->left_comp_ctx[row7]) {
1900 if (!s->above_ref_ctx[col]) {
1901 c = 1 + 2 * (s->fixcompref == 1 ||
1902 s->left_ref_ctx[row7] == 1);
1904 c = 3 * (s->above_ref_ctx[col] == 1) +
1905 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1907 } else if (!s->above_ref_ctx[col]) {
1908 if (!s->left_ref_ctx[row7]) {
1911 c = 4 * (s->left_ref_ctx[row7] == 1);
1913 } else if (!s->left_ref_ctx[row7]) {
1914 c = 4 * (s->above_ref_ctx[col] == 1);
1916 c = 2 * (s->left_ref_ctx[row7] == 1) +
1917 2 * (s->above_ref_ctx[col] == 1);
1920 if (s->above_intra_ctx[col] ||
1921 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1923 } else if (s->above_comp_ctx[col]) {
1924 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1926 c = 4 * (s->above_ref_ctx[col] == 1);
1929 } else if (have_l) {
1930 if (s->left_intra_ctx[row7] ||
1931 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1933 } else if (s->left_comp_ctx[row7]) {
1934 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1936 c = 4 * (s->left_ref_ctx[row7] == 1);
1941 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1942 s->counts.single_ref[c][1][bit]++;
1943 b->ref[0] = 1 + bit;
/* --- inter mode + interpolation filter ------------------------------- */
1948 if (b->bs <= BS_8x8) {
1949 if (s->segmentation.feat[b->seg_id].skip_enabled) {
1950 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1952 static const uint8_t off[10] = {
1953 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1956 // FIXME this needs to use the LUT tables from find_ref_mvs
1957 // because not all are -1,0/0,-1
1958 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1959 [s->left_mode_ctx[row7 + off[b->bs]]];
1961 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1962 s->prob.p.mv_mode[c]);
1963 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
/* mv_mode symbols start at NEARESTMV (10); counts are 0-based. */
1964 s->counts.mv_mode[c][b->mode[0] - 10]++;
1968 if (s->filtermode == FILTER_SWITCHABLE) {
1971 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1972 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1973 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1974 s->left_filter_ctx[row7] : 3;
1976 c = s->above_filter_ctx[col];
1978 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1979 c = s->left_filter_ctx[row7];
1984 filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1985 s->prob.p.filter[c]);
1986 s->counts.filter[c][filter_id]++;
1987 b->filter = vp9_filter_lut[filter_id];
1989 b->filter = s->filtermode;
/* Sub-8x8 blocks decode one mode+MV per sub-block; larger blocks decode
 * a single mode and replicate it across mode[0..3]/mv[0..3]. */
1992 if (b->bs > BS_8x8) {
1993 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1995 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1996 s->prob.p.mv_mode[c]);
1997 s->counts.mv_mode[c][b->mode[0] - 10]++;
1998 fill_mv(s, b->mv[0], b->mode[0], 0);
2000 if (b->bs != BS_8x4) {
2001 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
2002 s->prob.p.mv_mode[c]);
2003 s->counts.mv_mode[c][b->mode[1] - 10]++;
2004 fill_mv(s, b->mv[1], b->mode[1], 1);
2006 b->mode[1] = b->mode[0];
2007 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
2008 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2011 if (b->bs != BS_4x8) {
2012 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
2013 s->prob.p.mv_mode[c]);
2014 s->counts.mv_mode[c][b->mode[2] - 10]++;
2015 fill_mv(s, b->mv[2], b->mode[2], 2);
2017 if (b->bs != BS_8x4) {
2018 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
2019 s->prob.p.mv_mode[c]);
2020 s->counts.mv_mode[c][b->mode[3] - 10]++;
2021 fill_mv(s, b->mv[3], b->mode[3], 3);
2023 b->mode[3] = b->mode[2];
2024 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
2025 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
2028 b->mode[2] = b->mode[0];
2029 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2030 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2031 b->mode[3] = b->mode[1];
2032 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
2033 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
2036 fill_mv(s, b->mv[0], b->mode[0], -1);
2037 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
2038 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2039 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
2040 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2041 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2042 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
2045 vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
/* --- splat decoded values into above/left context arrays ------------- */
/* SPLAT_CTX writes n copies of an 8-bit value with one aligned store
 * where possible; the two definitions cover 64-bit and 32-bit targets. */
2049 #define SPLAT_CTX(var, val, n) \
2051 case 1: var = val; break; \
2052 case 2: AV_WN16A(&var, val * 0x0101); break; \
2053 case 4: AV_WN32A(&var, val * 0x01010101); break; \
2054 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
2056 uint64_t v64 = val * 0x0101010101010101ULL; \
2057 AV_WN64A( &var, v64); \
2058 AV_WN64A(&((uint8_t *) &var)[8], v64); \
2063 #define SPLAT_CTX(var, val, n) \
2065 case 1: var = val; break; \
2066 case 2: AV_WN16A(&var, val * 0x0101); break; \
2067 case 4: AV_WN32A(&var, val * 0x01010101); break; \
2069 uint32_t v32 = val * 0x01010101; \
2070 AV_WN32A( &var, v32); \
2071 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2075 uint32_t v32 = val * 0x01010101; \
2076 AV_WN32A( &var, v32); \
2077 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2078 AV_WN32A(&((uint8_t *) &var)[8], v32); \
2079 AV_WN32A(&((uint8_t *) &var)[12], v32); \
2085 switch (bwh_tab[1][b->bs][0]) {
2086 #define SET_CTXS(dir, off, n) \
2088 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
2089 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
2090 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2091 if (!s->keyframe && !s->intraonly) { \
2092 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
2093 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
2094 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
2096 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2097 if (s->filtermode == FILTER_SWITCHABLE) { \
2098 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2103 case 1: SET_CTXS(above, col, 1); break;
2104 case 2: SET_CTXS(above, col, 2); break;
2105 case 4: SET_CTXS(above, col, 4); break;
2106 case 8: SET_CTXS(above, col, 8); break;
2108 switch (bwh_tab[1][b->bs][1]) {
2109 case 1: SET_CTXS(left, row7, 1); break;
2110 case 2: SET_CTXS(left, row7, 2); break;
2111 case 4: SET_CTXS(left, row7, 4); break;
2112 case 8: SET_CTXS(left, row7, 8); break;
/* --- store MVs into the above/left MV context rows ------------------- */
2117 if (!s->keyframe && !s->intraonly) {
2118 if (b->bs > BS_8x8) {
2119 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2121 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2122 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2123 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2124 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2125 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2126 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2127 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2128 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2130 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2132 for (n = 0; n < w4 * 2; n++) {
2133 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2134 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2136 for (n = 0; n < h4 * 2; n++) {
2137 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2138 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
/* --- store refs/MVs into the per-frame MV/ref map -------------------- */
2144 for (y = 0; y < h4; y++) {
2145 int x, o = (row + y) * s->sb_cols * 8 + col;
2146 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
2149 for (x = 0; x < w4; x++) {
2153 } else if (b->comp) {
2154 for (x = 0; x < w4; x++) {
2155 mv[x].ref[0] = b->ref[0];
2156 mv[x].ref[1] = b->ref[1];
2157 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2158 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2161 for (x = 0; x < w4; x++) {
2162 mv[x].ref[0] = b->ref[0];
2164 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2170 // FIXME merge cnt/eob arguments?
2171 static av_always_inline int
2172 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2173 int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
2174 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2175 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2176 const int16_t *band_counts, const int16_t *qmul)
/* Decode one block of transform coefficients from the range coder.
 *
 * is_tx32x32        - 32x32 transform: decoded values are halved on store
 * is8bitsperpixel   - selects 16-bit vs. 32-bit coefficient storage
 * cnt/eob           - adaptation counters, indexed [band][nnz ctx][symbol]
 * p                 - probability model [band][nnz ctx][token]; entries
 *                     3..10 are lazily filled from vp9_model_pareto8
 * nnz               - initial non-zero context from neighbouring blocks
 * scan/nb           - scan order and per-position neighbour pairs used to
 *                     derive the running nnz context
 * band_counts/qmul  - coefficients per band / DC+AC dequant multipliers
 *
 * Returns the end-of-block position (count of decoded coefficients);
 * inlined so the four *_8bpp/_16bpp wrappers specialize the constants.
 * NOTE(review): loop braces and the `rc = scan[i]` / return lines are
 * elided in this extract; annotations reflect the visible lines only. */
2178 int i = 0, band = 0, band_left = band_counts[band];
2179 uint8_t *tp = p[0][nnz];
2180 uint8_t cache[1024];
2185 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2186 eob[band][nnz][val]++;
2191 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2192 cnt[band][nnz][0]++;
2194 band_left = band_counts[++band];
/* Next-token context: average of the two neighbour magnitudes. */
2196 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2198 if (++i == n_coeffs)
2199 break; //invalid input; blocks should end with EOB
2204 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2205 cnt[band][nnz][1]++;
2209 // fill in p[3-10] (model fill) - only once per frame for each pos
2211 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2213 cnt[band][nnz][2]++;
2214 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2215 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2216 cache[rc] = val = 2;
2218 val = 3 + vp56_rac_get_prob(c, tp[5]);
2221 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2223 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2224 val = 5 + vp56_rac_get_prob(c, 159);
2226 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2227 val += vp56_rac_get_prob(c, 145);
/* cat3-cat6: progressively longer fixed-probability suffixes. */
2231 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2232 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2233 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2234 val += (vp56_rac_get_prob(c, 148) << 1);
2235 val += vp56_rac_get_prob(c, 140);
2237 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2238 val += (vp56_rac_get_prob(c, 155) << 2);
2239 val += (vp56_rac_get_prob(c, 140) << 1);
2240 val += vp56_rac_get_prob(c, 135);
2242 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2243 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2244 val += (vp56_rac_get_prob(c, 157) << 3);
2245 val += (vp56_rac_get_prob(c, 141) << 2);
2246 val += (vp56_rac_get_prob(c, 134) << 1);
2247 val += vp56_rac_get_prob(c, 130);
/* cat6: high-bit-depth streams carry extra leading bits. */
2250 if (!is8bitsperpixel) {
2252 val += vp56_rac_get_prob(c, 255) << 17;
2253 val += vp56_rac_get_prob(c, 255) << 16;
2255 val += (vp56_rac_get_prob(c, 255) << 15);
2256 val += (vp56_rac_get_prob(c, 255) << 14);
2258 val += (vp56_rac_get_prob(c, 254) << 13);
2259 val += (vp56_rac_get_prob(c, 254) << 12);
2260 val += (vp56_rac_get_prob(c, 254) << 11);
2261 val += (vp56_rac_get_prob(c, 252) << 10);
2262 val += (vp56_rac_get_prob(c, 249) << 9);
2263 val += (vp56_rac_get_prob(c, 243) << 8);
2264 val += (vp56_rac_get_prob(c, 230) << 7);
2265 val += (vp56_rac_get_prob(c, 196) << 6);
2266 val += (vp56_rac_get_prob(c, 177) << 5);
2267 val += (vp56_rac_get_prob(c, 153) << 4);
2268 val += (vp56_rac_get_prob(c, 140) << 3);
2269 val += (vp56_rac_get_prob(c, 133) << 2);
2270 val += (vp56_rac_get_prob(c, 130) << 1);
2271 val += vp56_rac_get_prob(c, 129);
2275 #define STORE_COEF(c, i, v) do { \
2276 if (is8bitsperpixel) { \
2279 AV_WN32A(&c[i * 2], v); \
2283 band_left = band_counts[++band];
/* Sign bit, dequantize (qmul[0] = DC, qmul[1] = AC), halve for 32x32. */
2285 STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
2287 STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
2288 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2290 } while (++i < n_coeffs);
2295 static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2296 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2297 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2298 const int16_t (*nb)[2], const int16_t *band_counts,
2299 const int16_t *qmul)
2301 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
2302 nnz, scan, nb, band_counts, qmul);
2305 static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2306 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2307 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2308 const int16_t (*nb)[2], const int16_t *band_counts,
2309 const int16_t *qmul)
2311 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
2312 nnz, scan, nb, band_counts, qmul);
2315 static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2316 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2317 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2318 const int16_t (*nb)[2], const int16_t *band_counts,
2319 const int16_t *qmul)
2321 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->bpp, cnt, eob, p,
2322 nnz, scan, nb, band_counts, qmul);
2325 static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2326 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2327 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2328 const int16_t (*nb)[2], const int16_t *band_counts,
2329 const int16_t *qmul)
2331 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->bpp, cnt, eob, p,
2332 nnz, scan, nb, band_counts, qmul);
/*
 * Decode all residual coefficients of the current block: luma first, then
 * both chroma planes. Maintains the above/left non-zero contexts at per-4x4
 * granularity (MERGE before decoding at a larger tx size, SPLAT afterwards)
 * and records per-transform-block EOBs in s->eob / s->uveob for the later
 * reconstruction pass. Returns nonzero iff any coefficient was coded.
 *
 * NOTE(review): this chunk is a mangled extraction — interior lines are
 * missing and every line carries a stray original-line-number prefix. Only
 * comments are added here; the code bytes are left untouched.
 */
2335 static av_always_inline int decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
2337 VP9Context *s = ctx->priv_data;
2339 int row = s->row, col = s->col;
/* probability / adaptation-count / EOB-count tables for luma ([band][nnz ctx]) */
2340 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2341 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2342 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2343 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
/* clip the coded area to the visible frame (units of 4x4 blocks) */
2344 int end_x = FFMIN(2 * (s->cols - col), w4);
2345 int end_y = FFMIN(2 * (s->rows - row), h4);
2346 int n, pl, x, y, res;
2347 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
/* lossless selects the WHT scan variants stored at offset 4 in the tables */
2348 int tx = 4 * s->lossless + b->tx;
2349 const int16_t * const *yscans = vp9_scans[tx];
2350 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2351 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2352 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2353 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2354 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
/* coefficients per probability band, indexed by transform size */
2355 static const int16_t band_counts[4][8] = {
2356 { 1, 2, 3, 4, 3, 16 - 13 },
2357 { 1, 2, 3, 4, 11, 64 - 21 },
2358 { 1, 2, 3, 4, 11, 256 - 21 },
2359 { 1, 2, 3, 4, 11, 1024 - 21 },
2361 const int16_t *y_band_counts = band_counts[b->tx];
2362 const int16_t *uv_band_counts = band_counts[b->uvtx];
2363 int bytesperpixel = is8bitsperpixel ? 1 : 2;
2364 int total_coeff = 0;
/* collapse per-4x4 nnz contexts down to one flag per transform block */
2366 #define MERGE(la, end, step, rd) \
2367 for (n = 0; n < end; n += step) \
2368 la[n] = !!rd(&la[n])
2369 #define MERGE_CTX(step, rd) \
2371 MERGE(l, end_y, step, rd); \
2372 MERGE(a, end_x, step, rd); \
/* decode one luma transform block per iteration; the ##v suffix picks the
 * 32x32 ("32") or sub-32x32 ("") specialized decoder at preprocessing time */
2375 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2376 for (n = 0, y = 0; y < end_y; y += step) { \
2377 for (x = 0; x < end_x; x += step, n += step * step) { \
2378 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2379 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2380 (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
2381 c, e, p, a[x] + l[y], yscans[txtp], \
2382 ynbs[txtp], y_band_counts, qmul[0]); \
2383 a[x] = l[y] = !!res; \
2384 total_coeff |= !!res; \
2386 AV_WN16A(&s->eob[n], res); \
/* replicate the merged context flag back out to per-4x4 granularity */
2393 #define SPLAT(la, end, step, cond) \
2395 for (n = 1; n < end; n += step) \
2396 la[n] = la[n - 1]; \
2397 } else if (step == 4) { \
2399 for (n = 0; n < end; n += step) \
2400 AV_WN32A(&la[n], la[n] * 0x01010101); \
2402 for (n = 0; n < end; n += step) \
2403 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2405 } else /* step == 8 */ { \
2407 if (HAVE_FAST_64BIT) { \
2408 for (n = 0; n < end; n += step) \
2409 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2411 for (n = 0; n < end; n += step) { \
2412 uint32_t v32 = la[n] * 0x01010101; \
2413 AV_WN32A(&la[n], v32); \
2414 AV_WN32A(&la[n + 4], v32); \
2418 for (n = 0; n < end; n += step) \
2419 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2422 #define SPLAT_CTX(step) \
2424 SPLAT(a, end_x, step, end_x == w4); \
2425 SPLAT(l, end_y, step, end_y == h4); \
/* luma: dispatch on b->tx (TX_4X4 needs no context merge/splat) */
2431 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2434 MERGE_CTX(2, AV_RN16A);
2435 DECODE_Y_COEF_LOOP(2, 0,);
2439 MERGE_CTX(4, AV_RN32A);
2440 DECODE_Y_COEF_LOOP(4, 0,);
2444 MERGE_CTX(8, AV_RN64A);
2445 DECODE_Y_COEF_LOOP(8, 0, 32);
/* chroma: same structure, but always the DCT_DCT scan and qmul[1] */
2450 #define DECODE_UV_COEF_LOOP(step, v) \
2451 for (n = 0, y = 0; y < end_y; y += step) { \
2452 for (x = 0; x < end_x; x += step, n += step * step) { \
2453 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2454 (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
2455 16 * step * step, c, e, p, a[x] + l[y], \
2456 uvscan, uvnb, uv_band_counts, qmul[1]); \
2457 a[x] = l[y] = !!res; \
2458 total_coeff |= !!res; \
2460 AV_WN16A(&s->uveob[pl][n], res); \
2462 s->uveob[pl][n] = res; \
2467 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2468 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2469 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
/* both chroma planes share the same tx size and contexts per plane */
2474 for (pl = 0; pl < 2; pl++) {
2475 a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2476 l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2479 DECODE_UV_COEF_LOOP(1,);
2482 MERGE_CTX(2, AV_RN16A);
2483 DECODE_UV_COEF_LOOP(2,);
2487 MERGE_CTX(4, AV_RN32A);
2488 DECODE_UV_COEF_LOOP(4,);
2492 MERGE_CTX(8, AV_RN64A);
2493 DECODE_UV_COEF_LOOP(8, 32);
2502 static int decode_coeffs_8bpp(AVCodecContext *ctx)
2504 return decode_coeffs(ctx, 1);
2507 static int decode_coeffs_16bpp(AVCodecContext *ctx)
2509 return decode_coeffs(ctx, 0);
/*
 * Fix up an intra prediction mode against the block's actual neighbour
 * availability and assemble the top (*a) and left (l) prediction edge
 * buffers for one transform block. Modes whose required neighbours are
 * missing are remapped (mode_conv) to DC_127/128/129 fallbacks; edge pixels
 * that extend past the frame are extended from the last available pixel.
 * Returns the possibly-substituted mode.
 *
 * NOTE(review): mangled extraction — interior lines are missing and every
 * line carries a stray original-line-number prefix. Comments only; code
 * bytes untouched.
 */
2512 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2513 uint8_t *dst_edge, ptrdiff_t stride_edge,
2514 uint8_t *dst_inner, ptrdiff_t stride_inner,
2515 uint8_t *l, int col, int x, int w,
2516 int row, int y, enum TxfmMode tx,
2517 int p, int ss_h, int ss_v, int bytesperpixel)
/* edge availability: left stops at the tile boundary, not just the frame */
2519 int have_top = row > 0 || y > 0;
2520 int have_left = col > s->tiling.tile_col_start || x > 0;
2521 int have_right = x < w - 1;
/* mode substitution table for missing left/top neighbours */
2523 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2524 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2525 { DC_127_PRED, VERT_PRED } },
2526 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2527 { HOR_PRED, HOR_PRED } },
2528 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2529 { LEFT_DC_PRED, DC_PRED } },
2530 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2531 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2532 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2533 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2534 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2535 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2536 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2537 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2538 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2539 { DC_127_PRED, VERT_LEFT_PRED } },
2540 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2541 { HOR_UP_PRED, HOR_UP_PRED } },
2542 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2543 { HOR_PRED, TM_VP8_PRED } },
/* which edge pixels each (possibly substituted) mode actually reads */
2545 static const struct {
2546 uint8_t needs_left:1;
2547 uint8_t needs_top:1;
2548 uint8_t needs_topleft:1;
2549 uint8_t needs_topright:1;
2550 uint8_t invert_left:1;
2551 } edges[N_INTRA_PRED_MODES] = {
2552 [VERT_PRED] = { .needs_top = 1 },
2553 [HOR_PRED] = { .needs_left = 1 },
2554 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2555 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2556 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2557 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2558 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2559 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2560 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2561 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2562 [LEFT_DC_PRED] = { .needs_left = 1 },
2563 [TOP_DC_PRED] = { .needs_top = 1 },
2564 [DC_128_PRED] = { 0 },
2565 [DC_127_PRED] = { 0 },
2566 [DC_129_PRED] = { 0 }
/* remap the mode first, then build only the edges the new mode needs */
2569 av_assert2(mode >= 0 && mode < 10);
2570 mode = mode_conv[mode][have_left][have_top];
2571 if (edges[mode].needs_top) {
2572 uint8_t *top, *topleft;
2573 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2574 int n_px_need_tr = 0;
2576 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2579 // if top of sb64-row, use s->intra_pred_data[] instead of
2580 // dst[-stride] for intra prediction (it contains pre- instead of
2581 // post-loopfilter data)
2583 top = !(row & 7) && !y ?
2584 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2585 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2587 topleft = !(row & 7) && !y ?
2588 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2589 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2590 &dst_inner[-stride_inner];
2594 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2595 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2596 n_px_need + n_px_need_tr <= n_px_have) {
2600 if (n_px_need <= n_px_have) {
2601 memcpy(*a, top, n_px_need * bytesperpixel);
/* byte- or 16-bit-wide helpers so one code path serves 8 and 10/12 bpp */
2603 #define memset_bpp(c, i1, v, i2, num) do { \
2604 if (bytesperpixel == 1) { \
2605 memset(&(c)[(i1)], (v)[(i2)], (num)); \
2607 int n, val = AV_RN16A(&(v)[(i2) * 2]); \
2608 for (n = 0; n < (num); n++) { \
2609 AV_WN16A(&(c)[((i1) + n) * 2], val); \
2613 memcpy(*a, top, n_px_have * bytesperpixel);
2614 memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
2617 #define memset_val(c, val, num) do { \
2618 if (bytesperpixel == 1) { \
2619 memset((c), (val), (num)); \
2622 for (n = 0; n < (num); n++) { \
2623 AV_WN16A(&(c)[n * 2], (val)); \
2627 memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
2629 if (edges[mode].needs_topleft) {
2630 if (have_left && have_top) {
2631 #define assign_bpp(c, i1, v, i2) do { \
2632 if (bytesperpixel == 1) { \
2633 (c)[(i1)] = (v)[(i2)]; \
2635 AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
2638 assign_bpp(*a, -1, topleft, -1);
2640 #define assign_val(c, i, v) do { \
2641 if (bytesperpixel == 1) { \
2644 AV_WN16A(&(c)[(i) * 2], (v)); \
2647 assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
2650 if (tx == TX_4X4 && edges[mode].needs_topright) {
2651 if (have_top && have_right &&
2652 n_px_need + n_px_need_tr <= n_px_have) {
2653 memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
2655 memset_bpp(*a, 4, *a, 3, 4);
/* left edge: read pixels from the column left of dst; HOR_UP stores them
 * in inverted order (invert_left) */
2660 if (edges[mode].needs_left) {
2662 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2663 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2664 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2666 if (edges[mode].invert_left) {
2667 if (n_px_need <= n_px_have) {
2668 for (i = 0; i < n_px_need; i++)
2669 assign_bpp(l, i, &dst[i * stride], -1);
2671 for (i = 0; i < n_px_have; i++)
2672 assign_bpp(l, i, &dst[i * stride], -1);
2673 memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
2676 if (n_px_need <= n_px_have) {
2677 for (i = 0; i < n_px_need; i++)
2678 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2680 for (i = 0; i < n_px_have; i++)
2681 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2682 memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
2686 memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
/*
 * Reconstruct one intra-coded block: for every transform sub-block, build
 * the prediction edges (check_intra_mode), run the intra predictor, then add
 * the inverse-transformed residual when the sub-block's EOB is nonzero.
 * Luma plane first, then both chroma planes.
 *
 * NOTE(review): mangled extraction — interior lines missing, stray
 * line-number prefixes. Comments only; code bytes untouched.
 */
2693 static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
2694 ptrdiff_t uv_off, int bytesperpixel)
2696 VP9Context *s = ctx->priv_data;
2698 int row = s->row, col = s->col;
2699 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2700 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2701 int end_x = FFMIN(2 * (s->cols - col), w4);
2702 int end_y = FFMIN(2 * (s->rows - row), h4);
2703 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2704 int uvstep1d = 1 << b->uvtx, p;
2705 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
/* scratch buffers for the assembled top (a_buf) and left (l) edges */
2706 LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
2707 LOCAL_ALIGNED_32(uint8_t, l, [64]);
/* luma: dst walks the (possibly temporary) render target, dst_r the frame */
2709 for (n = 0, y = 0; y < end_y; y += step1d) {
2710 uint8_t *ptr = dst, *ptr_r = dst_r;
2711 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
2712 ptr_r += 4 * step1d * bytesperpixel, n += step) {
2713 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2715 uint8_t *a = &a_buf[32];
2716 enum TxfmType txtp = vp9_intra_txfm_type[mode];
/* EOBs for tx > 8x8 are stored 16-bit; smaller sizes are single bytes */
2717 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2719 mode = check_intra_mode(s, mode, &a, ptr_r,
2720 s->frames[CUR_FRAME].tf.f->linesize[0],
2721 ptr, s->y_stride, l,
2722 col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
2723 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2725 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2726 s->block + 16 * n * bytesperpixel, eob);
2728 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2729 dst += 4 * step1d * s->y_stride;
/* chroma: same walk with the uv transform size; always DCT_DCT */
2736 step = 1 << (b->uvtx * 2);
2737 for (p = 0; p < 2; p++) {
2738 dst = s->dst[1 + p];
2739 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2740 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2741 uint8_t *ptr = dst, *ptr_r = dst_r;
2742 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
2743 ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
2744 int mode = b->uvmode;
2745 uint8_t *a = &a_buf[32];
2746 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2748 mode = check_intra_mode(s, mode, &a, ptr_r,
2749 s->frames[CUR_FRAME].tf.f->linesize[1],
2750 ptr, s->uv_stride, l, col, x, w4, row, y,
2751 b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
2752 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2754 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2755 s->uvblock[p] + 16 * n * bytesperpixel, eob);
2757 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2758 dst += 4 * uvstep1d * s->uv_stride;
2763 static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2765 intra_recon(ctx, y_off, uv_off, 1);
2768 static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2770 intra_recon(ctx, y_off, uv_off, 2);
/*
 * Motion compensation for one luma block when the reference frame has a
 * different resolution than the current frame (scaled MC). Clips the MV to
 * the frame, scales position+MV into reference coordinates (Q14 'scale',
 * Q4 'step'), waits until the reference rows are decoded, and routes through
 * emulated_edge_mc when the filter taps would read outside the frame.
 *
 * NOTE(review): mangled extraction — interior lines missing, stray
 * line-number prefixes. Comments only; code bytes untouched.
 */
2773 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2774 uint8_t *dst, ptrdiff_t dst_stride,
2775 const uint8_t *ref, ptrdiff_t ref_stride,
2776 ThreadFrame *ref_frame,
2777 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2778 int px, int py, int pw, int ph,
2779 int bw, int bh, int w, int h, int bytesperpixel,
2780 const uint16_t *scale, const uint8_t *step)
/* 64-bit product: scale is Q14, n can exceed 16 bits */
2782 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2784 int refbw_m1, refbh_m1;
2788 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2789 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2790 // BUG libvpx seems to scale the two components separately. This introduces
2791 // rounding errors but we have to reproduce them to be exactly compatible
2792 // with the output from libvpx...
2793 mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
2794 my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
2798 ref += y * ref_stride + x * bytesperpixel;
/* last reference sample read, relative to (x, y), in whole pixels */
2801 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2802 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2803 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2804 // we use +7 because the last 7 pixels of each sbrow can be changed in
2805 // the longest loopfilter of the next sbrow
2806 th = (y + refbh_m1 + 4 + 7) >> 6;
2807 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
/* 8-tap filter reads 3 pixels before and 4 after each sample position */
2808 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2809 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2810 ref - 3 * ref_stride - 3 * bytesperpixel,
2812 refbw_m1 + 8, refbh_m1 + 8,
2813 x - 3, y - 3, w, h);
2814 ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2817 smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
/*
 * Scaled motion compensation for one chroma block (both U and V planes).
 * Mirrors mc_luma_scaled() but with per-plane source pointers/strides and
 * subsampling-aware MV clipping; reproduces a known libvpx rounding bug
 * (webm issue 820) when the respective axis is subsampled.
 *
 * NOTE(review): mangled extraction — interior lines missing, stray
 * line-number prefixes. Comments only; code bytes untouched.
 */
2820 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2821 uint8_t *dst_u, uint8_t *dst_v,
2822 ptrdiff_t dst_stride,
2823 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2824 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2825 ThreadFrame *ref_frame,
2826 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2827 int px, int py, int pw, int ph,
2828 int bw, int bh, int w, int h, int bytesperpixel,
2829 const uint16_t *scale, const uint8_t *step)
2832 int refbw_m1, refbh_m1;
/* horizontal: subsampled path reproduces libvpx's separate-scale rounding */
2837 // BUG https://code.google.com/p/webm/issues/detail?id=820
2838 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 4, (s->cols * 4 - x + px + 3) << 4);
2839 mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2841 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) << 3, (s->cols * 8 - x + px + 3) << 3);
2842 mx = scale_mv(mv.x << 1, 0) + scale_mv(x * 16, 0);
/* vertical: same split on s->ss_v */
2845 // BUG https://code.google.com/p/webm/issues/detail?id=820
2846 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 4, (s->rows * 4 - y + py + 3) << 4);
2847 my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2849 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) << 3, (s->rows * 8 - y + py + 3) << 3);
2850 my = scale_mv(mv.y << 1, 1) + scale_mv(y * 16, 1);
2855 ref_u += y * src_stride_u + x * bytesperpixel;
2856 ref_v += y * src_stride_v + x * bytesperpixel;
2859 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2860 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2861 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2862 // we use +7 because the last 7 pixels of each sbrow can be changed in
2863 // the longest loopfilter of the next sbrow
2864 th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2865 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
/* edge emulation runs per plane; both use the shared edge_emu_buffer */
2866 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2867 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2868 ref_u - 3 * src_stride_u - 3 * bytesperpixel,
2870 refbw_m1 + 8, refbh_m1 + 8,
2871 x - 3, y - 3, w, h);
2872 ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2873 smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
2875 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2876 ref_v - 3 * src_stride_v - 3 * bytesperpixel,
2878 refbw_m1 + 8, refbh_m1 + 8,
2879 x - 3, y - 3, w, h);
2880 ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2881 smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
2883 smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2884 smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
2888 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2889 px, py, pw, ph, bw, bh, w, h, i) \
2890 mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
2891 mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2892 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2893 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2894 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2895 mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2896 row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2897 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2899 #define FN(x) x##_scaled_8bpp
2900 #define BYTES_PER_PIXEL 1
2901 #include "vp9_mc_template.c"
2903 #undef BYTES_PER_PIXEL
2904 #define FN(x) x##_scaled_16bpp
2905 #define BYTES_PER_PIXEL 2
2906 #include "vp9_mc_template.c"
2908 #undef mc_chroma_dir
2910 #undef BYTES_PER_PIXEL
/*
 * Motion compensation for one luma block at 1:1 reference resolution.
 * Splits the eighth-pel MV into an integer offset (folded into 'ref') and a
 * subpel fraction (mx/my in [0,7]), waits for the reference rows, and uses
 * emulated_edge_mc when the subpel filter taps would read out of frame.
 *
 * NOTE(review): mangled extraction — interior lines missing, stray
 * line-number prefixes. Comments only; code bytes untouched.
 */
2913 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2914 uint8_t *dst, ptrdiff_t dst_stride,
2915 const uint8_t *ref, ptrdiff_t ref_stride,
2916 ThreadFrame *ref_frame,
2917 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2918 int bw, int bh, int w, int h, int bytesperpixel)
2920 int mx = mv->x, my = mv->y, th;
2924 ref += y * ref_stride + x * bytesperpixel;
2927 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2928 // we use +7 because the last 7 pixels of each sbrow can be changed in
2929 // the longest loopfilter of the next sbrow
2930 th = (y + bh + 4 * !!my + 7) >> 6;
2931 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
/* !!mx / !!my: edge margin only needed on an axis with a subpel fraction */
2932 if (x < !!mx * 3 || y < !!my * 3 ||
2933 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2934 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2935 ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
2937 bw + !!mx * 7, bh + !!my * 7,
2938 x - !!mx * 3, y - !!my * 3, w, h);
2939 ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2942 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
/*
 * Unscaled motion compensation for one chroma block (both U and V planes).
 * The MV is pre-shifted by the subsampling factors so mx/my are in the
 * chroma plane's subpel units; otherwise mirrors mc_luma_unscaled() with
 * per-plane source pointers and edge emulation.
 *
 * NOTE(review): mangled extraction — interior lines missing, stray
 * line-number prefixes. Comments only; code bytes untouched.
 */
2945 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2946 uint8_t *dst_u, uint8_t *dst_v,
2947 ptrdiff_t dst_stride,
2948 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2949 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2950 ThreadFrame *ref_frame,
2951 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2952 int bw, int bh, int w, int h, int bytesperpixel)
2954 int mx = mv->x << !s->ss_h, my = mv->y << !s->ss_v, th;
2958 ref_u += y * src_stride_u + x * bytesperpixel;
2959 ref_v += y * src_stride_v + x * bytesperpixel;
2962 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2963 // we use +7 because the last 7 pixels of each sbrow can be changed in
2964 // the longest loopfilter of the next sbrow
2965 th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2966 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2967 if (x < !!mx * 3 || y < !!my * 3 ||
2968 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
/* U plane through the shared edge buffer, then V reuses the same buffer */
2969 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2970 ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
2972 bw + !!mx * 7, bh + !!my * 7,
2973 x - !!mx * 3, y - !!my * 3, w, h);
2974 ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2975 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
2977 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2978 ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
2980 bw + !!mx * 7, bh + !!my * 7,
2981 x - !!mx * 3, y - !!my * 3, w, h);
2982 ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2983 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
2985 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2986 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
2990 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2991 px, py, pw, ph, bw, bh, w, h, i) \
2992 mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2993 mv, bw, bh, w, h, bytesperpixel)
2994 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2995 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2996 mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2997 row, col, mv, bw, bh, w, h, bytesperpixel)
2999 #define FN(x) x##_8bpp
3000 #define BYTES_PER_PIXEL 1
3001 #include "vp9_mc_template.c"
3003 #undef BYTES_PER_PIXEL
3004 #define FN(x) x##_16bpp
3005 #define BYTES_PER_PIXEL 2
3006 #include "vp9_mc_template.c"
3007 #undef mc_luma_dir_dir
3008 #undef mc_chroma_dir_dir
3010 #undef BYTES_PER_PIXEL
/*
 * Reconstruct one inter-coded block: run motion-compensated prediction
 * (scaled or unscaled variant, chosen per reference frame via s->mvscale),
 * then add the inverse-transformed residual for every non-skipped transform
 * sub-block. Luma first, then both chroma planes.
 *
 * NOTE(review): mangled extraction — interior lines missing, stray
 * line-number prefixes. Comments only; code bytes untouched.
 */
3013 static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
3015 VP9Context *s = ctx->priv_data;
3017 int row = s->row, col = s->col;
/* a nonzero mvscale entry for any used reference means the ref is scaled */
3019 if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
3020 if (bytesperpixel == 1) {
3021 inter_pred_scaled_8bpp(ctx);
3023 inter_pred_scaled_16bpp(ctx);
3026 if (bytesperpixel == 1) {
3027 inter_pred_8bpp(ctx);
3029 inter_pred_16bpp(ctx);
3033 /* mostly copied intra_recon() */
3035 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
3036 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
3037 int end_x = FFMIN(2 * (s->cols - col), w4);
3038 int end_y = FFMIN(2 * (s->rows - row), h4);
3039 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
3040 int uvstep1d = 1 << b->uvtx, p;
3041 uint8_t *dst = s->dst[0];
/* luma residual add; inter blocks always use the DCT_DCT transform */
3044 for (n = 0, y = 0; y < end_y; y += step1d) {
3046 for (x = 0; x < end_x; x += step1d,
3047 ptr += 4 * step1d * bytesperpixel, n += step) {
3048 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
3051 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
3052 s->block + 16 * n * bytesperpixel, eob);
3054 dst += 4 * s->y_stride * step1d;
/* chroma residual add, per plane */
3060 step = 1 << (b->uvtx * 2);
3061 for (p = 0; p < 2; p++) {
3062 dst = s->dst[p + 1];
3063 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
3065 for (x = 0; x < end_x; x += uvstep1d,
3066 ptr += 4 * uvstep1d * bytesperpixel, n += step) {
3067 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
3070 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
3071 s->uvblock[p] + 16 * n * bytesperpixel, eob);
3073 dst += 4 * uvstep1d * s->uv_stride;
3079 static void inter_recon_8bpp(AVCodecContext *ctx)
3081 inter_recon(ctx, 1);
3084 static void inter_recon_16bpp(AVCodecContext *ctx)
3086 inter_recon(ctx, 2);
/*
 * Record which 4-pixel edges of the current block the loopfilter must
 * process, ORing per-row column bitmasks into lflvl->mask. mask[0] holds
 * column (vertical) edges, mask[1] row (horizontal) edges; the last index
 * selects filter width (0=16px, 1=8px, 2=4px, 3=inner 4px). For subsampled
 * chroma (ss_h/ss_v set) two luma blocks map onto one filtered edge, hence
 * the odd-row/odd-col special cases.
 *
 * NOTE(review): mangled extraction — interior lines missing, stray
 * line-number prefixes. Comments only; code bytes untouched.
 */
3089 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
3090 int row_and_7, int col_and_7,
3091 int w, int h, int col_end, int row_end,
3092 enum TxfmMode tx, int skip_inter)
3094 static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
3095 static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
3097 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
3098 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
3099 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
3100 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
3102 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
3103 // edges. This means that for UV, we work on two subsampled blocks at
3104 // a time, and we only use the topleft block's mode information to set
3105 // things like block strength. Thus, for any block size smaller than
3106 // 16x16, ignore the odd portion of the block.
3107 if (tx == TX_4X4 && (ss_v | ss_h)) {
/* 4x4 tx on a coded (non-skipped inter) block: every internal edge filters */
3122 if (tx == TX_4X4 && !skip_inter) {
3123 int t = 1 << col_and_7, m_col = (t << w) - t, y;
3124 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
3125 int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
3127 for (y = row_and_7; y < h + row_and_7; y++) {
3128 int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
3130 mask[0][y][1] |= m_row_8;
3131 mask[0][y][2] |= m_row_4;
3132 // for odd lines, if the odd col is not being filtered,
3133 // skip odd row also:
3140 // if a/c are even row/col and b/d are odd, and d is skipped,
3141 // e.g. right edge of size-66x66.webm, then skip b also (bug)
3142 if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
3143 mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
3145 mask[1][y][col_mask_id] |= m_col;
3148 mask[0][y][3] |= m_col;
3150 if (ss_h && (col_end & 1))
3151 mask[1][y][3] |= (t << (w - 1)) - t;
3153 mask[1][y][3] |= m_col;
/* larger transforms (or skipped blocks): only the block borders filter */
3157 int y, t = 1 << col_and_7, m_col = (t << w) - t;
3160 int mask_id = (tx == TX_8X8);
3161 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
3162 int l2 = tx + ss_h - 1, step1d;
3163 int m_row = m_col & masks[l2];
3165 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
3166 // 8wd loopfilter to prevent going off the visible edge.
3167 if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
3168 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
3169 int m_row_8 = m_row - m_row_16;
3171 for (y = row_and_7; y < h + row_and_7; y++) {
3172 mask[0][y][0] |= m_row_16;
3173 mask[0][y][1] |= m_row_8;
3176 for (y = row_and_7; y < h + row_and_7; y++)
3177 mask[0][y][mask_id] |= m_row;
/* same narrowing for the bottom row edge on odd UV heights */
3182 if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
3183 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
3184 mask[1][y][0] |= m_col;
3185 if (y - row_and_7 == h - 1)
3186 mask[1][y][1] |= m_col;
3188 for (y = row_and_7; y < h + row_and_7; y += step1d)
3189 mask[1][y][mask_id] |= m_col;
3191 } else if (tx != TX_4X4) {
3194 mask_id = (tx == TX_8X8) || (h == ss_v);
3195 mask[1][row_and_7][mask_id] |= m_col;
3196 mask_id = (tx == TX_8X8) || (w == ss_h);
3197 for (y = row_and_7; y < h + row_and_7; y++)
3198 mask[0][y][mask_id] |= t;
/* skipped-inter 4x4: only the leading block edges */
3200 int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
3202 for (y = row_and_7; y < h + row_and_7; y++) {
3203 mask[0][y][2] |= t4;
3204 mask[0][y][1] |= t8;
3206 mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
3211 static void decode_b(AVCodecContext *ctx, int row, int col,
3212 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
3213 enum BlockLevel bl, enum BlockPartition bp)
3215 VP9Context *s = ctx->priv_data;
3217 enum BlockSize bs = bl * 3 + bp;
3218 int bytesperpixel = s->bytesperpixel;
3219 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
3221 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3227 s->min_mv.x = -(128 + col * 64);
3228 s->min_mv.y = -(128 + row * 64);
3229 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
3230 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
3236 b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
3237 (s->ss_v && h4 * 2 == (1 << b->tx)));
3242 if (bytesperpixel == 1) {
3243 has_coeffs = decode_coeffs_8bpp(ctx);
3245 has_coeffs = decode_coeffs_16bpp(ctx);
3247 if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
3249 memset(&s->above_skip_ctx[col], 1, w4);
3250 memset(&s->left_skip_ctx[s->row7], 1, h4);
3255 #define SPLAT_ZERO_CTX(v, n) \
3257 case 1: v = 0; break; \
3258 case 2: AV_ZERO16(&v); break; \
3259 case 4: AV_ZERO32(&v); break; \
3260 case 8: AV_ZERO64(&v); break; \
3261 case 16: AV_ZERO128(&v); break; \
3263 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3265 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3266 if (s->ss_##dir2) { \
3267 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3268 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3270 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3271 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3276 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
3277 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
3278 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
3279 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
3282 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
3283 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
3284 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
3285 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
3290 s->block += w4 * h4 * 64 * bytesperpixel;
3291 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3292 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3293 s->eob += 4 * w4 * h4;
3294 s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3295 s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3301 // emulated overhangs if the stride of the target buffer can't hold. This
3302 // makes it possible to support emu-edge and so on even if we have large block
3304 emu[0] = (col + w4) * 8 > f->linesize[0] ||
3305 (row + h4) > s->rows;
3306 emu[1] = (col + w4) * 4 > f->linesize[1] ||
3307 (row + h4) > s->rows;
3309 s->dst[0] = s->tmp_y;
3312 s->dst[0] = f->data[0] + yoff;
3313 s->y_stride = f->linesize[0];
3316 s->dst[1] = s->tmp_uv[0];
3317 s->dst[2] = s->tmp_uv[1];
3320 s->dst[1] = f->data[1] + uvoff;
3321 s->dst[2] = f->data[2] + uvoff;
3322 s->uv_stride = f->linesize[1];
3326 intra_recon_16bpp(ctx, yoff, uvoff);
3328 intra_recon_8bpp(ctx, yoff, uvoff);
3332 inter_recon_16bpp(ctx);
3334 inter_recon_8bpp(ctx);
3338 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3340 for (n = 0; o < w; n++) {
3345 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3346 s->tmp_y + o, 128, h, 0, 0);
3347 o += bw * bytesperpixel;
3352 int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
3353 int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
3355 for (n = s->ss_h; o < w; n++) {
3360 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3361 s->tmp_uv[0] + o, 128, h, 0, 0);
3362 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3363 s->tmp_uv[1] + o, 128, h, 0, 0);
3364 o += bw * bytesperpixel;
3369 // pick filter level and find edges to apply filter to
3370 if (s->filter.level &&
3371 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3372 [b->mode[3] != ZEROMV]) > 0) {
3373 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3374 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3376 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3377 mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3378 if (s->ss_h || s->ss_v)
3379 mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
3380 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3381 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3382 b->uvtx, skip_inter);
3384 if (!s->filter.lim_lut[lvl]) {
3385 int sharp = s->filter.sharpness;
3389 limit >>= (sharp + 3) >> 2;
3390 limit = FFMIN(limit, 9 - sharp);
3392 limit = FFMAX(limit, 1);
3394 s->filter.lim_lut[lvl] = limit;
3395 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
3401 s->block += w4 * h4 * 64 * bytesperpixel;
3402 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3403 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3404 s->eob += 4 * w4 * h4;
3405 s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3406 s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
/*
 * Bitstream-parsing pass: recursively read the partition tree for the
 * square at (row, col) with level bl (BL_64X64 at the top; hbs is half
 * the square's size in units of 8x8 blocks) and decode every leaf via
 * decode_b(). The partition probability set is selected from the
 * above/left partition contexts; keyframes and intra-only frames use
 * the default keyframe table instead of the adapted per-frame one.
 * Partition counts are accumulated for backward probability adaptation.
 */
3410 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3411 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3413 VP9Context *s = ctx->priv_data;
// context c = (above partition bit) | (left partition bit << 1)
3414 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3415 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
3416 const uint8_t *p = s->keyframe || s->intraonly ? vp9_default_kf_partition_probs[bl][c] :
3417 s->prob.p.partition[bl][c];
3418 enum BlockPartition bp;
3419 ptrdiff_t hbs = 4 >> bl;
3420 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3421 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3422 int bytesperpixel = s->bytesperpixel;
3425 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3426 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// whole square fits inside the frame: any of the four partitions is codable
3427 } else if (col + hbs < s->cols) { // FIXME why not <=?
3428 if (row + hbs < s->rows) { // FIXME why not <=?
3429 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3431 case PARTITION_NONE:
3432 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// horizontal split: top half, then bottom half one hbs further down
3435 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3436 yoff += hbs * 8 * y_stride;
3437 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3438 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
// vertical split: left half, then right half one hbs to the right
3441 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3442 yoff += hbs * 8 * bytesperpixel;
3443 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3444 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3446 case PARTITION_SPLIT:
// four-way split: recurse into the four quadrants at level bl + 1
3447 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3448 decode_sb(ctx, row, col + hbs, lflvl,
3449 yoff + 8 * hbs * bytesperpixel,
3450 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3451 yoff += hbs * 8 * y_stride;
3452 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3453 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3454 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3455 yoff + 8 * hbs * bytesperpixel,
3456 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// bottom frame edge: only NONE/SPLIT possible, coded with a single branch
3461 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3462 bp = PARTITION_SPLIT;
3463 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3464 decode_sb(ctx, row, col + hbs, lflvl,
3465 yoff + 8 * hbs * bytesperpixel,
3466 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3469 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// right frame edge: only NONE/SPLIT possible
3471 } else if (row + hbs < s->rows) { // FIXME why not <=?
3472 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3473 bp = PARTITION_SPLIT;
3474 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3475 yoff += hbs * 8 * y_stride;
3476 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3477 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3480 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// bottom-right corner: SPLIT is implied, nothing to read
3483 bp = PARTITION_SPLIT;
3484 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3486 s->counts.partition[bl][c][bp]++;
/*
 * Second-pass (reconstruction) counterpart of decode_sb(): instead of
 * reading the partition tree from the bitstream, it replays the block
 * level/partition (b->bl, b->bp) stored by the first pass, walking the
 * same leaves in the same order and calling decode_b() on each.
 * No range-coder state is consumed here.
 */
3489 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3490 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3492 VP9Context *s = ctx->priv_data;
3494 ptrdiff_t hbs = 4 >> bl;
3495 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3496 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3497 int bytesperpixel = s->bytesperpixel;
// smallest level reached: leaf must be an 8x8 decision
3500 av_assert2(b->bl == BL_8X8);
3501 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3502 } else if (s->b->bl == bl) {
// the stored block terminates at this level: NONE, H or V partition
3503 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3504 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3505 yoff += hbs * 8 * y_stride;
3506 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3507 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3508 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3509 yoff += hbs * 8 * bytesperpixel;
3510 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3511 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// otherwise: recurse into quadrants, skipping those outside the frame
3514 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3515 if (col + hbs < s->cols) { // FIXME why not <=?
3516 if (row + hbs < s->rows) {
3517 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
3518 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3519 yoff += hbs * 8 * y_stride;
3520 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3521 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3522 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3523 yoff + 8 * hbs * bytesperpixel,
3524 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3526 yoff += hbs * 8 * bytesperpixel;
3527 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3528 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3530 } else if (row + hbs < s->rows) {
3531 yoff += hbs * 8 * y_stride;
3532 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3533 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
/*
 * Apply the deblocking loop filter to vertical edges (edges *between
 * columns*) of one plane within a 64x64 superblock. 'mask' holds the
 * per-edge-size bitmasks built by mask_edges(); 'lvl' the per-8x8
 * filter levels. For each set bit the edge strength (E), interior
 * limit (I) and hev threshold (H) are looked up from the level L and
 * one of loop_filter_16 / loop_filter_8 / loop_filter_mix2 is invoked.
 * ss_h/ss_v are the plane's chroma subsampling shifts.
 */
3538 static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
3539 uint8_t *lvl, uint8_t (*mask)[4],
3540 uint8_t *dst, ptrdiff_t ls)
3542 int y, x, bytesperpixel = s->bytesperpixel;
3544 // filter edges between columns (e.g. block1 | block2)
3545 for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
// hmask1/hmask2: masks for this 8px row pair; index 0..2 = 16/8/4-px
// edges, index 3 = inner 4px edges
3546 uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
3547 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3548 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3549 unsigned hm = hm1 | hm2 | hm13 | hm23;
3551 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
3554 int L = *l, H = L >> 4;
3555 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3557 if (hmask1[0] & x) {
3558 if (hmask2[0] & x) {
// both rows share a full-size edge with identical level: one 16-wide call
3559 av_assert2(l[8 << ss_v] == L);
3560 s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
3562 s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
3564 } else if (hm2 & x) {
// two stacked 8px edges with (possibly) different levels: pack the
// second level's E/I into the high byte for the mix2 filter
3567 E |= s->filter.mblim_lut[L] << 8;
3568 I |= s->filter.lim_lut[L] << 8;
3569 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3571 [0](ptr, ls, E, I, H);
3573 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3574 [0](ptr, ls, E, I, H);
3576 } else if (hm2 & x) {
// only the lower row has an edge at this position
3577 int L = l[8 << ss_v], H = L >> 4;
3578 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3580 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3581 [0](ptr + 8 * ls, ls, E, I, H);
// inner 4px edges (mask index 3), offset half a block to the right
3589 int L = *l, H = L >> 4;
3590 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3595 E |= s->filter.mblim_lut[L] << 8;
3596 I |= s->filter.lim_lut[L] << 8;
3597 s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3599 s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3601 } else if (hm23 & x) {
3602 int L = l[8 << ss_v], H = L >> 4;
3603 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3605 s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
/*
 * Apply the deblocking loop filter to horizontal edges (edges *between
 * rows*) of one plane within a 64x64 superblock — the row-direction
 * twin of filter_plane_cols(). The same E/I/H derivation from the
 * per-8x8 level L is used; here adjacent edges sit side by side
 * horizontally, so pairs are packed for loop_filter_mix2 along x
 * rather than along y.
 */
3613 static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
3614 uint8_t *lvl, uint8_t (*mask)[4],
3615 uint8_t *dst, ptrdiff_t ls)
3617 int y, x, bytesperpixel = s->bytesperpixel;
3620 // filter edges between rows (e.g. ------)
3622 for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
// vmask[0..2] = 16/8/4-px edges for this row; vmask[3] = inner 4px edges
3623 uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
3624 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
3626 for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
3629 int L = *l, H = L >> 4;
3630 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3633 if (vmask[0] & (x << (1 + ss_h))) {
// two adjacent full-size edges with identical level: one 16-wide call
3634 av_assert2(l[1 + ss_h] == L);
3635 s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
3637 s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
3639 } else if (vm & (x << (1 + ss_h))) {
// neighboring 8px edges: pack second level's E/I into the high byte
3642 E |= s->filter.mblim_lut[L] << 8;
3643 I |= s->filter.lim_lut[L] << 8;
3644 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3645 [!!(vmask[1] & (x << (1 + ss_h)))]
3646 [1](ptr, ls, E, I, H);
3648 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3649 [1](ptr, ls, E, I, H);
3651 } else if (vm & (x << (1 + ss_h))) {
// only the right-hand neighbor has an edge at this position
3652 int L = l[1 + ss_h], H = L >> 4;
3653 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3655 s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
3656 [1](ptr + 8 * bytesperpixel, ls, E, I, H);
// inner 4px edges (vmask[3]), offset half a block downward
3661 int L = *l, H = L >> 4;
3662 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3664 if (vm3 & (x << (1 + ss_h))) {
3667 E |= s->filter.mblim_lut[L] << 8;
3668 I |= s->filter.lim_lut[L] << 8;
3669 s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
3671 s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
3673 } else if (vm3 & (x << (1 + ss_h))) {
3674 int L = l[1 + ss_h], H = L >> 4;
3675 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3677 s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
/*
 * Loop-filter one 64x64 superblock: first vertical then horizontal
 * edges on the luma plane, then the same on both chroma planes using
 * the subsampling-dependent mask set (lflvl->mask[ss_h | ss_v]).
 * yoff/uvoff locate the superblock within the current frame.
 */
3690 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3691 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3693 VP9Context *s = ctx->priv_data;
3694 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3695 uint8_t *dst = f->data[0] + yoff;
3696 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
3697 uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
3700 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3701 // if you think of them as acting on a 8x8 block max, we can interleave
3702 // each v/h within the single x loop, but that only works if we work on
3703 // 8 pixel blocks, and we won't always do that (we want at least 16px
3704 // to use SSE2 optimizations, perhaps 32 for AVX2)
// luma: mask[0][0] = column edges, mask[0][1] = row edges
3706 filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
3707 filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);
// chroma: both planes share the same masks and stride
3709 for (p = 0; p < 2; p++) {
3710 dst = f->data[1 + p] + uvoff;
3711 filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
3712 filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
/**
 * Compute the half-open range [*start, *end) of 8x8-block positions
 * covered by tile number @p idx, when @p n superblocks are split into
 * 2^log2_n tiles along one dimension. Superblock indices are clamped
 * to n before scaling to block units (<< 3, i.e. 8 blocks per sb).
 *
 * @param start   receives the tile's first block position
 * @param end     receives one past the tile's last block position
 * @param idx     tile index along this dimension
 * @param log2_n  log2 of the tile count
 * @param n       number of superblocks along this dimension
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = (idx * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;

    if (sb_start > n)
        sb_start = n;
    if (sb_end > n)
        sb_end = n;

    *start = sb_start << 3;
    *end   = sb_end << 3;
}
/*
 * Merge one binary probability with its observed branch counts
 * (ct0 = times the 0-branch was coded, ct1 = the 1-branch). p2 is the
 * count-derived probability in Q8 — rounded, then clipped to [1,255] —
 * and *p is moved from its stored value toward p2 by update_factor/256,
 * with the factor itself scaled down (FASTDIV) when fewer than
 * max_count symbols were observed.
 * NOTE(review): the early return for ct == 0 and the p1 = *p load are
 * not visible in this excerpt — confirm against the full file.
 */
3724 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3725 int max_count, int update_factor)
3727 unsigned ct = ct0 + ct1, p2, p1;
// empirical probability of branch 0, rounded to nearest in Q8
3733 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3734 p2 = av_clip(p2, 1, 255);
3735 ct = FFMIN(ct, max_count);
3736 update_factor = FASTDIV(update_factor * ct, max_count);
3738 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3739 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
/*
 * Backward probability adaptation, run after a frame has been decoded:
 * every probability in the frame context s->prob_ctx[s->framectxid] is
 * nudged toward the symbol counts accumulated in s->counts during
 * decoding, via adapt_prob(). Coefficient probabilities use a reduced
 * update factor (112) after a keyframe/intra-only/context-reset frame;
 * everything else uses 128. Inter-only probability sets (intra, comp,
 * refs, filter, mv, y_mode) are skipped for keyframes/intra-only
 * frames, which only copy back the shared skip/tx probabilities.
 */
3742 static void adapt_probs(VP9Context *s)
3745 prob_context *p = &s->prob_ctx[s->framectxid].p;
3746 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
// coefficient probabilities: [tx size][plane type][ref type][band][ctx]
3749 for (i = 0; i < 4; i++)
3750 for (j = 0; j < 2; j++)
3751 for (k = 0; k < 2; k++)
3752 for (l = 0; l < 6; l++)
3753 for (m = 0; m < 6; m++) {
3754 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3755 unsigned *e = s->counts.eob[i][j][k][l][m];
3756 unsigned *c = s->counts.coef[i][j][k][l][m];
3758 if (l == 0 && m >= 3) // dc only has 3 pt
3761 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3762 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3763 adapt_prob(&pp[2], c[1], c[2], 24, uf);
// intra frames: just latch skip/tx probs, no inter stats exist
3766 if (s->keyframe || s->intraonly) {
3767 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3768 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3769 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3770 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
// skip flag
3775 for (i = 0; i < 3; i++)
3776 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
// intra/inter flag
3779 for (i = 0; i < 4; i++)
3780 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
// compound prediction mode
3783 if (s->comppredmode == PRED_SWITCHABLE) {
3784 for (i = 0; i < 5; i++)
3785 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
// compound reference selection
3789 if (s->comppredmode != PRED_SINGLEREF) {
3790 for (i = 0; i < 5; i++)
3791 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3792 s->counts.comp_ref[i][1], 20, 128);
// single reference selection (two binary decisions)
3795 if (s->comppredmode != PRED_COMPREF) {
3796 for (i = 0; i < 5; i++) {
3797 uint8_t *pp = p->single_ref[i];
3798 unsigned (*c)[2] = s->counts.single_ref[i];
3800 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3801 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3805 // block partitioning
3806 for (i = 0; i < 4; i++)
3807 for (j = 0; j < 4; j++) {
3808 uint8_t *pp = p->partition[i][j];
3809 unsigned *c = s->counts.partition[i][j];
3811 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3812 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3813 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// tx size selection (only adapted when switchable per-block)
3817 if (s->txfmmode == TX_SWITCHABLE) {
3818 for (i = 0; i < 2; i++) {
3819 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3821 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3822 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3823 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3824 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3825 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3826 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3830 // interpolation filter
3831 if (s->filtermode == FILTER_SWITCHABLE) {
3832 for (i = 0; i < 4; i++) {
3833 uint8_t *pp = p->filter[i];
3834 unsigned *c = s->counts.filter[i];
3836 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3837 adapt_prob(&pp[1], c[1], c[2], 20, 128);
// inter prediction modes (ZEROMV/NEARESTMV/NEARMV/NEWMV tree)
3842 for (i = 0; i < 7; i++) {
3843 uint8_t *pp = p->mv_mode[i];
3844 unsigned *c = s->counts.mv_mode[i];
3846 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3847 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3848 adapt_prob(&pp[2], c[1], c[3], 20, 128);
// motion vector joint (which components are nonzero)
3853 uint8_t *pp = p->mv_joint;
3854 unsigned *c = s->counts.mv_joint;
3856 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3857 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3858 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// per-component mv probabilities (i = 0: vertical, 1: horizontal)
3862 for (i = 0; i < 2; i++) {
3864 unsigned *c, (*c2)[2], sum;
3866 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3867 s->counts.mv_comp[i].sign[1], 20, 128);
// magnitude class tree: each node compares its count against the
// remaining tail sum
3869 pp = p->mv_comp[i].classes;
3870 c = s->counts.mv_comp[i].classes;
3871 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3872 adapt_prob(&pp[0], c[0], sum, 20, 128);
3874 adapt_prob(&pp[1], c[1], sum, 20, 128);
3876 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3877 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3879 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3880 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3882 adapt_prob(&pp[6], c[6], sum, 20, 128);
3883 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3884 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3885 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3887 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3888 s->counts.mv_comp[i].class0[1], 20, 128);
3889 pp = p->mv_comp[i].bits;
3890 c2 = s->counts.mv_comp[i].bits;
3891 for (j = 0; j < 10; j++)
3892 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
// fractional-pel bits (class0 and general)
3894 for (j = 0; j < 2; j++) {
3895 pp = p->mv_comp[i].class0_fp[j];
3896 c = s->counts.mv_comp[i].class0_fp[j];
3897 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3898 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3899 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3901 pp = p->mv_comp[i].fp;
3902 c = s->counts.mv_comp[i].fp;
3903 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3904 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3905 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// high-precision bits, only when the frame coded them
3907 if (s->highprecisionmvs) {
3908 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3909 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3910 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3911 s->counts.mv_comp[i].hp[1], 20, 128);
// luma intra mode tree, per block-size group
3916 for (i = 0; i < 4; i++) {
3917 uint8_t *pp = p->y_mode[i];
3918 unsigned *c = s->counts.y_mode[i], sum, s2;
3920 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3921 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3922 sum -= c[TM_VP8_PRED];
3923 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3924 sum -= c[VERT_PRED];
3925 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3926 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3928 adapt_prob(&pp[3], s2, sum, 20, 128);
3930 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3931 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3932 sum -= c[DIAG_DOWN_LEFT_PRED];
3933 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3934 sum -= c[VERT_LEFT_PRED];
3935 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3936 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// chroma intra mode tree, conditioned on the luma mode (same tree shape)
3940 for (i = 0; i < 10; i++) {
3941 uint8_t *pp = p->uv_mode[i];
3942 unsigned *c = s->counts.uv_mode[i], sum, s2;
3944 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3945 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3946 sum -= c[TM_VP8_PRED];
3947 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3948 sum -= c[VERT_PRED];
3949 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3950 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3952 adapt_prob(&pp[3], s2, sum, 20, 128);
3954 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3955 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3956 sum -= c[DIAG_DOWN_LEFT_PRED];
3957 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3958 sum -= c[VERT_LEFT_PRED];
3959 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3960 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3964 static void free_buffers(VP9Context *s)
3966 av_freep(&s->intra_pred_data[0]);
3967 av_freep(&s->b_base);
3968 av_freep(&s->block_base);
/*
 * Codec close callback: unreference and free the three internal frames
 * (CUR_FRAME / REF_FRAME_MVPAIR / REF_FRAME_SEGMAP) and all 8 reference
 * slots in both refs[] and next_refs[]. Buffers are only released when
 * data[0] shows they are actually allocated.
 */
3971 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3973 VP9Context *s = ctx->priv_data;
3976 for (i = 0; i < 3; i++) {
3977 if (s->frames[i].tf.f->data[0])
3978 vp9_unref_frame(ctx, &s->frames[i]);
3979 av_frame_free(&s->frames[i].tf.f);
3981 for (i = 0; i < 8; i++) {
3982 if (s->refs[i].f->data[0])
3983 ff_thread_release_buffer(ctx, &s->refs[i]);
3984 av_frame_free(&s->refs[i].f);
3985 if (s->next_refs[i].f->data[0])
3986 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3987 av_frame_free(&s->next_refs[i].f);
/*
 * Main decode entry point for one packet.
 *
 * Flow (as visible in this excerpt):
 *  1. decode_frame_header(); res == 0 means "show existing frame":
 *     return a reference to refs[ref] directly, after rotating the
 *     reference list into next_refs[].
 *  2. Rotate/allocate the three internal frames (SEGMAP / MVPAIR /
 *     CUR_FRAME), preserving the segmentation map when the header says
 *     it is reused (retain_segmap_ref).
 *  3. Pre-fill next_refs[] according to refreshrefmask.
 *  4. Reset above-row contexts, pick 1- or 2-pass operation
 *     (frame-threading + refreshctx + !parallelmode => 2-pass), then
 *     run the tile loop: per tile-row/tile-col set offsets, init a
 *     range decoder per tile from the 32-bit size prefix, decode
 *     superblocks via decode_sb()/decode_sb_mem(), then per sb-row
 *     save the pre-loopfilter bottom edge for intra prediction,
 *     loopfilter the row, and report progress.
 *  5. Rotate next_refs[] into refs[] and, unless the frame is
 *     invisible, return CUR_FRAME to the caller.
 */
3997 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3998 int *got_frame, AVPacket *pkt)
4000 const uint8_t *data = pkt->data;
4001 int size = pkt->size;
4002 VP9Context *s = ctx->priv_data;
4003 int res, tile_row, tile_col, i, ref, row, col;
// keep the old segmentation map when the header neither updates it nor
// starts a key/intra-only frame
4004 int retain_segmap_ref = s->frames[REF_FRAME_SEGMAP].segmentation_map &&
4005 !(s->segmentation.enabled &&
4006 (s->segmentation.update_map || s->keyframe || s->intraonly));
4007 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
4011 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
4013 } else if (res == 0) {
// "show existing frame": output refs[ref] without decoding anything
4014 if (!s->refs[ref].f->data[0]) {
4015 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
4016 return AVERROR_INVALIDDATA;
4018 if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
4020 ((AVFrame *)frame)->pkt_pts = pkt->pts;
4021 ((AVFrame *)frame)->pkt_dts = pkt->dts;
4022 for (i = 0; i < 8; i++) {
4023 if (s->next_refs[i].f->data[0])
4024 ff_thread_release_buffer(ctx, &s->next_refs[i]);
4025 if (s->refs[i].f->data[0] &&
4026 (res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i])) < 0)
// rotate the internal frames: last frame becomes SEGMAP/MVPAIR reference
4035 if (!retain_segmap_ref) {
4036 if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
4037 vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
4038 if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4039 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
4042 if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
4043 vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);
4044 if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
4045 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
4047 if (s->frames[CUR_FRAME].tf.f->data[0])
4048 vp9_unref_frame(ctx, &s->frames[CUR_FRAME])
4049 if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
4051 f = s->frames[CUR_FRAME].tf.f;
4052 f->key_frame = s->keyframe;
4053 f->pict_type = (s->keyframe || s->intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
4054 ls_y = f->linesize[0];
4055 ls_uv =f->linesize[1];
// set up next_refs[]: slots in refreshrefmask point at the new frame
4058 for (i = 0; i < 8; i++) {
4059 if (s->next_refs[i].f->data[0])
4060 ff_thread_release_buffer(ctx, &s->next_refs[i]);
4061 if (s->refreshrefmask & (1 << i)) {
4062 res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
4063 } else if (s->refs[i].f->data[0]) {
4064 res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
4070 // main tile decode loop
4071 bytesperpixel = s->bytesperpixel;
4072 memset(s->above_partition_ctx, 0, s->cols);
4073 memset(s->above_skip_ctx, 0, s->cols);
4074 if (s->keyframe || s->intraonly) {
4075 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
4077 memset(s->above_mode_ctx, NEARESTMV, s->cols);
4079 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
4080 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
4081 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
4082 memset(s->above_segpred_ctx, 0, s->cols);
// 2-pass decoding only under frame threading with in-order context refresh
4083 s->pass = s->frames[CUR_FRAME].uses_2pass =
4084 ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
4085 if ((res = update_block_buffers(ctx)) < 0) {
4086 av_log(ctx, AV_LOG_ERROR,
4087 "Failed to allocate block buffers\n");
4090 if (s->refreshctx && s->parallelmode) {
// parallel mode: commit the frame-header probabilities up front so
// other frame threads can proceed
4093 for (i = 0; i < 4; i++) {
4094 for (j = 0; j < 2; j++)
4095 for (k = 0; k < 2; k++)
4096 for (l = 0; l < 6; l++)
4097 for (m = 0; m < 6; m++)
4098 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
4099 s->prob.coef[i][j][k][l][m], 3);
4100 if (s->txfmmode == i)
4103 s->prob_ctx[s->framectxid].p = s->prob.p;
4104 ff_thread_finish_setup(ctx);
4105 } else if (!s->refreshctx) {
4106 ff_thread_finish_setup(ctx);
4112 s->block = s->block_base;
4113 s->uvblock[0] = s->uvblock_base[0];
4114 s->uvblock[1] = s->uvblock_base[1];
4115 s->eob = s->eob_base;
4116 s->uveob[0] = s->uveob_base[0];
4117 s->uveob[1] = s->uveob_base[1];
4119 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
4120 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
4121 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
// per-tile range decoders: each tile but the last is prefixed with a
// 32-bit size
4123 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4126 if (tile_col == s->tiling.tile_cols - 1 &&
4127 tile_row == s->tiling.tile_rows - 1) {
4130 tile_size = AV_RB32(data);
4134 if (tile_size > size) {
4135 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4136 return AVERROR_INVALIDDATA;
4138 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
4139 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
4140 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
4141 return AVERROR_INVALIDDATA;
4148 for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
4149 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
4150 struct VP9Filter *lflvl_ptr = s->lflvl;
4151 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
4153 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
4154 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
4155 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
// reset left-edge contexts at each tile column start
4158 memset(s->left_partition_ctx, 0, 8);
4159 memset(s->left_skip_ctx, 0, 8);
4160 if (s->keyframe || s->intraonly) {
4161 memset(s->left_mode_ctx, DC_PRED, 16);
4163 memset(s->left_mode_ctx, NEARESTMV, 8);
4165 memset(s->left_y_nnz_ctx, 0, 16);
4166 memset(s->left_uv_nnz_ctx, 0, 32);
4167 memset(s->left_segpred_ctx, 0, 8);
4169 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
4172 for (col = s->tiling.tile_col_start;
4173 col < s->tiling.tile_col_end;
4174 col += 8, yoff2 += 64 * bytesperpixel,
4175 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4176 // FIXME integrate with lf code (i.e. zero after each
4177 // use, similar to invtxfm coefficients, or similar)
4179 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
4183 decode_sb_mem(ctx, row, col, lflvl_ptr,
4184 yoff2, uvoff2, BL_64X64);
4186 decode_sb(ctx, row, col, lflvl_ptr,
4187 yoff2, uvoff2, BL_64X64);
4191 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
4199 // backup pre-loopfilter reconstruction data for intra
4200 // prediction of next row of sb64s
4201 if (row + 8 < s->rows) {
4202 memcpy(s->intra_pred_data[0],
4203 f->data[0] + yoff + 63 * ls_y,
4204 8 * s->cols * bytesperpixel);
4205 memcpy(s->intra_pred_data[1],
4206 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4207 8 * s->cols * bytesperpixel >> s->ss_h);
4208 memcpy(s->intra_pred_data[2],
4209 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4210 8 * s->cols * bytesperpixel >> s->ss_h);
4213 // loopfilter one row
4214 if (s->filter.level) {
4217 lflvl_ptr = s->lflvl;
4218 for (col = 0; col < s->cols;
4219 col += 8, yoff2 += 64 * bytesperpixel,
4220 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4221 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
4225 // FIXME maybe we can make this more finegrained by running the
4226 // loopfilter per-block instead of after each sbrow
4227 // In fact that would also make intra pred left preparation easier?
4228 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
4232 if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
4234 ff_thread_finish_setup(ctx);
4236 } while (s->pass++ == 1);
4237 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
// commit next_refs[] as the new reference set
4240 for (i = 0; i < 8; i++) {
4241 if (s->refs[i].f->data[0])
4242 ff_thread_release_buffer(ctx, &s->refs[i]);
4243 ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
4246 if (!s->invisible) {
4247 if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
/*
 * Flush callback: drop the three internal frames and all 8 reference
 * slots so decoding can restart cleanly (e.g. after a seek).
 */
4255 static void vp9_decode_flush(AVCodecContext *ctx)
4257 VP9Context *s = ctx->priv_data;
4260 for (i = 0; i < 3; i++)
4261 vp9_unref_frame(ctx, &s->frames[i]);
4262 for (i = 0; i < 8; i++)
4263 ff_thread_release_buffer(ctx, &s->refs[i]);
/*
 * Allocate the AVFrame shells for the 3 internal frames and the 8
 * refs[]/next_refs[] slots. On any allocation failure the whole
 * context is torn down via vp9_decode_free() before returning ENOMEM,
 * so partial allocations never leak.
 */
4266 static int init_frames(AVCodecContext *ctx)
4268 VP9Context *s = ctx->priv_data;
4271 for (i = 0; i < 3; i++) {
4272 s->frames[i].tf.f = av_frame_alloc();
4273 if (!s->frames[i].tf.f) {
4274 vp9_decode_free(ctx);
4275 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4276 return AVERROR(ENOMEM);
4279 for (i = 0; i < 8; i++) {
4280 s->refs[i].f = av_frame_alloc();
4281 s->next_refs[i].f = av_frame_alloc();
4282 if (!s->refs[i].f || !s->next_refs[i].f) {
4283 vp9_decode_free(ctx);
4284 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4285 return AVERROR(ENOMEM);
/*
 * Codec init: enable per-frame progress allocation for frame
 * threading, mark the loopfilter sharpness as "unset" (-1, so the
 * first header parse recomputes the limit LUTs), and allocate the
 * frame shells.
 */
4292 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4294 VP9Context *s = ctx->priv_data;
4296 ctx->internal->allocate_progress = 1;
4298 s->filter.sharpness = -1;
4300 return init_frames(ctx);
/* Frame-thread worker init: each copy only needs its own frame shells. */
4303 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4305 return init_frames(avctx);
/*
 * Frame-threading context sync: copy decode state from the thread that
 * just finished header setup (src) into this worker (dst) — frame and
 * reference buffers (by refcount), plus the header-derived scalars
 * (keyframe/intraonly flags, subsampling, segmentation, bit depth)
 * and the probability/lf-delta tables.
 */
4308 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4311 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4313 // detect size changes in other threads
4314 if (s->intra_pred_data[0] &&
4315 (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
// re-reference the source thread's frames and post-update reference set
4319 for (i = 0; i < 3; i++) {
4320 if (s->frames[i].tf.f->data[0])
4321 vp9_unref_frame(dst, &s->frames[i]);
4322 if (ssrc->frames[i].tf.f->data[0]) {
4323 if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
4327 for (i = 0; i < 8; i++) {
4328 if (s->refs[i].f->data[0])
4329 ff_thread_release_buffer(dst, &s->refs[i]);
4330 if (ssrc->next_refs[i].f->data[0]) {
4331 if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
// scalar header state needed before this thread parses its own header
4336 s->invisible = ssrc->invisible;
4337 s->keyframe = ssrc->keyframe;
4338 s->intraonly = ssrc->intraonly;
4339 s->ss_v = ssrc->ss_v;
4340 s->ss_h = ssrc->ss_h;
4341 s->segmentation.enabled = ssrc->segmentation.enabled;
4342 s->segmentation.update_map = ssrc->segmentation.update_map;
4343 s->bytesperpixel = ssrc->bytesperpixel;
4345 s->bpp_index = ssrc->bpp_index;
4346 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4347 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
4348 if (ssrc->segmentation.enabled) {
4349 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4350 sizeof(s->segmentation.feat));
/* Supported VP9 profiles (0-3), terminated by FF_PROFILE_UNKNOWN. */
4356 static const AVProfile profiles[] = {
4357 { FF_PROFILE_VP9_0, "Profile 0" },
4358 { FF_PROFILE_VP9_1, "Profile 1" },
4359 { FF_PROFILE_VP9_2, "Profile 2" },
4360 { FF_PROFILE_VP9_3, "Profile 3" },
4361 { FF_PROFILE_UNKNOWN },
4364 AVCodec ff_vp9_decoder = {
4366 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4367 .type = AVMEDIA_TYPE_VIDEO,
4368 .id = AV_CODEC_ID_VP9,
4369 .priv_data_size = sizeof(VP9Context),
4370 .init = vp9_decode_init,
4371 .close = vp9_decode_free,
4372 .decode = vp9_decode_frame,
4373 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
4374 .flush = vp9_decode_flush,
4375 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4376 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
4377 .profiles = NULL_IF_CONFIG_SMALL(profiles),