2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
34 #include "libavutil/avassert.h"
35 #include "libavutil/pixdesc.h"
37 #define VP9_SYNCCODE 0x498342
// NOTE(review): fragment of the loop-filter context struct (VP9Filter); the
// struct's opening line is not visible in this chunk. The mask is a per-row
// bitmask of which columns need filtering, split by plane (y/uv), edge
// direction (col/row) and filter unit size — dimensions documented inline.
41 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
42 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
// Per-block decode state for one partition unit: segmentation id, the
// intra/inter and compound-prediction decisions, reference indices,
// per-sub-block prediction modes and motion vectors.
45 typedef struct VP9Block {
46 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
47 enum FilterMode filter;
48 VP56mv mv[4 /* b_idx */][2 /* ref */];
// tx/uvtx: transform sizes for luma and chroma; bp: how this block was split.
50 enum TxfmMode tx, uvtx;
52 enum BlockPartition bp;
// NOTE(review): the closing "} VP9Block;" line is not visible in this chunk.
// Decoder-instance state. NOTE(review): this listing is a fragment — several
// member lines and the nested struct wrappers (e.g. around the two `coef`
// arrays and the `counts` members below) are not visible in this chunk.
55 typedef struct VP9Context {
// current block position, in 8x8 units (row/col) and 4x4 units (row7/col7 —
// presumably; confirm against the block-decode code).
66 int row, row7, col, col7;
68 ptrdiff_t y_stride, uv_stride;
// bitstream properties of the last decoded frame
71 uint8_t last_bpp, bpp_index, bytesperpixel;
72 uint8_t last_keyframe;
73 // sb_cols/rows, rows/cols and last_fmt are used for allocating all internal
74 // arrays, and are thus per-thread. w/h and gf_fmt are synced between threads
75 // and are therefore per-stream. pix_fmt represents the value in the header
76 // of the currently processed frame.
78 enum AVPixelFormat pix_fmt, last_fmt, gf_fmt;
79 unsigned sb_cols, sb_rows, rows, cols;
80 ThreadFrame next_refs[8];
// loop-filter limit LUT, reset when sharpness changes (see decode_frame_header)
84 uint8_t mblim_lut[64];
// current tile boundaries, in block units
86 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
// NOTE(review): the two coef arrays below belong to different (invisible)
// nested structs: saved probability contexts vs. current-frame probabilities.
89 uint8_t coef[4][2][2][6][6][3];
93 uint8_t coef[4][2][2][6][6][11];
// symbol-occurrence counters used for backward probability adaptation
96 unsigned y_mode[4][10];
97 unsigned uv_mode[10][10];
98 unsigned filter[4][3];
99 unsigned mv_mode[7][4];
100 unsigned intra[4][2];
102 unsigned single_ref[5][2][2];
103 unsigned comp_ref[5][2];
104 unsigned tx32p[2][4];
105 unsigned tx16p[2][3];
108 unsigned mv_joint[4];
111 unsigned classes[11];
113 unsigned bits[10][2];
114 unsigned class0_fp[2][4];
116 unsigned class0_hp[2];
119 unsigned partition[4][4][4];
120 unsigned coef[4][2][2][6][6][3];
121 unsigned eob[4][2][2][6][6][2];
124 // contextual (left/above) cache
125 DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
126 DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
127 DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
128 DECLARE_ALIGNED(16, uint8_t, left_uv_nnz_ctx)[2][16];
129 DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
130 DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
131 DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
132 DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
133 DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
134 DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
135 DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
136 DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
// the "above" context rows are heap-allocated per superblock column in
// update_size() via the assign() macro
137 uint8_t *above_partition_ctx;
138 uint8_t *above_mode_ctx;
139 // FIXME maybe merge some of the below in a flags field?
140 uint8_t *above_y_nnz_ctx;
141 uint8_t *above_uv_nnz_ctx[2];
142 uint8_t *above_skip_ctx; // 1bit
143 uint8_t *above_txfm_ctx; // 2bit
144 uint8_t *above_segpred_ctx; // 1bit
145 uint8_t *above_intra_ctx; // 1bit
146 uint8_t *above_comp_ctx; // 1bit
147 uint8_t *above_ref_ctx; // 2bit
148 uint8_t *above_filter_ctx;
149 VP56mv (*above_mv_ctx)[2];
// whole-frame cache
152 uint8_t *intra_pred_data[3];
153 struct VP9Filter *lflvl;
154 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144 * 2];
156 // block reconstruction intermediates
157 int block_alloc_using_2pass;
158 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
159 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
160 struct { int x, y; } min_mv, max_mv;
161 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64 * 2];
162 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][64 * 64 * 2];
// per-reference scaling factors (14-bit fixed point) and step sizes,
// set up in decode_frame_header() when a ref has different dimensions
163 uint16_t mvscale[3][2];
164 uint8_t mvstep[3][2];
// Block width/height for each BlockSize. NOTE(review): the two rows appear to
// be in 4x4-sample units ([0], max 16 == 64px) and 8x8-sample units ([1],
// max 8 == 64px) respectively — confirm against the users of this table.
// Interior brace lines are missing from this listing fragment.
167 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
169 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
170 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
172 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
173 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
// Release all references held by a VP9Frame: the threaded frame buffer, the
// extradata buffer (segmentation map + mv pairs) and any hwaccel private
// buffer. The raw pointers aliasing into those buffers are cleared so stale
// use is caught. Safe to call on a partially-initialized frame (the av_*
// unref functions accept NULL). Braces are missing from this listing fragment.
177 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
179 ff_thread_release_buffer(ctx, &f->tf);
180 av_buffer_unref(&f->extradata);
181 av_buffer_unref(&f->hwaccel_priv_buf);
// these pointed into the buffers just unreferenced
182 f->segmentation_map = NULL;
183 f->hwaccel_picture_private = NULL;
// Allocate a VP9Frame: get a (possibly thread-shared) picture buffer, then a
// single zeroed extradata buffer holding the segmentation map (1 byte per 8x8
// block, 64 per superblock) followed by the per-block VP9mvrefPair array, and
// finally the hwaccel private buffer when a hwaccel is active. On any failure
// everything allocated so far is released via vp9_unref_frame() and ENOMEM is
// returned. NOTE(review): several lines (declarations, braces, error labels,
// the success return) are missing from this listing fragment.
186 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
188 VP9Context *s = ctx->priv_data;
191 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
// one byte of segmentation map per 8x8 block: 64 per 64x64 superblock
193 sz = 64 * s->sb_cols * s->sb_rows;
194 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
// segmentation map at the start of extradata, mv pairs right after it
198 f->segmentation_map = f->extradata->data;
199 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
202 const AVHWAccel *hwaccel = ctx->hwaccel;
203 av_assert0(!f->hwaccel_picture_private);
204 if (hwaccel->frame_priv_data_size) {
205 f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
206 if (!f->hwaccel_priv_buf)
208 f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
// error path: undo partial allocation
215 vp9_unref_frame(ctx, f);
216 return AVERROR(ENOMEM);
// Make dst a new reference to src: ref the threaded picture and the shared
// extradata buffer, copy the aliasing pointers (segmentation map, 2-pass
// flag) and ref the hwaccel private buffer if present. On failure dst is
// cleaned up with vp9_unref_frame() and ENOMEM returned. NOTE(review):
// braces, some copies (e.g. dst->mv) and the success return are missing from
// this listing fragment.
219 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
223 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
225 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
// shallow copies: these point into the now-shared extradata buffer
229 dst->segmentation_map = src->segmentation_map;
231 dst->uses_2pass = src->uses_2pass;
233 if (src->hwaccel_picture_private) {
234 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
235 if (!dst->hwaccel_priv_buf)
237 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
// error path: release whatever was already referenced into dst
243 vp9_unref_frame(ctx, dst);
244 return AVERROR(ENOMEM);
// (Re)configure the decoder for a new frame size and/or pixel format:
// negotiate the output format (offering hwaccel formats that match the sw
// pixfmt), update AVCodecContext dimensions, recompute superblock/block
// counts, and (re)allocate the single slab that backs all per-column "above"
// context arrays, intra prediction rows and loop-filter levels. Returns 0 on
// success or a negative AVERROR. NOTE(review): several lines (braces, early
// returns, the final return) are missing from this listing fragment.
247 static int update_size(AVCodecContext *ctx, int w, int h)
249 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + CONFIG_VP9_D3D11VA_HWACCEL + CONFIG_VP9_VAAPI_HWACCEL)
250 enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
251 VP9Context *s = ctx->priv_data;
253 int bytesperpixel = s->bytesperpixel, res, cols, rows;
255 av_assert0(w > 0 && h > 0);
// only renegotiate when size or format actually changed
257 if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
258 if ((res = ff_set_dimensions(ctx, w, h)) < 0)
// build the candidate list: hwaccel formats first, sw format as fallback
261 switch (s->pix_fmt) {
262 case AV_PIX_FMT_YUV420P:
263 #if CONFIG_VP9_DXVA2_HWACCEL
264 *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
266 #if CONFIG_VP9_D3D11VA_HWACCEL
267 *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
269 #if CONFIG_VP9_VAAPI_HWACCEL
270 *fmtp++ = AV_PIX_FMT_VAAPI;
// 10/12-bit 4:2:0 is only offered to VAAPI
273 case AV_PIX_FMT_YUV420P10:
274 case AV_PIX_FMT_YUV420P12:
275 #if CONFIG_VP9_VAAPI_HWACCEL
276 *fmtp++ = AV_PIX_FMT_VAAPI;
281 *fmtp++ = s->pix_fmt;
282 *fmtp = AV_PIX_FMT_NONE;
284 res = ff_thread_get_format(ctx, pix_fmts);
289 s->gf_fmt = s->pix_fmt;
// context arrays still valid? then nothing to (re)allocate
297 if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
300 s->last_fmt = s->pix_fmt;
301 s->sb_cols = (w + 63) >> 6;
302 s->sb_rows = (h + 63) >> 6;
303 s->cols = (w + 7) >> 3;
304 s->rows = (h + 7) >> 3;
// carve successive arrays out of one allocation, n elements per sb column
306 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
307 av_freep(&s->intra_pred_data[0]);
308 // FIXME we slightly over-allocate here for subsampled chroma, but a little
309 // bit of padding shouldn't affect performance...
310 p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
311 sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
313 return AVERROR(ENOMEM);
314 assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
315 assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
316 assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
317 assign(s->above_y_nnz_ctx, uint8_t *, 16);
318 assign(s->above_mode_ctx, uint8_t *, 16);
319 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
320 assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
321 assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
322 assign(s->above_partition_ctx, uint8_t *, 8);
323 assign(s->above_skip_ctx, uint8_t *, 8);
324 assign(s->above_txfm_ctx, uint8_t *, 8);
325 assign(s->above_segpred_ctx, uint8_t *, 8);
326 assign(s->above_intra_ctx, uint8_t *, 8);
327 assign(s->above_comp_ctx, uint8_t *, 8);
328 assign(s->above_ref_ctx, uint8_t *, 8);
329 assign(s->above_filter_ctx, uint8_t *, 8);
330 assign(s->lflvl, struct VP9Filter *, 1);
333 // these will be re-allocated a little later
334 av_freep(&s->b_base);
335 av_freep(&s->block_base);
// bit depth changed: reinit the dsp function tables
337 if (s->s.h.bpp != s->last_bpp) {
338 ff_vp9dsp_init(&s->dsp, s->s.h.bpp, ctx->flags & AV_CODEC_FLAG_BITEXACT);
339 ff_videodsp_init(&s->vdsp, s->s.h.bpp);
340 s->last_bpp = s->s.h.bpp;
// (Re)allocate the per-block reconstruction buffers (coefficient blocks and
// EOB arrays for luma + both chroma planes). In 2-pass mode the buffers are
// sized for the whole frame (per superblock), otherwise for a single
// superblock. Returns 0 or AVERROR(ENOMEM). NOTE(review): braces and the
// success return are missing from this listing fragment.
346 static int update_block_buffers(AVCodecContext *ctx)
348 VP9Context *s = ctx->priv_data;
349 int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
// buffers already allocated for the current pass layout: keep them
351 if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
355 av_free(s->block_base);
// chroma sizes shrink with each subsampling direction
356 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
357 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
358 if (s->s.frames[CUR_FRAME].uses_2pass) {
359 int sbs = s->sb_cols * s->sb_rows;
// whole-frame allocation: one VP9Block per 8x8 block, coeffs per superblock
361 s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
362 s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
363 16 * 16 + 2 * chroma_eobs) * sbs);
364 if (!s->b_base || !s->block_base)
365 return AVERROR(ENOMEM);
// slice the single slab into y/u/v coeff and eob regions
366 s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
367 s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
368 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
369 s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
370 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
// single-pass: only one superblock's worth of intermediates is live at a time
372 s->b_base = av_malloc(sizeof(VP9Block));
373 s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
374 16 * 16 + 2 * chroma_eobs);
375 if (!s->b_base || !s->block_base)
376 return AVERROR(ENOMEM);
377 s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
378 s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
379 s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
380 s->uveob_base[0] = s->eob_base + 16 * 16;
381 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
// remember which layout was used so a pass-mode switch triggers realloc
383 s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;
388 // for some reason the sign bit is at the end, not the start, of a bit sequence
// Read an n-bit magnitude followed by a sign bit; returns the signed value.
// NOTE(review): braces are missing from this listing fragment.
389 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
391 int v = get_bits(gb, n);
392 return get_bits1(gb) ? -v : v;
// Inverse of the "recenter" mapping used by the VP9 probability-update
// coding: maps a coded non-negative delta v back to a value near m. For
// v <= 2*m, odd v becomes m - (v+1)/2 and even v becomes m + v/2; larger v
// is passed through unchanged. Braces missing from this listing fragment.
395 static av_always_inline int inv_recenter_nonneg(int v, int m)
397 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
400 // differential forward probability updates
// Decode a forward (in-bitstream) update for probability p (range [1,255])
// and return the new probability. The delta is VLC-coded in four size
// classes, mapped through inv_map_table[] and re-centered around p.
// NOTE(review): braces and the "int d;" declaration are missing from this
// listing fragment.
401 static int update_prob(VP56RangeCoder *c, int p)
403 static const int inv_map_table[255] = {
404 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
405 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
406 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
407 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
408 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
409 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
410 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
411 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
412 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
413 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
414 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
415 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
416 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
417 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
418 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
419 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
420 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
421 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
426 /* This code is trying to do a differential probability update. For a
427 * current probability A in the range [1, 255], the difference to a new
428 * probability of any value can be expressed differentially as 1-A,255-A
429 * where some part of this (absolute range) exists both in positive as
430 * well as the negative part, whereas another part only exists in one
431 * half. We're trying to code this shared part differentially, i.e.
432 * times two where the value of the lowest bit specifies the sign, and
433 * the single part is then coded on top of this. This absolute difference
434 * then again has a value of [0,254], but a bigger value in this range
435 * indicates that we're further away from the original value A, so we
436 * can code this as a VLC code, since higher values are increasingly
437 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
438 * updates vs. the 'fine, exact' updates further down the range, which
439 * adds one extra dimension to this differential update model. */
// four VLC size classes: 4+4+5-bit offsets, then a 7-bit escape
441 if (!vp8_rac_get(c)) {
442 d = vp8_rac_get_uint(c, 4) + 0;
443 } else if (!vp8_rac_get(c)) {
444 d = vp8_rac_get_uint(c, 4) + 16;
445 } else if (!vp8_rac_get(c)) {
446 d = vp8_rac_get_uint(c, 5) + 32;
448 d = vp8_rac_get_uint(c, 7);
// escape class: one extra bit of precision on top of the doubled value
450 d = (d << 1) - 65 + vp8_rac_get(c);
452 av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
// re-center around p, mirroring for the upper half to stay within [1,255]
455 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
456 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
// Parse the colorspace/bit-depth/subsampling fields of the uncompressed
// frame header and derive s->pix_fmt, s->ss_h/ss_v, s->s.h.bpp and
// ctx->colorspace/color_range. Returns 0 or AVERROR_INVALIDDATA on reserved
// or profile-inconsistent combinations. NOTE(review): braces, some closing
// lines and the final return are missing from this listing fragment.
459 static int read_colorspace_details(AVCodecContext *ctx)
461 static const enum AVColorSpace colorspaces[8] = {
462 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
463 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
465 VP9Context *s = ctx->priv_data;
// profiles 0/1 are 8-bit only; 2/3 signal 10 vs 12 bit with one bit
466 int bits = ctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
469 s->s.h.bpp = 8 + bits * 2;
470 s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
471 ctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
472 if (ctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
473 static const enum AVPixelFormat pix_fmt_rgb[3] = {
474 AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
// RGB is always 4:4:4 full range
476 s->ss_h = s->ss_v = 0;
477 ctx->color_range = AVCOL_RANGE_JPEG;
478 s->pix_fmt = pix_fmt_rgb[bits];
479 if (ctx->profile & 1) {
480 if (get_bits1(&s->gb)) {
481 av_log(ctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
482 return AVERROR_INVALIDDATA;
485 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
487 return AVERROR_INVALIDDATA;
490 static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
491 { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
492 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
493 { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
494 { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
495 { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
496 { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
498 ctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
// odd profiles (1/3) carry explicit subsampling bits; 4:2:0 is reserved there
499 if (ctx->profile & 1) {
500 s->ss_h = get_bits1(&s->gb);
501 s->ss_v = get_bits1(&s->gb);
502 s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
503 if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
504 av_log(ctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
506 return AVERROR_INVALIDDATA;
507 } else if (get_bits1(&s->gb)) {
508 av_log(ctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
510 return AVERROR_INVALIDDATA;
// even profiles (0/2) are implicitly 4:2:0
513 s->ss_h = s->ss_v = 1;
514 s->pix_fmt = pix_fmt_for_ss[bits][1][1];
// Parse a complete VP9 frame header: the uncompressed part (frame marker,
// profile, show-existing-frame, key/intra flags, size, references, loop
// filter, quantizers, segmentation, tiles) followed by the arith-coded
// compressed header (probability updates). Returns the total header size in
// bytes on success, or a negative AVERROR. If the frame is a
// show-existing-frame, *ref is set and parsing stops early. NOTE(review):
// this listing fragment is missing many lines (braces, early returns, some
// assignments), so control flow cannot be fully verified here.
521 static int decode_frame_header(AVCodecContext *ctx,
522 const uint8_t *data, int size, int *ref)
524 VP9Context *s = ctx->priv_data;
525 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
527 const uint8_t *data2;
530 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
531 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
534 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
535 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
536 return AVERROR_INVALIDDATA;
// profile is 2 bits, plus a third bit reserved for future (>3) profiles
538 ctx->profile = get_bits1(&s->gb);
539 ctx->profile |= get_bits1(&s->gb) << 1;
540 if (ctx->profile == 3) ctx->profile += get_bits1(&s->gb);
541 if (ctx->profile > 3) {
542 av_log(ctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", ctx->profile);
543 return AVERROR_INVALIDDATA;
545 s->s.h.profile = ctx->profile;
// show-existing-frame: output a previously decoded reference directly
546 if (get_bits1(&s->gb)) {
547 *ref = get_bits(&s->gb, 3);
550 s->last_keyframe = s->s.h.keyframe;
551 s->s.h.keyframe = !get_bits1(&s->gb);
552 last_invisible = s->s.h.invisible;
553 s->s.h.invisible = !get_bits1(&s->gb);
554 s->s.h.errorres = get_bits1(&s->gb);
555 s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
556 if (s->s.h.keyframe) {
557 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
558 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
559 return AVERROR_INVALIDDATA;
561 if ((res = read_colorspace_details(ctx)) < 0)
563 // for profile 1, here follows the subsampling bits
// keyframes always refresh all 8 reference slots
564 s->s.h.refreshrefmask = 0xff;
565 w = get_bits(&s->gb, 16) + 1;
566 h = get_bits(&s->gb, 16) + 1;
567 if (get_bits1(&s->gb)) // display size
568 skip_bits(&s->gb, 32);
// non-keyframe path: intra-only and inter frames
570 s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
571 s->s.h.resetctx = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
572 if (s->s.h.intraonly) {
573 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
574 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
575 return AVERROR_INVALIDDATA;
577 if (ctx->profile >= 1) {
578 if ((res = read_colorspace_details(ctx)) < 0)
// profile 0 intra-only frames are implicitly 8-bit 4:2:0 BT.470BG/JPEG
581 s->ss_h = s->ss_v = 1;
584 s->bytesperpixel = 1;
585 s->pix_fmt = AV_PIX_FMT_YUV420P;
586 ctx->colorspace = AVCOL_SPC_BT470BG;
587 ctx->color_range = AVCOL_RANGE_JPEG;
589 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
590 w = get_bits(&s->gb, 16) + 1;
591 h = get_bits(&s->gb, 16) + 1;
592 if (get_bits1(&s->gb)) // display size
593 skip_bits(&s->gb, 32);
// inter frame: 3 reference indices with per-ref sign bias
595 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
596 s->s.h.refidx[0] = get_bits(&s->gb, 3);
597 s->s.h.signbias[0] = get_bits1(&s->gb) && !s->s.h.errorres;
598 s->s.h.refidx[1] = get_bits(&s->gb, 3);
599 s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres;
600 s->s.h.refidx[2] = get_bits(&s->gb, 3);
601 s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres;
602 if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
603 !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
604 !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
605 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
606 return AVERROR_INVALIDDATA;
// frame size: inherited from one of the refs, or coded explicitly
608 if (get_bits1(&s->gb)) {
609 w = s->s.refs[s->s.h.refidx[0]].f->width;
610 h = s->s.refs[s->s.h.refidx[0]].f->height;
611 } else if (get_bits1(&s->gb)) {
612 w = s->s.refs[s->s.h.refidx[1]].f->width;
613 h = s->s.refs[s->s.h.refidx[1]].f->height;
614 } else if (get_bits1(&s->gb)) {
615 w = s->s.refs[s->s.h.refidx[2]].f->width;
616 h = s->s.refs[s->s.h.refidx[2]].f->height;
618 w = get_bits(&s->gb, 16) + 1;
619 h = get_bits(&s->gb, 16) + 1;
621 // Note that in this code, "CUR_FRAME" is actually before we
622 // have formally allocated a frame, and thus actually represents
// last-frame MVs are only reusable if the size didn't change
624 s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
625 s->s.frames[CUR_FRAME].tf.f->height == h;
626 if (get_bits1(&s->gb)) // display size
627 skip_bits(&s->gb, 32);
628 s->s.h.highprecisionmvs = get_bits1(&s->gb);
629 s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
// compound prediction is possible only if the refs disagree in sign bias;
// the odd-one-out becomes the fixed compound reference
631 s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
632 s->s.h.signbias[0] != s->s.h.signbias[2];
633 if (s->s.h.allowcompinter) {
634 if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
635 s->s.h.fixcompref = 2;
636 s->s.h.varcompref[0] = 0;
637 s->s.h.varcompref[1] = 1;
638 } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
639 s->s.h.fixcompref = 1;
640 s->s.h.varcompref[0] = 0;
641 s->s.h.varcompref[1] = 2;
643 s->s.h.fixcompref = 0;
644 s->s.h.varcompref[0] = 1;
645 s->s.h.varcompref[1] = 2;
650 s->s.h.refreshctx = s->s.h.errorres ? 0 : get_bits1(&s->gb);
651 s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
652 s->s.h.framectxid = c = get_bits(&s->gb, 2);
653 if (s->s.h.keyframe || s->s.h.intraonly)
654 s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
656 /* loopfilter header data */
657 if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
658 // reset loopfilter defaults
659 s->s.h.lf_delta.ref[0] = 1;
660 s->s.h.lf_delta.ref[1] = 0;
661 s->s.h.lf_delta.ref[2] = -1;
662 s->s.h.lf_delta.ref[3] = -1;
663 s->s.h.lf_delta.mode[0] = 0;
664 s->s.h.lf_delta.mode[1] = 0;
665 memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
667 s->s.h.filter.level = get_bits(&s->gb, 6);
668 sharp = get_bits(&s->gb, 3);
669 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
670 // the old cache values since they are still valid
671 if (s->s.h.filter.sharpness != sharp)
672 memset(s->filter_lut.lim_lut, 0, sizeof(s->filter_lut.lim_lut));
673 s->s.h.filter.sharpness = sharp;
674 if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
675 if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
676 for (i = 0; i < 4; i++)
677 if (get_bits1(&s->gb))
678 s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
679 for (i = 0; i < 2; i++)
680 if (get_bits1(&s->gb))
681 s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
685 /* quantization header data */
686 s->s.h.yac_qi = get_bits(&s->gb, 8);
687 s->s.h.ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
688 s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
689 s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
690 s->s.h.lossless = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
691 s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
693 ctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
695 /* segmentation header info */
696 if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
697 if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
698 for (i = 0; i < 7; i++)
699 s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
700 get_bits(&s->gb, 8) : 255;
701 if ((s->s.h.segmentation.temporal = get_bits1(&s->gb))) {
702 for (i = 0; i < 3; i++)
703 s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
704 get_bits(&s->gb, 8) : 255;
// per-segment feature data: quantizer, loop filter, reference, skip
708 if (get_bits1(&s->gb)) {
709 s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
710 for (i = 0; i < 8; i++) {
711 if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
712 s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
713 if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
714 s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
715 if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
716 s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
717 s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
722 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
723 for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
724 int qyac, qydc, quvac, quvdc, lflvl, sh;
726 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
727 if (s->s.h.segmentation.absolute_vals)
728 qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
730 qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
732 qyac = s->s.h.yac_qi;
734 qydc = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
735 quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
736 quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
737 qyac = av_clip_uintp2(qyac, 8);
739 s->s.h.segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[s->bpp_index][qydc];
740 s->s.h.segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[s->bpp_index][qyac];
741 s->s.h.segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[s->bpp_index][quvdc];
742 s->s.h.segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[s->bpp_index][quvac];
// loop-filter deltas are scaled by 2 when the base level is >= 32
744 sh = s->s.h.filter.level >= 32;
745 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
746 if (s->s.h.segmentation.absolute_vals)
747 lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
749 lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
751 lflvl = s->s.h.filter.level;
753 if (s->s.h.lf_delta.enabled) {
754 s->s.h.segmentation.feat[i].lflvl[0][0] =
755 s->s.h.segmentation.feat[i].lflvl[0][1] =
756 av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
757 for (j = 1; j < 4; j++) {
758 s->s.h.segmentation.feat[i].lflvl[j][0] =
759 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
760 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
761 s->s.h.segmentation.feat[i].lflvl[j][1] =
762 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
763 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
766 memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
767 sizeof(s->s.h.segmentation.feat[i].lflvl));
772 if ((res = update_size(ctx, w, h)) < 0) {
773 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
// tile layout: columns are constrained so each tile is >= 4 and the count
// fits the frame; rows are coded with decode012()
777 for (s->s.h.tiling.log2_tile_cols = 0;
778 s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
779 s->s.h.tiling.log2_tile_cols++) ;
780 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
781 max = FFMAX(0, max - 1);
782 while (max > s->s.h.tiling.log2_tile_cols) {
783 if (get_bits1(&s->gb))
784 s->s.h.tiling.log2_tile_cols++;
788 s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
789 s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
790 if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
791 s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
// one range coder per tile column
792 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
793 sizeof(VP56RangeCoder) * s->s.h.tiling.tile_cols);
795 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
796 return AVERROR(ENOMEM);
800 /* check reference frames */
801 if (!s->s.h.keyframe && !s->s.h.intraonly) {
802 for (i = 0; i < 3; i++) {
803 AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
804 int refw = ref->width, refh = ref->height;
806 if (ref->format != ctx->pix_fmt) {
807 av_log(ctx, AV_LOG_ERROR,
808 "Ref pixfmt (%s) did not match current frame (%s)",
809 av_get_pix_fmt_name(ref->format),
810 av_get_pix_fmt_name(ctx->pix_fmt));
811 return AVERROR_INVALIDDATA;
812 } else if (refw == w && refh == h) {
813 s->mvscale[i][0] = s->mvscale[i][1] = 0;
// scaled references: allowed within 1/2x .. 16x in each dimension
815 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
816 av_log(ctx, AV_LOG_ERROR,
817 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
819 return AVERROR_INVALIDDATA;
// 14-bit fixed-point scale factor, plus the integer step per 16 samples
821 s->mvscale[i][0] = (refw << 14) / w;
822 s->mvscale[i][1] = (refh << 14) / h;
823 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
824 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
// probability context reset, per resetctx semantics
829 if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
830 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
831 s->prob_ctx[3].p = vp9_default_probs;
832 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
833 sizeof(vp9_default_coef_probs));
834 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
835 sizeof(vp9_default_coef_probs));
836 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
837 sizeof(vp9_default_coef_probs));
838 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
839 sizeof(vp9_default_coef_probs));
840 } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
841 s->prob_ctx[c].p = vp9_default_probs;
842 memcpy(s->prob_ctx[c].coef, vp9_default_coef_probs,
843 sizeof(vp9_default_coef_probs));
846 // next 16 bits is size of the rest of the header (arith-coded)
847 s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
848 s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
850 data2 = align_get_bits(&s->gb);
851 if (size2 > size - (data2 - data)) {
852 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
853 return AVERROR_INVALIDDATA;
855 ff_vp56_init_range_decoder(&s->c, data2, size2);
856 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
857 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
858 return AVERROR_INVALIDDATA;
// reset adaptation counters (only coef/eob are used on intra frames)
861 if (s->s.h.keyframe || s->s.h.intraonly) {
862 memset(s->counts.coef, 0, sizeof(s->counts.coef));
863 memset(s->counts.eob, 0, sizeof(s->counts.eob));
865 memset(&s->counts, 0, sizeof(s->counts));
867 // FIXME is it faster to not copy here, but do it down in the fw updates
868 // as explicit copies if the fw update is missing (and skip the copy upon
870 s->prob.p = s->prob_ctx[c].p;
// transform mode and forward updates of the tx split probabilities
873 if (s->s.h.lossless) {
874 s->s.h.txfmmode = TX_4X4;
876 s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
877 if (s->s.h.txfmmode == 3)
878 s->s.h.txfmmode += vp8_rac_get(&s->c);
880 if (s->s.h.txfmmode == TX_SWITCHABLE) {
881 for (i = 0; i < 2; i++)
882 if (vp56_rac_get_prob_branchy(&s->c, 252))
883 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
884 for (i = 0; i < 2; i++)
885 for (j = 0; j < 2; j++)
886 if (vp56_rac_get_prob_branchy(&s->c, 252))
887 s->prob.p.tx16p[i][j] =
888 update_prob(&s->c, s->prob.p.tx16p[i][j]);
889 for (i = 0; i < 2; i++)
890 for (j = 0; j < 3; j++)
891 if (vp56_rac_get_prob_branchy(&s->c, 252))
892 s->prob.p.tx32p[i][j] =
893 update_prob(&s->c, s->prob.p.tx32p[i][j]);
// coefficient probability updates, per tx size / plane / band / context
898 for (i = 0; i < 4; i++) {
899 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
900 if (vp8_rac_get(&s->c)) {
901 for (j = 0; j < 2; j++)
902 for (k = 0; k < 2; k++)
903 for (l = 0; l < 6; l++)
904 for (m = 0; m < 6; m++) {
905 uint8_t *p = s->prob.coef[i][j][k][l][m];
906 uint8_t *r = ref[j][k][l][m];
907 if (m >= 3 && l == 0) // dc only has 3 pt
909 for (n = 0; n < 3; n++) {
910 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
911 p[n] = update_prob(&s->c, r[n]);
// no update for this tx size: copy the reference probabilities
919 for (j = 0; j < 2; j++)
920 for (k = 0; k < 2; k++)
921 for (l = 0; l < 6; l++)
922 for (m = 0; m < 6; m++) {
923 uint8_t *p = s->prob.coef[i][j][k][l][m];
924 uint8_t *r = ref[j][k][l][m];
925 if (m > 3 && l == 0) // dc only has 3 pt
// no updates are coded for tx sizes above the frame's txfmmode
931 if (s->s.h.txfmmode == i)
// skip-flag probability updates
936 for (i = 0; i < 3; i++)
937 if (vp56_rac_get_prob_branchy(&s->c, 252))
938 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
// inter-frame-only probability updates follow
939 if (!s->s.h.keyframe && !s->s.h.intraonly) {
940 for (i = 0; i < 7; i++)
941 for (j = 0; j < 3; j++)
942 if (vp56_rac_get_prob_branchy(&s->c, 252))
943 s->prob.p.mv_mode[i][j] =
944 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
946 if (s->s.h.filtermode == FILTER_SWITCHABLE)
947 for (i = 0; i < 4; i++)
948 for (j = 0; j < 2; j++)
949 if (vp56_rac_get_prob_branchy(&s->c, 252))
950 s->prob.p.filter[i][j] =
951 update_prob(&s->c, s->prob.p.filter[i][j]);
953 for (i = 0; i < 4; i++)
954 if (vp56_rac_get_prob_branchy(&s->c, 252))
955 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
957 if (s->s.h.allowcompinter) {
958 s->s.h.comppredmode = vp8_rac_get(&s->c);
959 if (s->s.h.comppredmode)
960 s->s.h.comppredmode += vp8_rac_get(&s->c);
961 if (s->s.h.comppredmode == PRED_SWITCHABLE)
962 for (i = 0; i < 5; i++)
963 if (vp56_rac_get_prob_branchy(&s->c, 252))
965 update_prob(&s->c, s->prob.p.comp[i]);
967 s->s.h.comppredmode = PRED_SINGLEREF;
970 if (s->s.h.comppredmode != PRED_COMPREF) {
971 for (i = 0; i < 5; i++) {
972 if (vp56_rac_get_prob_branchy(&s->c, 252))
973 s->prob.p.single_ref[i][0] =
974 update_prob(&s->c, s->prob.p.single_ref[i][0]);
975 if (vp56_rac_get_prob_branchy(&s->c, 252))
976 s->prob.p.single_ref[i][1] =
977 update_prob(&s->c, s->prob.p.single_ref[i][1]);
981 if (s->s.h.comppredmode != PRED_SINGLEREF) {
982 for (i = 0; i < 5; i++)
983 if (vp56_rac_get_prob_branchy(&s->c, 252))
984 s->prob.p.comp_ref[i] =
985 update_prob(&s->c, s->prob.p.comp_ref[i]);
988 for (i = 0; i < 4; i++)
989 for (j = 0; j < 9; j++)
990 if (vp56_rac_get_prob_branchy(&s->c, 252))
991 s->prob.p.y_mode[i][j] =
992 update_prob(&s->c, s->prob.p.y_mode[i][j]);
994 for (i = 0; i < 4; i++)
995 for (j = 0; j < 4; j++)
996 for (k = 0; k < 3; k++)
997 if (vp56_rac_get_prob_branchy(&s->c, 252))
998 s->prob.p.partition[3 - i][j][k] =
999 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
1001 // mv fields don't use the update_prob subexp model for some reason
1002 for (i = 0; i < 3; i++)
1003 if (vp56_rac_get_prob_branchy(&s->c, 252))
1004 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1006 for (i = 0; i < 2; i++) {
1007 if (vp56_rac_get_prob_branchy(&s->c, 252))
1008 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1010 for (j = 0; j < 10; j++)
1011 if (vp56_rac_get_prob_branchy(&s->c, 252))
1012 s->prob.p.mv_comp[i].classes[j] =
1013 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1015 if (vp56_rac_get_prob_branchy(&s->c, 252))
1016 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1018 for (j = 0; j < 10; j++)
1019 if (vp56_rac_get_prob_branchy(&s->c, 252))
1020 s->prob.p.mv_comp[i].bits[j] =
1021 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1024 for (i = 0; i < 2; i++) {
1025 for (j = 0; j < 2; j++)
1026 for (k = 0; k < 3; k++)
1027 if (vp56_rac_get_prob_branchy(&s->c, 252))
1028 s->prob.p.mv_comp[i].class0_fp[j][k] =
1029 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1031 for (j = 0; j < 3; j++)
1032 if (vp56_rac_get_prob_branchy(&s->c, 252))
1033 s->prob.p.mv_comp[i].fp[j] =
1034 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// high-precision mv probabilities only exist when the frame allows them
1037 if (s->s.h.highprecisionmvs) {
1038 for (i = 0; i < 2; i++) {
1039 if (vp56_rac_get_prob_branchy(&s->c, 252))
1040 s->prob.p.mv_comp[i].class0_hp =
1041 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1043 if (vp56_rac_get_prob_branchy(&s->c, 252))
1044 s->prob.p.mv_comp[i].hp =
1045 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// total header size: uncompressed part + compressed part
1050 return (data2 - data) + size2;
// Clamp a motion vector into the valid MV range for the current block,
// using the per-block bounds s->min_mv / s->max_mv.
// NOTE(review): this extracted listing is missing lines (the second
// signature line and the braces are absent); the number at the start of
// each line is the original source line number, not code.
1053 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
1056 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
1057 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
// Build the predicted motion vector (*pmv) for reference slot 'ref' of the
// current block, following the VP9 MV-reference scan order: sub-block MVs,
// spatial neighbours with the same reference, the co-located MV in the
// previous frame, then neighbours/co-located with a *different* reference
// (sign-flipped when the reference sign bias differs).  'idx' selects
// nearest (0) vs near (1) candidate; 'sb' is the sub-block index or -1.
// NOTE(review): many lines are missing from this extracted listing (macro
// bodies, braces); the leading number on each line is the original source
// line number, not code.
1060 static void find_ref_mvs(VP9Context *s,
1061 VP56mv *pmv, int ref, int z, int idx, int sb)
// Per-block-size neighbour offsets (col, row deltas) defining the scan
// order of candidate MV positions.
1063 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
1064 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
1065 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
1066 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
1067 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
1068 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
1069 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
1070 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
1071 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1072 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
1073 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1074 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
1075 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
1076 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
1077 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
1078 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
1079 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
1080 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
1081 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
1082 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1083 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1084 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1085 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1086 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1087 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
1088 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
1089 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
// 'mem' remembers the first accepted candidate so a second, different one
// can be returned when idx == 1; INVALID_MV marks "no candidate yet".
1092 int row = s->row, col = s->col, row7 = s->row7;
1093 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
1094 #define INVALID_MV 0x80008000U
1095 uint32_t mem = INVALID_MV, mem_sub8x8 = INVALID_MV;
// RETURN_DIRECT_MV: accept a previously decoded sub-block MV verbatim
// (no clamping) — used for sub-block candidates of the same block.
1098 #define RETURN_DIRECT_MV(mv) \
1100 uint32_t m = AV_RN32A(&mv); \
1104 } else if (mem == INVALID_MV) { \
1106 } else if (m != mem) { \
1113 if (sb == 2 || sb == 1) {
1114 RETURN_DIRECT_MV(b->mv[0][z]);
1115 } else if (sb == 3) {
1116 RETURN_DIRECT_MV(b->mv[2][z]);
1117 RETURN_DIRECT_MV(b->mv[1][z]);
1118 RETURN_DIRECT_MV(b->mv[0][z]);
// RETURN_MV: accept a neighbour MV, clamped to the block's MV bounds,
// with special sub-8x8 handling (mem_sub8x8) for the "near" candidate.
1121 #define RETURN_MV(mv) \
1126 av_assert2(idx == 1); \
1127 av_assert2(mem != INVALID_MV); \
1128 if (mem_sub8x8 == INVALID_MV) { \
1129 clamp_mv(&tmp, &mv, s); \
1130 m = AV_RN32A(&tmp); \
1135 mem_sub8x8 = AV_RN32A(&mv); \
1136 } else if (mem_sub8x8 != AV_RN32A(&mv)) { \
1137 clamp_mv(&tmp, &mv, s); \
1138 m = AV_RN32A(&tmp); \
1142 /* BUG I'm pretty sure this isn't the intention */ \
1148 uint32_t m = AV_RN32A(&mv); \
1150 clamp_mv(pmv, &mv, s); \
1152 } else if (mem == INVALID_MV) { \
1154 } else if (m != mem) { \
1155 clamp_mv(pmv, &mv, s); \
1162 struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1163 if (mv->ref[0] == ref) {
1164 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1165 } else if (mv->ref[1] == ref) {
1166 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
1169 if (col > s->tile_col_start) {
1170 struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1171 if (mv->ref[0] == ref) {
1172 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1173 } else if (mv->ref[1] == ref) {
1174 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1182 // previously coded MVs in this neighbourhood, using same reference frame
1183 for (; i < 8; i++) {
1184 int c = p[i][0] + col, r = p[i][1] + row;
1186 if (c >= s->tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1187 struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1189 if (mv->ref[0] == ref) {
1190 RETURN_MV(mv->mv[0]);
1191 } else if (mv->ref[1] == ref) {
1192 RETURN_MV(mv->mv[1]);
1197 // MV at this position in previous frame, using same reference frame
1198 if (s->s.h.use_last_frame_mvs) {
1199 struct VP9mvrefPair *mv = &s->s.frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1201 if (!s->s.frames[REF_FRAME_MVPAIR].uses_2pass)
1202 ff_thread_await_progress(&s->s.frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
1203 if (mv->ref[0] == ref) {
1204 RETURN_MV(mv->mv[0]);
1205 } else if (mv->ref[1] == ref) {
1206 RETURN_MV(mv->mv[1]);
// RETURN_SCALE_MV: as RETURN_MV, but negate the MV first when the two
// references have opposite sign bias ('scale' nonzero).
1210 #define RETURN_SCALE_MV(mv, scale) \
1213 VP56mv mv_temp = { -mv.x, -mv.y }; \
1214 RETURN_MV(mv_temp); \
1220 // previously coded MVs in this neighbourhood, using different reference frame
1221 for (i = 0; i < 8; i++) {
1222 int c = p[i][0] + col, r = p[i][1] + row;
1224 if (c >= s->tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1225 struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1227 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1228 RETURN_SCALE_MV(mv->mv[0], s->s.h.signbias[mv->ref[0]] != s->s.h.signbias[ref]);
1230 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1231 // BUG - libvpx has this condition regardless of whether
1232 // we used the first ref MV and pre-scaling
1233 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1234 RETURN_SCALE_MV(mv->mv[1], s->s.h.signbias[mv->ref[1]] != s->s.h.signbias[ref]);
1239 // MV at this position in previous frame, using different reference frame
1240 if (s->s.h.use_last_frame_mvs) {
1241 struct VP9mvrefPair *mv = &s->s.frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1243 // no need to await_progress, because we already did that above
1244 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1245 RETURN_SCALE_MV(mv->mv[0], s->s.h.signbias[mv->ref[0]] != s->s.h.signbias[ref]);
1247 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1248 // BUG - libvpx has this condition regardless of whether
1249 // we used the first ref MV and pre-scaling
1250 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1251 RETURN_SCALE_MV(mv->mv[1], s->s.h.signbias[mv->ref[1]] != s->s.h.signbias[ref]);
// No candidate found at all: fall back to whatever *pmv holds, clamped.
1256 clamp_mv(pmv, pmv, s);
1259 #undef RETURN_SCALE_MV
// Decode one MV component delta (x or y, selected by 'idx') from the range
// coder: sign, magnitude class, then either the class0 short path or the
// integer-bits + fractional + (optional) high-precision path.  'hp' enables
// the high-precision bit.  Updates s->counts for backward adaptation.
// Returns the signed component value (never 0).
// NOTE(review): this extracted listing is missing lines (braces, the
// class-dependent branches); leading numbers are original line numbers.
1262 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1264 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1265 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1266 s->prob.p.mv_comp[idx].classes);
1268 s->counts.mv_comp[idx].sign[sign]++;
1269 s->counts.mv_comp[idx].classes[c]++;
// Large-magnitude path: c integer bits, then 2 fractional bits, then hp bit.
1273 for (n = 0, m = 0; m < c; m++) {
1274 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1276 s->counts.mv_comp[idx].bits[m][bit]++;
1279 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1281 s->counts.mv_comp[idx].fp[bit]++;
1283 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1284 s->counts.mv_comp[idx].hp[bit]++;
1288 // bug in libvpx - we count for bw entropy purposes even if the
1290 s->counts.mv_comp[idx].hp[1]++;
// class0 (small magnitude) path: 1 bit + fractional tree + optional hp bit.
1294 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1295 s->counts.mv_comp[idx].class0[n]++;
1296 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1297 s->prob.p.mv_comp[idx].class0_fp[n]);
1298 s->counts.mv_comp[idx].class0_fp[n][bit]++;
1299 n = (n << 3) | (bit << 1);
1301 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1302 s->counts.mv_comp[idx].class0_hp[bit]++;
1306 // bug in libvpx - we count for bw entropy purposes even if the
1308 s->counts.mv_comp[idx].class0_hp[1]++;
// Apply sign; magnitude is always at least 1.
1312 return sign ? -(n + 1) : (n + 1);
// Fill mv[0] (and mv[1] for compound prediction) for the current (sub-)block
// according to 'mode' (ZEROMV / NEARESTMV / NEARMV / NEWMV): look up the
// reference MV via find_ref_mvs(), then, for NEWMV, add a decoded delta per
// component.  'sb' is the sub-block index, or -1 for whole-block.
// NOTE(review): lines are missing from this extracted listing (ZEROMV body,
// braces, low-precision rounding block); leading numbers are original line
// numbers.
1315 static void fill_mv(VP9Context *s,
1316 VP56mv *mv, int mode, int sb)
1320 if (mode == ZEROMV) {
1325 // FIXME cache this value and reuse for other subblocks
1326 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1327 mode == NEWMV ? -1 : sb);
1328 // FIXME maybe move this code into find_ref_mvs()
1329 if ((mode == NEWMV || sb == -1) &&
1330 !(hp = s->s.h.highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
// NEWMV: decode joint (which components carry a delta) and add deltas
// for the first reference.
1344 if (mode == NEWMV) {
1345 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1346 s->prob.p.mv_joint);
1348 s->counts.mv_joint[j]++;
1349 if (j >= MV_JOINT_V)
1350 mv[0].y += read_mv_component(s, 0, hp);
1352 mv[0].x += read_mv_component(s, 1, hp);
// Second reference of a compound block: same procedure for mv[1].
1356 // FIXME cache this value and reuse for other subblocks
1357 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1358 mode == NEWMV ? -1 : sb);
1359 if ((mode == NEWMV || sb == -1) &&
1360 !(hp = s->s.h.highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1374 if (mode == NEWMV) {
1375 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1376 s->prob.p.mv_joint);
1378 s->counts.mv_joint[j]++;
1379 if (j >= MV_JOINT_V)
1380 mv[1].y += read_mv_component(s, 0, hp);
1382 mv[1].x += read_mv_component(s, 1, hp);
// Splat byte value 'v' over a w x h region of a context plane at the given
// stride, with width-specialized fast paths (16-/32-/64-bit stores).
// NOTE(review): most of this function's body is missing from the extracted
// listing (the per-width store loops); leading numbers are original line
// numbers.
1388 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1389 ptrdiff_t stride, int v)
1399 int v16 = v * 0x0101;
1407 uint32_t v32 = v * 0x01010101;
1416 uint64_t v64 = v * 0x0101010101010101ULL;
1422 uint32_t v32 = v * 0x01010101;
1425 AV_WN32A(ptr + 4, v32);
// Decode all per-block mode information for the current block: segment id,
// skip flag, intra/inter, transform size, intra modes or (for inter blocks)
// references, interpolation filter, inter modes and motion vectors.  Then
// propagate everything into the above/left context buffers and the
// frame-level segmentation/MV arrays used by later blocks and frames.
// NOTE(review): this extracted listing is missing many lines (braces,
// else-branches, parts of conditions); the leading number on each line is
// the original source line number, not code.
1434 static void decode_mode(AVCodecContext *ctx)
1436 static const uint8_t left_ctx[N_BS_SIZES] = {
1437 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1439 static const uint8_t above_ctx[N_BS_SIZES] = {
1440 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
1442 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1443 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1444 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1446 VP9Context *s = ctx->priv_data;
1448 int row = s->row, col = s->col, row7 = s->row7;
1449 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
1450 int bw4 = bwh_tab[1][b->bs][0], w4 = FFMIN(s->cols - col, bw4);
1451 int bh4 = bwh_tab[1][b->bs][1], h4 = FFMIN(s->rows - row, bh4), y;
1452 int have_a = row > 0, have_l = col > s->tile_col_start;
1453 int vref, filter_id;
// Segment id: explicit, temporally predicted, or inherited from the
// reference frame's segmentation map.
1455 if (!s->s.h.segmentation.enabled) {
1457 } else if (s->s.h.keyframe || s->s.h.intraonly) {
1458 b->seg_id = !s->s.h.segmentation.update_map ? 0 :
1459 vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->s.h.segmentation.prob);
1460 } else if (!s->s.h.segmentation.update_map ||
1461 (s->s.h.segmentation.temporal &&
1462 vp56_rac_get_prob_branchy(&s->c,
1463 s->s.h.segmentation.pred_prob[s->above_segpred_ctx[col] +
1464 s->left_segpred_ctx[row7]]))) {
1465 if (!s->s.h.errorres && s->s.frames[REF_FRAME_SEGMAP].segmentation_map) {
1467 uint8_t *refsegmap = s->s.frames[REF_FRAME_SEGMAP].segmentation_map;
1469 if (!s->s.frames[REF_FRAME_SEGMAP].uses_2pass)
1470 ff_thread_await_progress(&s->s.frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
1471 for (y = 0; y < h4; y++) {
1472 int idx_base = (y + row) * 8 * s->sb_cols + col;
1473 for (x = 0; x < w4; x++)
1474 pred = FFMIN(pred, refsegmap[idx_base + x]);
1476 av_assert1(pred < 8);
1482 memset(&s->above_segpred_ctx[col], 1, w4);
1483 memset(&s->left_segpred_ctx[row7], 1, h4);
1485 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1486 s->s.h.segmentation.prob);
1488 memset(&s->above_segpred_ctx[col], 0, w4);
1489 memset(&s->left_segpred_ctx[row7], 0, h4);
1491 if (s->s.h.segmentation.enabled &&
1492 (s->s.h.segmentation.update_map || s->s.h.keyframe || s->s.h.intraonly)) {
1493 setctx_2d(&s->s.frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1494 bw4, bh4, 8 * s->sb_cols, b->seg_id);
// Skip flag: forced by the segment feature, or coded with an above+left
// context.
1497 b->skip = s->s.h.segmentation.enabled &&
1498 s->s.h.segmentation.feat[b->seg_id].skip_enabled;
1500 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1501 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1502 s->counts.skip[c][b->skip]++;
// Intra/inter decision (keyframes are always intra; segment can pin the
// reference).
1505 if (s->s.h.keyframe || s->s.h.intraonly) {
1507 } else if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
1508 b->intra = !s->s.h.segmentation.feat[b->seg_id].ref_val;
1512 if (have_a && have_l) {
1513 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1516 c = have_a ? 2 * s->above_intra_ctx[col] :
1517 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1519 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1520 s->counts.intra[c][bit]++;
// Transform size: coded when TX_SWITCHABLE, otherwise capped by block size.
1524 if ((b->intra || !b->skip) && s->s.h.txfmmode == TX_SWITCHABLE) {
1528 c = (s->above_skip_ctx[col] ? max_tx :
1529 s->above_txfm_ctx[col]) +
1530 (s->left_skip_ctx[row7] ? max_tx :
1531 s->left_txfm_ctx[row7]) > max_tx;
1533 c = s->above_skip_ctx[col] ? 1 :
1534 (s->above_txfm_ctx[col] * 2 > max_tx);
1536 } else if (have_l) {
1537 c = s->left_skip_ctx[row7] ? 1 :
1538 (s->left_txfm_ctx[row7] * 2 > max_tx);
1544 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1546 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1548 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1550 s->counts.tx32p[c][b->tx]++;
1553 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1555 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1556 s->counts.tx16p[c][b->tx]++;
1559 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1560 s->counts.tx8p[c][b->tx]++;
1567 b->tx = FFMIN(max_tx, s->s.h.txfmmode);
// Intra modes: keyframe path uses neighbour-conditioned default probs;
// non-keyframe intra uses the adapted y_mode/uv_mode probabilities.
1570 if (s->s.h.keyframe || s->s.h.intraonly) {
1571 uint8_t *a = &s->above_mode_ctx[col * 2];
1572 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1575 if (b->bs > BS_8x8) {
1576 // FIXME the memory storage intermediates here aren't really
1577 // necessary, they're just there to make the code slightly
1579 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1580 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1581 if (b->bs != BS_8x4) {
1582 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1583 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1584 l[0] = a[1] = b->mode[1];
1586 l[0] = a[1] = b->mode[1] = b->mode[0];
1588 if (b->bs != BS_4x8) {
1589 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1590 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1591 if (b->bs != BS_8x4) {
1592 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1593 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1594 l[1] = a[1] = b->mode[3];
1596 l[1] = a[1] = b->mode[3] = b->mode[2];
1599 b->mode[2] = b->mode[0];
1600 l[1] = a[1] = b->mode[3] = b->mode[1];
1603 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1604 vp9_default_kf_ymode_probs[*a][*l]);
1605 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1606 // FIXME this can probably be optimized
1607 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1608 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1610 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1611 vp9_default_kf_uvmode_probs[b->mode[3]]);
1612 } else if (b->intra) {
1614 if (b->bs > BS_8x8) {
1615 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1616 s->prob.p.y_mode[0]);
1617 s->counts.y_mode[0][b->mode[0]]++;
1618 if (b->bs != BS_8x4) {
1619 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1620 s->prob.p.y_mode[0]);
1621 s->counts.y_mode[0][b->mode[1]]++;
1623 b->mode[1] = b->mode[0];
1625 if (b->bs != BS_4x8) {
1626 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1627 s->prob.p.y_mode[0]);
1628 s->counts.y_mode[0][b->mode[2]]++;
1629 if (b->bs != BS_8x4) {
1630 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1631 s->prob.p.y_mode[0]);
1632 s->counts.y_mode[0][b->mode[3]]++;
1634 b->mode[3] = b->mode[2];
1637 b->mode[2] = b->mode[0];
1638 b->mode[3] = b->mode[1];
1641 static const uint8_t size_group[10] = {
1642 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1644 int sz = size_group[b->bs];
1646 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1647 s->prob.p.y_mode[sz]);
1648 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1649 s->counts.y_mode[sz][b->mode[3]]++;
1651 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1652 s->prob.p.uv_mode[b->mode[3]]);
1653 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
// Inter path: context LUT maps (above_mode, left_mode) to an inter-mode
// context index.
1655 static const uint8_t inter_mode_ctx_lut[14][14] = {
1656 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1657 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1658 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1659 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1660 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1661 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1662 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1663 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1664 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1665 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1666 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1667 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1668 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1669 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
1672 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].ref_enabled) {
1673 av_assert2(s->s.h.segmentation.feat[b->seg_id].ref_val != 0);
1675 b->ref[0] = s->s.h.segmentation.feat[b->seg_id].ref_val - 1;
// Compound-prediction flag: fixed by header mode or coded with a
// neighbour-derived context.
1677 // read comp_pred flag
1678 if (s->s.h.comppredmode != PRED_SWITCHABLE) {
1679 b->comp = s->s.h.comppredmode == PRED_COMPREF;
1683 // FIXME add intra as ref=0xff (or -1) to make these easier?
1686 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1688 } else if (s->above_comp_ctx[col]) {
1689 c = 2 + (s->left_intra_ctx[row7] ||
1690 s->left_ref_ctx[row7] == s->s.h.fixcompref);
1691 } else if (s->left_comp_ctx[row7]) {
1692 c = 2 + (s->above_intra_ctx[col] ||
1693 s->above_ref_ctx[col] == s->s.h.fixcompref);
1695 c = (!s->above_intra_ctx[col] &&
1696 s->above_ref_ctx[col] == s->s.h.fixcompref) ^
1697 (!s->left_intra_ctx[row7] &&
1698 s->left_ref_ctx[row & 7] == s->s.h.fixcompref);
1701 c = s->above_comp_ctx[col] ? 3 :
1702 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->s.h.fixcompref);
1704 } else if (have_l) {
1705 c = s->left_comp_ctx[row7] ? 3 :
1706 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->s.h.fixcompref);
1710 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1711 s->counts.comp[c][b->comp]++;
1714 // read actual references
1715 // FIXME probably cache a few variables here to prevent repetitive
1716 // memory accesses below
1717 if (b->comp) /* two references */ {
1718 int fix_idx = s->s.h.signbias[s->s.h.fixcompref], var_idx = !fix_idx, c, bit;
1720 b->ref[fix_idx] = s->s.h.fixcompref;
1721 // FIXME can this codeblob be replaced by some sort of LUT?
1724 if (s->above_intra_ctx[col]) {
1725 if (s->left_intra_ctx[row7]) {
1728 c = 1 + 2 * (s->left_ref_ctx[row7] != s->s.h.varcompref[1]);
1730 } else if (s->left_intra_ctx[row7]) {
1731 c = 1 + 2 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
1733 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1735 if (refl == refa && refa == s->s.h.varcompref[1]) {
1737 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1738 if ((refa == s->s.h.fixcompref && refl == s->s.h.varcompref[0]) ||
1739 (refl == s->s.h.fixcompref && refa == s->s.h.varcompref[0])) {
1742 c = (refa == refl) ? 3 : 1;
1744 } else if (!s->left_comp_ctx[row7]) {
1745 if (refa == s->s.h.varcompref[1] && refl != s->s.h.varcompref[1]) {
1748 c = (refl == s->s.h.varcompref[1] &&
1749 refa != s->s.h.varcompref[1]) ? 2 : 4;
1751 } else if (!s->above_comp_ctx[col]) {
1752 if (refl == s->s.h.varcompref[1] && refa != s->s.h.varcompref[1]) {
1755 c = (refa == s->s.h.varcompref[1] &&
1756 refl != s->s.h.varcompref[1]) ? 2 : 4;
1759 c = (refl == refa) ? 4 : 2;
1763 if (s->above_intra_ctx[col]) {
1765 } else if (s->above_comp_ctx[col]) {
1766 c = 4 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
1768 c = 3 * (s->above_ref_ctx[col] != s->s.h.varcompref[1]);
1771 } else if (have_l) {
1772 if (s->left_intra_ctx[row7]) {
1774 } else if (s->left_comp_ctx[row7]) {
1775 c = 4 * (s->left_ref_ctx[row7] != s->s.h.varcompref[1]);
1777 c = 3 * (s->left_ref_ctx[row7] != s->s.h.varcompref[1]);
1782 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1783 b->ref[var_idx] = s->s.h.varcompref[bit];
1784 s->counts.comp_ref[c][bit]++;
1785 } else /* single reference */ {
1788 if (have_a && !s->above_intra_ctx[col]) {
1789 if (have_l && !s->left_intra_ctx[row7]) {
1790 if (s->left_comp_ctx[row7]) {
1791 if (s->above_comp_ctx[col]) {
1792 c = 1 + (!s->s.h.fixcompref || !s->left_ref_ctx[row7] ||
1793 !s->above_ref_ctx[col]);
1795 c = (3 * !s->above_ref_ctx[col]) +
1796 (!s->s.h.fixcompref || !s->left_ref_ctx[row7]);
1798 } else if (s->above_comp_ctx[col]) {
1799 c = (3 * !s->left_ref_ctx[row7]) +
1800 (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
1802 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1804 } else if (s->above_intra_ctx[col]) {
1806 } else if (s->above_comp_ctx[col]) {
1807 c = 1 + (!s->s.h.fixcompref || !s->above_ref_ctx[col]);
1809 c = 4 * (!s->above_ref_ctx[col]);
1811 } else if (have_l && !s->left_intra_ctx[row7]) {
1812 if (s->left_intra_ctx[row7]) {
1814 } else if (s->left_comp_ctx[row7]) {
1815 c = 1 + (!s->s.h.fixcompref || !s->left_ref_ctx[row7]);
1817 c = 4 * (!s->left_ref_ctx[row7]);
1822 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1823 s->counts.single_ref[c][0][bit]++;
1827 // FIXME can this codeblob be replaced by some sort of LUT?
1830 if (s->left_intra_ctx[row7]) {
1831 if (s->above_intra_ctx[col]) {
1833 } else if (s->above_comp_ctx[col]) {
1834 c = 1 + 2 * (s->s.h.fixcompref == 1 ||
1835 s->above_ref_ctx[col] == 1);
1836 } else if (!s->above_ref_ctx[col]) {
1839 c = 4 * (s->above_ref_ctx[col] == 1);
1841 } else if (s->above_intra_ctx[col]) {
1842 if (s->left_intra_ctx[row7]) {
1844 } else if (s->left_comp_ctx[row7]) {
1845 c = 1 + 2 * (s->s.h.fixcompref == 1 ||
1846 s->left_ref_ctx[row7] == 1);
1847 } else if (!s->left_ref_ctx[row7]) {
1850 c = 4 * (s->left_ref_ctx[row7] == 1);
1852 } else if (s->above_comp_ctx[col]) {
1853 if (s->left_comp_ctx[row7]) {
1854 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1855 c = 3 * (s->s.h.fixcompref == 1 ||
1856 s->left_ref_ctx[row7] == 1);
1860 } else if (!s->left_ref_ctx[row7]) {
1861 c = 1 + 2 * (s->s.h.fixcompref == 1 ||
1862 s->above_ref_ctx[col] == 1);
1864 c = 3 * (s->left_ref_ctx[row7] == 1) +
1865 (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
1867 } else if (s->left_comp_ctx[row7]) {
1868 if (!s->above_ref_ctx[col]) {
1869 c = 1 + 2 * (s->s.h.fixcompref == 1 ||
1870 s->left_ref_ctx[row7] == 1);
1872 c = 3 * (s->above_ref_ctx[col] == 1) +
1873 (s->s.h.fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1875 } else if (!s->above_ref_ctx[col]) {
1876 if (!s->left_ref_ctx[row7]) {
1879 c = 4 * (s->left_ref_ctx[row7] == 1);
1881 } else if (!s->left_ref_ctx[row7]) {
1882 c = 4 * (s->above_ref_ctx[col] == 1);
1884 c = 2 * (s->left_ref_ctx[row7] == 1) +
1885 2 * (s->above_ref_ctx[col] == 1);
1888 if (s->above_intra_ctx[col] ||
1889 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1891 } else if (s->above_comp_ctx[col]) {
1892 c = 3 * (s->s.h.fixcompref == 1 || s->above_ref_ctx[col] == 1);
1894 c = 4 * (s->above_ref_ctx[col] == 1);
1897 } else if (have_l) {
1898 if (s->left_intra_ctx[row7] ||
1899 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1901 } else if (s->left_comp_ctx[row7]) {
1902 c = 3 * (s->s.h.fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1904 c = 4 * (s->left_ref_ctx[row7] == 1);
1909 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1910 s->counts.single_ref[c][1][bit]++;
1911 b->ref[0] = 1 + bit;
// Inter modes and MVs: sub-8x8 blocks get one mode for the whole block;
// larger split blocks decode a mode + MV per sub-block.
1916 if (b->bs <= BS_8x8) {
1917 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[b->seg_id].skip_enabled) {
1918 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1920 static const uint8_t off[10] = {
1921 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1924 // FIXME this needs to use the LUT tables from find_ref_mvs
1925 // because not all are -1,0/0,-1
1926 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1927 [s->left_mode_ctx[row7 + off[b->bs]]];
1929 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1930 s->prob.p.mv_mode[c]);
1931 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1932 s->counts.mv_mode[c][b->mode[0] - 10]++;
// Interpolation filter: coded only when the header says SWITCHABLE.
1936 if (s->s.h.filtermode == FILTER_SWITCHABLE) {
1939 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1940 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1941 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1942 s->left_filter_ctx[row7] : 3;
1944 c = s->above_filter_ctx[col];
1946 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1947 c = s->left_filter_ctx[row7];
1952 filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1953 s->prob.p.filter[c]);
1954 s->counts.filter[c][filter_id]++;
1955 b->filter = vp9_filter_lut[filter_id];
1957 b->filter = s->s.h.filtermode;
1960 if (b->bs > BS_8x8) {
1961 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1963 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1964 s->prob.p.mv_mode[c]);
1965 s->counts.mv_mode[c][b->mode[0] - 10]++;
1966 fill_mv(s, b->mv[0], b->mode[0], 0);
1968 if (b->bs != BS_8x4) {
1969 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1970 s->prob.p.mv_mode[c]);
1971 s->counts.mv_mode[c][b->mode[1] - 10]++;
1972 fill_mv(s, b->mv[1], b->mode[1], 1);
1974 b->mode[1] = b->mode[0];
1975 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1976 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1979 if (b->bs != BS_4x8) {
1980 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1981 s->prob.p.mv_mode[c]);
1982 s->counts.mv_mode[c][b->mode[2] - 10]++;
1983 fill_mv(s, b->mv[2], b->mode[2], 2);
1985 if (b->bs != BS_8x4) {
1986 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1987 s->prob.p.mv_mode[c]);
1988 s->counts.mv_mode[c][b->mode[3] - 10]++;
1989 fill_mv(s, b->mv[3], b->mode[3], 3);
1991 b->mode[3] = b->mode[2];
1992 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1993 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1996 b->mode[2] = b->mode[0];
1997 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1998 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1999 b->mode[3] = b->mode[1];
2000 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
2001 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
2004 fill_mv(s, b->mv[0], b->mode[0], -1);
2005 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
2006 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
2007 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
2008 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
2009 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
2010 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
2013 vref = b->ref[b->comp ? s->s.h.signbias[s->s.h.varcompref[0]] : 0];
// SPLAT_CTX / SET_CTXS: write the decoded per-block values into the
// above/left context arrays with width-sized aligned stores.
2017 #define SPLAT_CTX(var, val, n) \
2019 case 1: var = val; break; \
2020 case 2: AV_WN16A(&var, val * 0x0101); break; \
2021 case 4: AV_WN32A(&var, val * 0x01010101); break; \
2022 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
2024 uint64_t v64 = val * 0x0101010101010101ULL; \
2025 AV_WN64A( &var, v64); \
2026 AV_WN64A(&((uint8_t *) &var)[8], v64); \
2031 #define SPLAT_CTX(var, val, n) \
2033 case 1: var = val; break; \
2034 case 2: AV_WN16A(&var, val * 0x0101); break; \
2035 case 4: AV_WN32A(&var, val * 0x01010101); break; \
2037 uint32_t v32 = val * 0x01010101; \
2038 AV_WN32A( &var, v32); \
2039 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2043 uint32_t v32 = val * 0x01010101; \
2044 AV_WN32A( &var, v32); \
2045 AV_WN32A(&((uint8_t *) &var)[4], v32); \
2046 AV_WN32A(&((uint8_t *) &var)[8], v32); \
2047 AV_WN32A(&((uint8_t *) &var)[12], v32); \
2053 switch (bwh_tab[1][b->bs][0]) {
2054 #define SET_CTXS(dir, off, n) \
2056 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
2057 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
2058 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
2059 if (!s->s.h.keyframe && !s->s.h.intraonly) { \
2060 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
2061 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
2062 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
2064 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
2065 if (s->s.h.filtermode == FILTER_SWITCHABLE) { \
2066 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
2071 case 1: SET_CTXS(above, col, 1); break;
2072 case 2: SET_CTXS(above, col, 2); break;
2073 case 4: SET_CTXS(above, col, 4); break;
2074 case 8: SET_CTXS(above, col, 8); break;
2076 switch (bwh_tab[1][b->bs][1]) {
2077 case 1: SET_CTXS(left, row7, 1); break;
2078 case 2: SET_CTXS(left, row7, 2); break;
2079 case 4: SET_CTXS(left, row7, 4); break;
2080 case 8: SET_CTXS(left, row7, 8); break;
// Store this block's MVs into the above/left MV context buffers for use
// by find_ref_mvs() on subsequent blocks.
2085 if (!s->s.h.keyframe && !s->s.h.intraonly) {
2086 if (b->bs > BS_8x8) {
2087 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2089 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
2090 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
2091 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
2092 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
2093 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
2094 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
2095 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
2096 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
2098 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
2100 for (n = 0; n < w4 * 2; n++) {
2101 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
2102 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
2104 for (n = 0; n < h4 * 2; n++) {
2105 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
2106 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
// Finally, write refs + MVs into the frame-level per-8x8 MV array, which
// the next frame reads as REF_FRAME_MVPAIR.
2112 for (y = 0; y < h4; y++) {
2113 int x, o = (row + y) * s->sb_cols * 8 + col;
2114 struct VP9mvrefPair *mv = &s->s.frames[CUR_FRAME].mv[o];
2117 for (x = 0; x < w4; x++) {
2121 } else if (b->comp) {
2122 for (x = 0; x < w4; x++) {
2123 mv[x].ref[0] = b->ref[0];
2124 mv[x].ref[1] = b->ref[1];
2125 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2126 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2129 for (x = 0; x < w4; x++) {
2130 mv[x].ref[0] = b->ref[0];
2132 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2139 static av_always_inline int
2140 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2141 int is_tx32x32, int is8bitsperpixel, int bpp, unsigned (*cnt)[6][3],
2142 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2143 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2144 const int16_t *band_counts, const int16_t *qmul)
2146 int i = 0, band = 0, band_left = band_counts[band];
2147 uint8_t *tp = p[0][nnz];
2148 uint8_t cache[1024];
2153 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2154 eob[band][nnz][val]++;
2159 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2160 cnt[band][nnz][0]++;
2162 band_left = band_counts[++band];
2164 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2166 if (++i == n_coeffs)
2167 break; //invalid input; blocks should end with EOB
2172 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2173 cnt[band][nnz][1]++;
2177 // fill in p[3-10] (model fill) - only once per frame for each pos
2179 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2181 cnt[band][nnz][2]++;
2182 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2183 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2184 cache[rc] = val = 2;
2186 val = 3 + vp56_rac_get_prob(c, tp[5]);
2189 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2191 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2192 val = 5 + vp56_rac_get_prob(c, 159);
2194 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2195 val += vp56_rac_get_prob(c, 145);
2199 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2200 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2201 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2202 val += (vp56_rac_get_prob(c, 148) << 1);
2203 val += vp56_rac_get_prob(c, 140);
2205 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2206 val += (vp56_rac_get_prob(c, 155) << 2);
2207 val += (vp56_rac_get_prob(c, 140) << 1);
2208 val += vp56_rac_get_prob(c, 135);
2210 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2211 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2212 val += (vp56_rac_get_prob(c, 157) << 3);
2213 val += (vp56_rac_get_prob(c, 141) << 2);
2214 val += (vp56_rac_get_prob(c, 134) << 1);
2215 val += vp56_rac_get_prob(c, 130);
2218 if (!is8bitsperpixel) {
2220 val += vp56_rac_get_prob(c, 255) << 17;
2221 val += vp56_rac_get_prob(c, 255) << 16;
2223 val += (vp56_rac_get_prob(c, 255) << 15);
2224 val += (vp56_rac_get_prob(c, 255) << 14);
2226 val += (vp56_rac_get_prob(c, 254) << 13);
2227 val += (vp56_rac_get_prob(c, 254) << 12);
2228 val += (vp56_rac_get_prob(c, 254) << 11);
2229 val += (vp56_rac_get_prob(c, 252) << 10);
2230 val += (vp56_rac_get_prob(c, 249) << 9);
2231 val += (vp56_rac_get_prob(c, 243) << 8);
2232 val += (vp56_rac_get_prob(c, 230) << 7);
2233 val += (vp56_rac_get_prob(c, 196) << 6);
2234 val += (vp56_rac_get_prob(c, 177) << 5);
2235 val += (vp56_rac_get_prob(c, 153) << 4);
2236 val += (vp56_rac_get_prob(c, 140) << 3);
2237 val += (vp56_rac_get_prob(c, 133) << 2);
2238 val += (vp56_rac_get_prob(c, 130) << 1);
2239 val += vp56_rac_get_prob(c, 129);
2243 #define STORE_COEF(c, i, v) do { \
2244 if (is8bitsperpixel) { \
2247 AV_WN32A(&c[i * 2], v); \
2251 band_left = band_counts[++band];
2253 STORE_COEF(coef, rc, ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2);
2255 STORE_COEF(coef, rc, (vp8_rac_get(c) ? -val : val) * qmul[!!i]);
2256 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2258 } while (++i < n_coeffs);
// Thin wrapper: decode a <=16x16 transform block at 8 bits per pixel.
// (is_tx32x32 = 0, is8bitsperpixel = 1, bpp = 8.)
2263 static int decode_coeffs_b_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2264 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2265 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2266 const int16_t (*nb)[2], const int16_t *band_counts,
2267 const int16_t *qmul)
2269 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 1, 8, cnt, eob, p,
2270 nnz, scan, nb, band_counts, qmul);
// Thin wrapper: decode a 32x32 transform block at 8 bits per pixel.
// (is_tx32x32 = 1 selects the halved dequant rounding.)
2273 static int decode_coeffs_b32_8bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2274 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2275 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2276 const int16_t (*nb)[2], const int16_t *band_counts,
2277 const int16_t *qmul)
2279 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 1, 8, cnt, eob, p,
2280 nnz, scan, nb, band_counts, qmul);
// Thin wrapper: decode a <=16x16 transform block at high bit depth
// (actual depth taken from the frame header, s->s.h.bpp).
2283 static int decode_coeffs_b_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2284 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2285 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2286 const int16_t (*nb)[2], const int16_t *band_counts,
2287 const int16_t *qmul)
2289 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 0, 0, s->s.h.bpp, cnt, eob, p,
2290 nnz, scan, nb, band_counts, qmul);
// Thin wrapper: decode a 32x32 transform block at high bit depth.
2293 static int decode_coeffs_b32_16bpp(VP9Context *s, int16_t *coef, int n_coeffs,
2294 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2295 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2296 const int16_t (*nb)[2], const int16_t *band_counts,
2297 const int16_t *qmul)
2299 return decode_coeffs_b_generic(&s->c, coef, n_coeffs, 1, 0, s->s.h.bpp, cnt, eob, p,
2300 nnz, scan, nb, band_counts, qmul);
// Decode all luma and chroma coefficients of the current block (s->row,
// s->col). Maintains the above/left non-zero-context arrays (merged to the
// transform granularity before decoding, splatted back to 4x4 granularity
// after), records per-sub-block EOBs in s->eob / s->uveob, and reports via
// total_coeff whether any coefficient was decoded at all.
// NOTE(review): parts of this function are elided in this chunk; comments
// describe only the visible code.
2303 static av_always_inline int decode_coeffs(AVCodecContext *ctx, int is8bitsperpixel)
2305 VP9Context *s = ctx->priv_data;
2307 int row = s->row, col = s->col;
2308 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2309 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2310 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
// block size in 4x4 luma units; end_x/end_y clip against the frame edge
2311 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2312 int end_x = FFMIN(2 * (s->cols - col), w4);
2313 int end_y = FFMIN(2 * (s->rows - row), h4);
2314 int n, pl, x, y, res;
2315 int16_t (*qmul)[2] = s->s.h.segmentation.feat[b->seg_id].qmul;
// lossless uses the WHT scan tables stored after the 4 regular tx sizes
2316 int tx = 4 * s->s.h.lossless + b->tx;
2317 const int16_t * const *yscans = vp9_scans[tx];
2318 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2319 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2320 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2321 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2322 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
// per-band coefficient counts for each tx size (last entry = remainder)
2323 static const int16_t band_counts[4][8] = {
2324 { 1, 2, 3, 4, 3, 16 - 13 },
2325 { 1, 2, 3, 4, 11, 64 - 21 },
2326 { 1, 2, 3, 4, 11, 256 - 21 },
2327 { 1, 2, 3, 4, 11, 1024 - 21 },
2329 const int16_t *y_band_counts = band_counts[b->tx];
2330 const int16_t *uv_band_counts = band_counts[b->uvtx];
2331 int bytesperpixel = is8bitsperpixel ? 1 : 2;
2332 int total_coeff = 0;
// MERGE/MERGE_CTX: collapse the per-4x4 nnz context entries down to one
// flag per transform-sized unit before decoding at that granularity.
2334 #define MERGE(la, end, step, rd) \
2335 for (n = 0; n < end; n += step) \
2336 la[n] = !!rd(&la[n])
2337 #define MERGE_CTX(step, rd) \
2339 MERGE(l, end_y, step, rd); \
2340 MERGE(a, end_x, step, rd); \
2343 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2344 for (n = 0, y = 0; y < end_y; y += step) { \
2345 for (x = 0; x < end_x; x += step, n += step * step) { \
2346 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2347 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2348 (s, s->block + 16 * n * bytesperpixel, 16 * step * step, \
2349 c, e, p, a[x] + l[y], yscans[txtp], \
2350 ynbs[txtp], y_band_counts, qmul[0]); \
2351 a[x] = l[y] = !!res; \
2352 total_coeff |= !!res; \
2354 AV_WN16A(&s->eob[n], res); \
2361 #define SPLAT(la, end, step, cond) \
2363 for (n = 1; n < end; n += step) \
2364 la[n] = la[n - 1]; \
2365 } else if (step == 4) { \
2367 for (n = 0; n < end; n += step) \
2368 AV_WN32A(&la[n], la[n] * 0x01010101); \
2370 for (n = 0; n < end; n += step) \
2371 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2373 } else /* step == 8 */ { \
2375 if (HAVE_FAST_64BIT) { \
2376 for (n = 0; n < end; n += step) \
2377 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2379 for (n = 0; n < end; n += step) { \
2380 uint32_t v32 = la[n] * 0x01010101; \
2381 AV_WN32A(&la[n], v32); \
2382 AV_WN32A(&la[n + 4], v32); \
2386 for (n = 0; n < end; n += step) \
2387 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2390 #define SPLAT_CTX(step) \
2392 SPLAT(a, end_x, step, end_x == w4); \
2393 SPLAT(l, end_y, step, end_y == h4); \
2399 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2402 MERGE_CTX(2, AV_RN16A);
2403 DECODE_Y_COEF_LOOP(2, 0,);
2407 MERGE_CTX(4, AV_RN32A);
2408 DECODE_Y_COEF_LOOP(4, 0,);
2412 MERGE_CTX(8, AV_RN64A);
2413 DECODE_Y_COEF_LOOP(8, 0, 32);
// chroma: same scheme, but with the uv scan/band tables, the per-plane
// uveob arrays, and the AC dequant factor qmul[1]
2418 #define DECODE_UV_COEF_LOOP(step, v) \
2419 for (n = 0, y = 0; y < end_y; y += step) { \
2420 for (x = 0; x < end_x; x += step, n += step * step) { \
2421 res = (is8bitsperpixel ? decode_coeffs_b##v##_8bpp : decode_coeffs_b##v##_16bpp) \
2422 (s, s->uvblock[pl] + 16 * n * bytesperpixel, \
2423 16 * step * step, c, e, p, a[x] + l[y], \
2424 uvscan, uvnb, uv_band_counts, qmul[1]); \
2425 a[x] = l[y] = !!res; \
2426 total_coeff |= !!res; \
2428 AV_WN16A(&s->uveob[pl][n], res); \
2430 s->uveob[pl][n] = res; \
2435 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2436 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2437 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
// decode both chroma planes, picking up the per-plane nnz contexts
2442 for (pl = 0; pl < 2; pl++) {
2443 a = &s->above_uv_nnz_ctx[pl][col << !s->ss_h];
2444 l = &s->left_uv_nnz_ctx[pl][(row & 7) << !s->ss_v];
2447 DECODE_UV_COEF_LOOP(1,);
2450 MERGE_CTX(2, AV_RN16A);
2451 DECODE_UV_COEF_LOOP(2,);
2455 MERGE_CTX(4, AV_RN32A);
2456 DECODE_UV_COEF_LOOP(4,);
2460 MERGE_CTX(8, AV_RN64A);
2461 DECODE_UV_COEF_LOOP(8, 32);
// 8 bits-per-pixel entry point for coefficient decoding.
2470 static int decode_coeffs_8bpp(AVCodecContext *ctx)
2472 return decode_coeffs(ctx, 1);
// High bit depth (10/12-bit) entry point for coefficient decoding.
2475 static int decode_coeffs_16bpp(AVCodecContext *ctx)
2477 return decode_coeffs(ctx, 0);
// Prepare the top (*a) and left (l) edge pixel arrays for one intra-predicted
// transform block, and return the prediction mode to actually use. When a
// required neighbor is unavailable (frame/tile edge), the mode is remapped
// via mode_conv[] to a DC variant; otherwise the needed edge pixels are
// gathered (from the previous sbrow's pre-loopfilter intra_pred_data when at
// the top of an sb64 row, else from the reconstructed picture) and extended
// by replication where fewer pixels exist than the predictor needs.
// NOTE(review): parts of this function are elided in this chunk; comments
// describe only the visible code.
2480 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2481 uint8_t *dst_edge, ptrdiff_t stride_edge,
2482 uint8_t *dst_inner, ptrdiff_t stride_inner,
2483 uint8_t *l, int col, int x, int w,
2484 int row, int y, enum TxfmMode tx,
2485 int p, int ss_h, int ss_v, int bytesperpixel)
2487 int have_top = row > 0 || y > 0;
// left pixels exist unless we sit on the first column of the current tile
2488 int have_left = col > s->tile_col_start || x > 0;
2489 int have_right = x < w - 1;
2490 int bpp = s->s.h.bpp;
// mode remap table, indexed [mode][have_left][have_top]
2491 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2492 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2493 { DC_127_PRED, VERT_PRED } },
2494 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2495 { HOR_PRED, HOR_PRED } },
2496 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2497 { LEFT_DC_PRED, DC_PRED } },
2498 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2499 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2500 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2501 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2502 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2503 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2504 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2505 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2506 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2507 { DC_127_PRED, VERT_LEFT_PRED } },
2508 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2509 { HOR_UP_PRED, HOR_UP_PRED } },
2510 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2511 { HOR_PRED, TM_VP8_PRED } },
// which edge pixels each (possibly remapped) mode requires
2513 static const struct {
2514 uint8_t needs_left:1;
2515 uint8_t needs_top:1;
2516 uint8_t needs_topleft:1;
2517 uint8_t needs_topright:1;
2518 uint8_t invert_left:1;
2519 } edges[N_INTRA_PRED_MODES] = {
2520 [VERT_PRED] = { .needs_top = 1 },
2521 [HOR_PRED] = { .needs_left = 1 },
2522 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2523 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2524 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2525 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2526 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2527 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2528 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2529 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2530 [LEFT_DC_PRED] = { .needs_left = 1 },
2531 [TOP_DC_PRED] = { .needs_top = 1 },
2532 [DC_128_PRED] = { 0 },
2533 [DC_127_PRED] = { 0 },
2534 [DC_129_PRED] = { 0 }
2537 av_assert2(mode >= 0 && mode < 10);
2538 mode = mode_conv[mode][have_left][have_top];
2539 if (edges[mode].needs_top) {
2540 uint8_t *top, *topleft;
// pixels available to the right of this block before hitting the frame edge
2541 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !ss_h) - x) * 4;
2542 int n_px_need_tr = 0;
2544 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2547 // if top of sb64-row, use s->intra_pred_data[] instead of
2548 // dst[-stride] for intra prediction (it contains pre- instead of
2549 // post-loopfilter data)
2551 top = !(row & 7) && !y ?
2552 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2553 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2555 topleft = !(row & 7) && !y ?
2556 s->intra_pred_data[p] + (col * (8 >> ss_h) + x * 4) * bytesperpixel :
2557 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2558 &dst_inner[-stride_inner];
// fast path: the existing row can be used directly (elided in this view)
2562 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2563 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2564 n_px_need + n_px_need_tr <= n_px_have) {
2568 if (n_px_need <= n_px_have) {
2569 memcpy(*a, top, n_px_need * bytesperpixel);
// memset_bpp: replicate pixel v[i2] into c[i1..i1+num-1], honoring 1- or
// 2-byte pixels
2571 #define memset_bpp(c, i1, v, i2, num) do { \
2572 if (bytesperpixel == 1) { \
2573 memset(&(c)[(i1)], (v)[(i2)], (num)); \
2575 int n, val = AV_RN16A(&(v)[(i2) * 2]); \
2576 for (n = 0; n < (num); n++) { \
2577 AV_WN16A(&(c)[((i1) + n) * 2], val); \
2581 memcpy(*a, top, n_px_have * bytesperpixel);
2582 memset_bpp(*a, n_px_have, (*a), n_px_have - 1, n_px_need - n_px_have);
// memset_val: fill num pixels with a constant value (bit-depth aware)
2585 #define memset_val(c, val, num) do { \
2586 if (bytesperpixel == 1) { \
2587 memset((c), (val), (num)); \
2590 for (n = 0; n < (num); n++) { \
2591 AV_WN16A(&(c)[n * 2], (val)); \
// no top row available: fill with the "127" constant scaled to bit depth
2595 memset_val(*a, (128 << (bpp - 8)) - 1, n_px_need);
2597 if (edges[mode].needs_topleft) {
2598 if (have_left && have_top) {
// assign_bpp: copy a single pixel (1 or 2 bytes) from v[i2] to c[i1]
2599 #define assign_bpp(c, i1, v, i2) do { \
2600 if (bytesperpixel == 1) { \
2601 (c)[(i1)] = (v)[(i2)]; \
2603 AV_COPY16(&(c)[(i1) * 2], &(v)[(i2) * 2]); \
2606 assign_bpp(*a, -1, topleft, -1);
2608 #define assign_val(c, i, v) do { \
2609 if (bytesperpixel == 1) { \
2612 AV_WN16A(&(c)[(i) * 2], (v)); \
// missing topleft: 129 if a top row exists, 127 otherwise (per spec)
2615 assign_val((*a), -1, (128 << (bpp - 8)) + (have_top ? +1 : -1));
2618 if (tx == TX_4X4 && edges[mode].needs_topright) {
2619 if (have_top && have_right &&
2620 n_px_need + n_px_need_tr <= n_px_have) {
2621 memcpy(&(*a)[4 * bytesperpixel], &top[4 * bytesperpixel], 4 * bytesperpixel);
// no topright: extend the last top pixel
2623 memset_bpp(*a, 4, *a, 3, 4);
2628 if (edges[mode].needs_left) {
2630 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !ss_v) - y) * 4;
2631 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2632 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
// HOR_UP stores the left column bottom-up (invert_left); others top-down
2634 if (edges[mode].invert_left) {
2635 if (n_px_need <= n_px_have) {
2636 for (i = 0; i < n_px_need; i++)
2637 assign_bpp(l, i, &dst[i * stride], -1);
2639 for (i = 0; i < n_px_have; i++)
2640 assign_bpp(l, i, &dst[i * stride], -1);
2641 memset_bpp(l, n_px_have, l, n_px_have - 1, n_px_need - n_px_have);
2644 if (n_px_need <= n_px_have) {
2645 for (i = 0; i < n_px_need; i++)
2646 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2648 for (i = 0; i < n_px_have; i++)
2649 assign_bpp(l, n_px_need - 1 - i, &dst[i * stride], -1);
2650 memset_bpp(l, 0, l, n_px_need - n_px_have, n_px_need - n_px_have);
// no left column: fill with the "129" constant scaled to bit depth
2654 memset_val(l, (128 << (bpp - 8)) + 1, 4 << tx);
// Reconstruct the current intra-coded block: for every luma transform
// sub-block, build the edge arrays (check_intra_mode), run the spatial
// predictor, and add the inverse-transformed residual; then do the same for
// both chroma planes. dst_r/ptr_r point into the reference-visible frame
// buffer, dst/ptr into the (possibly emulated-edge) working destination.
// NOTE(review): parts of this function are elided in this chunk; comments
// describe only the visible code.
2661 static av_always_inline void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off,
2662 ptrdiff_t uv_off, int bytesperpixel)
2664 VP9Context *s = ctx->priv_data;
2666 int row = s->row, col = s->col;
// step1d = sub-block width in 4px units, step = coefficients per sub-block
2667 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2668 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2669 int end_x = FFMIN(2 * (s->cols - col), w4);
2670 int end_y = FFMIN(2 * (s->rows - row), h4);
2671 int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
2672 int uvstep1d = 1 << b->uvtx, p;
2673 uint8_t *dst = s->dst[0], *dst_r = s->s.frames[CUR_FRAME].tf.f->data[0] + y_off;
// scratch edge buffers: a_buf holds the top edge (predictor reads a[-1..]),
// l the left edge
2674 LOCAL_ALIGNED_32(uint8_t, a_buf, [96]);
2675 LOCAL_ALIGNED_32(uint8_t, l, [64]);
2677 for (n = 0, y = 0; y < end_y; y += step1d) {
2678 uint8_t *ptr = dst, *ptr_r = dst_r;
2679 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d * bytesperpixel,
2680 ptr_r += 4 * step1d * bytesperpixel, n += step) {
// sub-8x8 blocks carry one mode per 4x4; larger blocks use mode[0]
2681 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2683 uint8_t *a = &a_buf[32];
2684 enum TxfmType txtp = vp9_intra_txfm_type[mode];
// tx>8x8 stores 16-bit EOBs, smaller sizes one byte per sub-block
2685 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2687 mode = check_intra_mode(s, mode, &a, ptr_r,
2688 s->s.frames[CUR_FRAME].tf.f->linesize[0],
2689 ptr, s->y_stride, l,
2690 col, x, w4, row, y, b->tx, 0, 0, 0, bytesperpixel);
2691 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2693 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2694 s->block + 16 * n * bytesperpixel, eob);
2696 dst_r += 4 * step1d * s->s.frames[CUR_FRAME].tf.f->linesize[0];
2697 dst += 4 * step1d * s->y_stride;
// chroma: same loop per plane; chroma always uses DCT_DCT
2704 step = 1 << (b->uvtx * 2);
2705 for (p = 0; p < 2; p++) {
2706 dst = s->dst[1 + p];
2707 dst_r = s->s.frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2708 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2709 uint8_t *ptr = dst, *ptr_r = dst_r;
2710 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d * bytesperpixel,
2711 ptr_r += 4 * uvstep1d * bytesperpixel, n += step) {
2712 int mode = b->uvmode;
2713 uint8_t *a = &a_buf[32];
2714 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2716 mode = check_intra_mode(s, mode, &a, ptr_r,
2717 s->s.frames[CUR_FRAME].tf.f->linesize[1],
2718 ptr, s->uv_stride, l, col, x, w4, row, y,
2719 b->uvtx, p + 1, s->ss_h, s->ss_v, bytesperpixel);
2720 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2722 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2723 s->uvblock[p] + 16 * n * bytesperpixel, eob);
2725 dst_r += 4 * uvstep1d * s->s.frames[CUR_FRAME].tf.f->linesize[1];
2726 dst += 4 * uvstep1d * s->uv_stride;
// 8 bits-per-pixel intra reconstruction entry point.
2731 static void intra_recon_8bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2733 intra_recon(ctx, y_off, uv_off, 1);
// High bit depth (2 bytes/pixel) intra reconstruction entry point.
2736 static void intra_recon_16bpp(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2738 intra_recon(ctx, y_off, uv_off, 2);
// Luma motion compensation, reference frame at the same resolution as the
// current frame. Waits for the reference thread to decode the needed rows,
// then either runs the subpel filter directly on the reference or, near
// frame edges, through emulated_edge_mc into the 160-byte-stride scratch
// buffer first.
2741 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2742 uint8_t *dst, ptrdiff_t dst_stride,
2743 const uint8_t *ref, ptrdiff_t ref_stride,
2744 ThreadFrame *ref_frame,
2745 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2746 int bw, int bh, int w, int h, int bytesperpixel)
2748 int mx = mv->x, my = mv->y, th;
2752 ref += y * ref_stride + x * bytesperpixel;
2755 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2756 // we use +7 because the last 7 pixels of each sbrow can be changed in
2757 // the longest loopfilter of the next sbrow
// th: last 64-pixel sbrow of the reference this block can touch
2758 th = (y + bh + 4 * !!my + 7) >> 6;
2759 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2760 // The arm/aarch64 _hv filters read one more row than what actually is
2761 // needed, so switch to emulated edge one pixel sooner vertically
2762 // (!!my * 5) than horizontally (!!mx * 4).
2763 if (x < !!mx * 3 || y < !!my * 3 ||
2764 x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
2765 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2766 ref - !!my * 3 * ref_stride - !!mx * 3 * bytesperpixel,
2768 bw + !!mx * 7, bh + !!my * 7,
2769 x - !!mx * 3, y - !!my * 3, w, h);
2770 ref = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
// subpel phase passed as 1/16th-pel (mv is 1/8th-pel, hence << 1)
2773 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
// Chroma motion compensation (both U and V planes), reference frame at the
// same resolution. The luma motion vector is scaled by the chroma
// subsampling factors; edge handling mirrors mc_luma_unscaled but each
// plane is emulated separately into the shared scratch buffer.
2776 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2777 uint8_t *dst_u, uint8_t *dst_v,
2778 ptrdiff_t dst_stride,
2779 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2780 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2781 ThreadFrame *ref_frame,
2782 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2783 int bw, int bh, int w, int h, int bytesperpixel)
// mv is in luma 1/8-pel; doubling per unsubsampled axis yields chroma 1/16-pel
2785 int mx = mv->x * (1 << !s->ss_h), my = mv->y * (1 << !s->ss_v), th;
2789 ref_u += y * src_stride_u + x * bytesperpixel;
2790 ref_v += y * src_stride_v + x * bytesperpixel;
2793 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2794 // we use +7 because the last 7 pixels of each sbrow can be changed in
2795 // the longest loopfilter of the next sbrow
2796 th = (y + bh + 4 * !!my + 7) >> (6 - s->ss_v);
2797 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2798 // The arm/aarch64 _hv filters read one more row than what actually is
2799 // needed, so switch to emulated edge one pixel sooner vertically
2800 // (!!my * 5) than horizontally (!!mx * 4).
2801 if (x < !!mx * 3 || y < !!my * 3 ||
2802 x + !!mx * 4 > w - bw || y + !!my * 5 > h - bh) {
2803 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2804 ref_u - !!my * 3 * src_stride_u - !!mx * 3 * bytesperpixel,
2806 bw + !!mx * 7, bh + !!my * 7,
2807 x - !!mx * 3, y - !!my * 3, w, h);
2808 ref_u = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2809 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 160, bh, mx, my);
2811 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2812 ref_v - !!my * 3 * src_stride_v - !!mx * 3 * bytesperpixel,
2814 bw + !!mx * 7, bh + !!my * 7,
2815 x - !!mx * 3, y - !!my * 3, w, h);
2816 ref_v = s->edge_emu_buffer + !!my * 3 * 160 + !!mx * 3 * bytesperpixel;
2817 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 160, bh, mx, my);
// fast path: both planes filtered straight from the reference
2819 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2820 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
// Bind the generic mc_{luma,chroma}_dir macros used by vp9_mc_template.c to
// the unscaled implementations, then instantiate the inter-prediction
// template twice (8bpp and 16bpp). The px/py/pw/ph arguments are only needed
// by the scaled variants and are ignored here.
2824 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2825 px, py, pw, ph, bw, bh, w, h, i) \
2826 mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2827 mv, bw, bh, w, h, bytesperpixel)
2828 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2829 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2830 mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2831 row, col, mv, bw, bh, w, h, bytesperpixel)
// template instantiation: generates inter_pred_8bpp() / inter_pred_16bpp()
2833 #define FN(x) x##_8bpp
2834 #define BYTES_PER_PIXEL 1
2835 #include "vp9_mc_template.c"
2837 #undef BYTES_PER_PIXEL
2838 #define FN(x) x##_16bpp
2839 #define BYTES_PER_PIXEL 2
2840 #include "vp9_mc_template.c"
2842 #undef mc_chroma_dir
2844 #undef BYTES_PER_PIXEL
// Luma motion compensation with reference scaling (reference frame has a
// different resolution than the current frame). Falls back to the unscaled
// path when dimensions match; otherwise clamps the MV to the libvpx-
// compatible range, converts position+MV into the reference's coordinate
// space via the precomputed scale/step factors, and runs the scaled subpel
// filter (smc), with emulated edges near the frame border (288-byte scratch
// stride here, vs. 160 in the unscaled path).
2847 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2848 vp9_mc_func (*mc)[2],
2849 uint8_t *dst, ptrdiff_t dst_stride,
2850 const uint8_t *ref, ptrdiff_t ref_stride,
2851 ThreadFrame *ref_frame,
2852 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2853 int px, int py, int pw, int ph,
2854 int bw, int bh, int w, int h, int bytesperpixel,
2855 const uint16_t *scale, const uint8_t *step)
2857 if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
2858 s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
2859 mc_luma_unscaled(s, mc, dst, dst_stride, ref, ref_stride, ref_frame,
2860 y, x, in_mv, bw, bh, w, h, bytesperpixel);
// scale factor is 14-bit fixed point; 64-bit intermediate avoids overflow
2862 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2864 int refbw_m1, refbh_m1;
2868 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
2869 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
2870 // BUG libvpx seems to scale the two components separately. This introduces
2871 // rounding errors but we have to reproduce them to be exactly compatible
2872 // with the output from libvpx...
2873 mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
2874 my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
2878 ref += y * ref_stride + x * bytesperpixel;
// last reference row/col the scaled filter will read, relative to (x, y)
2881 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2882 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2883 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2884 // we use +7 because the last 7 pixels of each sbrow can be changed in
2885 // the longest loopfilter of the next sbrow
2886 th = (y + refbh_m1 + 4 + 7) >> 6;
2887 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2888 // The arm/aarch64 _hv filters read one more row than what actually is
2889 // needed, so switch to emulated edge one pixel sooner vertically
2890 // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
2891 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
2892 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2893 ref - 3 * ref_stride - 3 * bytesperpixel,
2895 refbw_m1 + 8, refbh_m1 + 8,
2896 x - 3, y - 3, w, h);
2897 ref = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2900 smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
// Chroma motion compensation with reference scaling, both U and V planes.
// Mirrors mc_luma_scaled, with per-axis handling of chroma subsampling:
// on an unsubsampled axis the MV is clamped/scaled in full-pel units and
// an extra fractional term reproduces a known libvpx rounding bug (see the
// webm issue links below) for bit-exact compatibility.
2904 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2905 vp9_mc_func (*mc)[2],
2906 uint8_t *dst_u, uint8_t *dst_v,
2907 ptrdiff_t dst_stride,
2908 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2909 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2910 ThreadFrame *ref_frame,
2911 ptrdiff_t y, ptrdiff_t x, const VP56mv *in_mv,
2912 int px, int py, int pw, int ph,
2913 int bw, int bh, int w, int h, int bytesperpixel,
2914 const uint16_t *scale, const uint8_t *step)
2916 if (s->s.frames[CUR_FRAME].tf.f->width == ref_frame->f->width &&
2917 s->s.frames[CUR_FRAME].tf.f->height == ref_frame->f->height) {
2918 mc_chroma_unscaled(s, mc, dst_u, dst_v, dst_stride, ref_u, src_stride_u,
2919 ref_v, src_stride_v, ref_frame,
2920 y, x, in_mv, bw, bh, w, h, bytesperpixel);
2923 int refbw_m1, refbh_m1;
2928 // BUG https://code.google.com/p/webm/issues/detail?id=820
2929 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 16, (s->cols * 4 - x + px + 3) * 16);
2930 mx = scale_mv(mv.x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2932 mv.x = av_clip(in_mv->x, -(x + pw - px + 4) * 8, (s->cols * 8 - x + px + 3) * 8);
2933 mx = scale_mv(mv.x * 2, 0) + scale_mv(x * 16, 0);
2936 // BUG https://code.google.com/p/webm/issues/detail?id=820
2937 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 16, (s->rows * 4 - y + py + 3) * 16);
2938 my = scale_mv(mv.y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2940 mv.y = av_clip(in_mv->y, -(y + ph - py + 4) * 8, (s->rows * 8 - y + py + 3) * 8);
2941 my = scale_mv(mv.y * 2, 1) + scale_mv(y * 16, 1);
2946 ref_u += y * src_stride_u + x * bytesperpixel;
2947 ref_v += y * src_stride_v + x * bytesperpixel;
2950 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2951 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2952 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2953 // we use +7 because the last 7 pixels of each sbrow can be changed in
2954 // the longest loopfilter of the next sbrow
2955 th = (y + refbh_m1 + 4 + 7) >> (6 - s->ss_v);
2956 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2957 // The arm/aarch64 _hv filters read one more row than what actually is
2958 // needed, so switch to emulated edge one pixel sooner vertically
2959 // (y + 5 >= h - refbh_m1) than horizontally (x + 4 >= w - refbw_m1).
2960 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 5 >= h - refbh_m1) {
2961 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2962 ref_u - 3 * src_stride_u - 3 * bytesperpixel,
2964 refbw_m1 + 8, refbh_m1 + 8,
2965 x - 3, y - 3, w, h);
2966 ref_u = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2967 smc(dst_u, dst_stride, ref_u, 288, bh, mx, my, step[0], step[1]);
2969 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2970 ref_v - 3 * src_stride_v - 3 * bytesperpixel,
2972 refbw_m1 + 8, refbh_m1 + 8,
2973 x - 3, y - 3, w, h);
2974 ref_v = s->edge_emu_buffer + 3 * 288 + 3 * bytesperpixel;
2975 smc(dst_v, dst_stride, ref_v, 288, bh, mx, my, step[0], step[1]);
// fast path: both planes filtered straight from the reference
2977 smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2978 smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
// Re-bind the mc_{luma,chroma}_dir macros to the scaled implementations
// (passing the sub-block offset px/py/pw/ph and the per-reference
// scale/step tables), then instantiate the inter-prediction template for
// the scaled 8bpp and 16bpp variants.
2983 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, \
2984 px, py, pw, ph, bw, bh, w, h, i) \
2985 mc_luma_scaled(s, s->dsp.s##mc, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2986 mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2987 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2988 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2989 row, col, mv, px, py, pw, ph, bw, bh, w, h, i) \
2990 mc_chroma_scaled(s, s->dsp.s##mc, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2991 row, col, mv, px, py, pw, ph, bw, bh, w, h, bytesperpixel, \
2992 s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
// template instantiation: generates inter_pred_scaled_8bpp() / _16bpp()
2994 #define FN(x) x##_scaled_8bpp
2995 #define BYTES_PER_PIXEL 1
2996 #include "vp9_mc_template.c"
2998 #undef BYTES_PER_PIXEL
2999 #define FN(x) x##_scaled_16bpp
3000 #define BYTES_PER_PIXEL 2
3001 #include "vp9_mc_template.c"
3003 #undef mc_chroma_dir
3005 #undef BYTES_PER_PIXEL
// Reconstruct the current inter-coded block: run motion-compensated
// prediction (scaled variant if any used reference has a nonzero mvscale),
// then add the inverse-transformed residuals for luma and both chroma
// planes. Structure of the residual loops mirrors intra_recon().
// NOTE(review): parts of this function are elided in this chunk; comments
// describe only the visible code.
3008 static av_always_inline void inter_recon(AVCodecContext *ctx, int bytesperpixel)
3010 VP9Context *s = ctx->priv_data;
3012 int row = s->row, col = s->col;
// dispatch on scaling and bit depth to the template-generated predictors
3014 if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
3015 if (bytesperpixel == 1) {
3016 inter_pred_scaled_8bpp(ctx);
3018 inter_pred_scaled_16bpp(ctx);
3021 if (bytesperpixel == 1) {
3022 inter_pred_8bpp(ctx);
3024 inter_pred_16bpp(ctx);
3028 /* mostly copied intra_recon() */
3030 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
3031 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
3032 int end_x = FFMIN(2 * (s->cols - col), w4);
3033 int end_y = FFMIN(2 * (s->rows - row), h4);
3034 int tx = 4 * s->s.h.lossless + b->tx, uvtx = b->uvtx + 4 * s->s.h.lossless;
3035 int uvstep1d = 1 << b->uvtx, p;
3036 uint8_t *dst = s->dst[0];
// luma residual: inter blocks always use DCT_DCT
3039 for (n = 0, y = 0; y < end_y; y += step1d) {
3041 for (x = 0; x < end_x; x += step1d,
3042 ptr += 4 * step1d * bytesperpixel, n += step) {
3043 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
3046 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
3047 s->block + 16 * n * bytesperpixel, eob);
3049 dst += 4 * s->y_stride * step1d;
// chroma residual, per plane
3055 step = 1 << (b->uvtx * 2);
3056 for (p = 0; p < 2; p++) {
3057 dst = s->dst[p + 1];
3058 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
3060 for (x = 0; x < end_x; x += uvstep1d,
3061 ptr += 4 * uvstep1d * bytesperpixel, n += step) {
3062 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
3065 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
3066 s->uvblock[p] + 16 * n * bytesperpixel, eob);
3068 dst += 4 * uvstep1d * s->uv_stride;
// 8 bits-per-pixel inter reconstruction entry point.
3074 static void inter_recon_8bpp(AVCodecContext *ctx)
3076 inter_recon(ctx, 1);
// High bit depth (2 bytes/pixel) inter reconstruction entry point.
3079 static void inter_recon_16bpp(AVCodecContext *ctx)
3081 inter_recon(ctx, 2);
// Accumulate loopfilter edge bitmasks for the current block into the
// per-superblock VP9Filter mask array. mask[0] holds column (vertical-edge)
// masks, mask[1] row (horizontal-edge) masks; the last index selects the
// filter width class (0=16px, 1=8px, 2=4px, 3=inner 4px). row_and_7 /
// col_and_7 locate the block inside its 64x64 superblock; w/h are the block
// size in (possibly subsampled) 8px units, col_end/row_end flag odd
// clipped edges, and skip_inter marks skipped inter blocks which only
// filter transform-block boundaries.
// NOTE(review): parts of this function are elided in this chunk; comments
// describe only the visible code.
3084 static av_always_inline void mask_edges(uint8_t (*mask)[8][4], int ss_h, int ss_v,
3085 int row_and_7, int col_and_7,
3086 int w, int h, int col_end, int row_end,
3087 enum TxfmMode tx, int skip_inter)
// bit patterns selecting which 8px columns/rows get the wide (8px) filter
3089 static const unsigned wide_filter_col_mask[2] = { 0x11, 0x01 };
3090 static const unsigned wide_filter_row_mask[2] = { 0x03, 0x07 };
3092 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
3093 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
3094 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
3095 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
3097 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
3098 // edges. This means that for UV, we work on two subsampled blocks at
3099 // a time, and we only use the topleft block's mode information to set
3100 // things like block strength. Thus, for any block size smaller than
3101 // 16x16, ignore the odd portion of the block.
3102 if (tx == TX_4X4 && (ss_v | ss_h)) {
3117 if (tx == TX_4X4 && !skip_inter) {
// t = bit for this block's first column; m_col = bits for all its columns
3118 int t = 1 << col_and_7, m_col = (t << w) - t, y;
3119 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
3120 int m_row_8 = m_col & wide_filter_col_mask[ss_h], m_row_4 = m_col - m_row_8;
3122 for (y = row_and_7; y < h + row_and_7; y++) {
3123 int col_mask_id = 2 - !(y & wide_filter_row_mask[ss_v]);
3125 mask[0][y][1] |= m_row_8;
3126 mask[0][y][2] |= m_row_4;
3127 // for odd lines, if the odd col is not being filtered,
3128 // skip odd row also:
3135 // if a/c are even row/col and b/d are odd, and d is skipped,
3136 // e.g. right edge of size-66x66.webm, then skip b also (bug)
3137 if ((ss_h & ss_v) && (col_end & 1) && (y & 1)) {
3138 mask[1][y][col_mask_id] |= (t << (w - 1)) - t;
3140 mask[1][y][col_mask_id] |= m_col;
3143 mask[0][y][3] |= m_col;
3145 if (ss_h && (col_end & 1))
3146 mask[1][y][3] |= (t << (w - 1)) - t;
3148 mask[1][y][3] |= m_col;
// non-4x4 transforms (or skipped inter blocks): coarser-grained masks
3152 int y, t = 1 << col_and_7, m_col = (t << w) - t;
3155 int mask_id = (tx == TX_8X8);
3156 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
3157 int l2 = tx + ss_h - 1, step1d;
3158 int m_row = m_col & masks[l2];
3160 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
3161 // 8wd loopfilter to prevent going off the visible edge.
3162 if (ss_h && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
3163 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
3164 int m_row_8 = m_row - m_row_16;
3166 for (y = row_and_7; y < h + row_and_7; y++) {
3167 mask[0][y][0] |= m_row_16;
3168 mask[0][y][1] |= m_row_8;
3171 for (y = row_and_7; y < h + row_and_7; y++)
3172 mask[0][y][mask_id] |= m_row;
// same odd-edge special case for horizontal (row) edges
3177 if (ss_v && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
3178 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
3179 mask[1][y][0] |= m_col;
3180 if (y - row_and_7 == h - 1)
3181 mask[1][y][1] |= m_col;
3183 for (y = row_and_7; y < h + row_and_7; y += step1d)
3184 mask[1][y][mask_id] |= m_col;
3186 } else if (tx != TX_4X4) {
3189 mask_id = (tx == TX_8X8) || (h == ss_v);
3190 mask[1][row_and_7][mask_id] |= m_col;
3191 mask_id = (tx == TX_8X8) || (w == ss_h);
3192 for (y = row_and_7; y < h + row_and_7; y++)
3193 mask[0][y][mask_id] |= t;
// remaining case: only the block's own leading edges are filtered
3195 int t8 = t & wide_filter_col_mask[ss_h], t4 = t - t8;
3197 for (y = row_and_7; y < h + row_and_7; y++) {
3198 mask[0][y][2] |= t4;
3199 mask[0][y][1] |= t8;
3201 mask[1][row_and_7][2 - !(row_and_7 & wide_filter_row_mask[ss_v])] |= m_col;
// Decode and reconstruct one coded block at (row, col) (in 8-pixel units):
// decode residual coefficients, run intra or inter reconstruction, copy any
// edge-emulation overhang back into the frame, and record loop-filter masks.
// NOTE(review): this excerpt is line-sampled; statements between the visible
// lines (e.g. mode parsing, b/has_coeffs declarations) are not shown.
3206 static void decode_b(AVCodecContext *ctx, int row, int col,
3207 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
3208 enum BlockLevel bl, enum BlockPartition bp)
3210 VP9Context *s = ctx->priv_data;
// block size index: each level contributes 3 partition-derived sizes
3212 enum BlockSize bs = bl * 3 + bp;
3213 int bytesperpixel = s->bytesperpixel;
// block width/height in 4-pixel units, from the size lookup table
3214 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
3216 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
// clamp motion vectors so prediction stays within 128 eighth-pels of the
// visible frame (each row/col unit is 8 pixels = 64 eighth-pels)
3222 s->min_mv.x = -(128 + col * 64);
3223 s->min_mv.y = -(128 + row * 64);
3224 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
3225 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
// shrink the chroma transform size by one step when the subsampled plane
// is only half as wide/tall as the luma transform
3231 b->uvtx = b->tx - ((s->ss_h && w4 * 2 == (1 << b->tx)) ||
3232 (s->ss_v && h4 * 2 == (1 << b->tx)));
// coefficient decoding differs for 8 vs. >8 bits per component
3237 if (bytesperpixel == 1) {
3238 has_coeffs = decode_coeffs_8bpp(ctx);
3240 has_coeffs = decode_coeffs_16bpp(ctx);
// an all-zero inter block acts like a skipped block for context purposes
3242 if (!has_coeffs && b->bs <= BS_8x8 && !b->intra) {
3244 memset(&s->above_skip_ctx[col], 1, w4);
3245 memset(&s->left_skip_ctx[s->row7], 1, h4);
// helpers to zero the per-plane nonzero-coefficient contexts with the
// widest store available for the given width n (bytes)
3250 #define SPLAT_ZERO_CTX(v, n) \
3252 case 1: v = 0; break; \
3253 case 2: AV_ZERO16(&v); break; \
3254 case 4: AV_ZERO32(&v); break; \
3255 case 8: AV_ZERO64(&v); break; \
3256 case 16: AV_ZERO128(&v); break; \
3258 #define SPLAT_ZERO_YUV(dir, var, off, n, dir2) \
3260 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
3261 if (s->ss_##dir2) { \
3262 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
3263 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
3265 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off * 2], n * 2); \
3266 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off * 2], n * 2); \
3271 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1, h); break;
3272 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2, h); break;
3273 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4, h); break;
3274 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8, h); break;
3277 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1, v); break;
3278 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2, v); break;
3279 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4, v); break;
3280 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8, v); break;
// advance the coefficient/eob write pointers past this block
// (64 coefficients per 4x4 unit; chroma scaled by the subsampling shifts)
3286 s->block += w4 * h4 * 64 * bytesperpixel;
3287 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3288 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_h + s->ss_v);
3289 s->eob += 4 * w4 * h4;
3290 s->uveob[0] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3291 s->uveob[1] += 4 * w4 * h4 >> (s->ss_h + s->ss_v);
3297 // emulated overhangs if the stride of the target buffer can't hold. This
3298 // makes it possible to support emu-edge and so on even if we have large block
// decide per-plane whether the block overhangs the allocated stride or the
// bottom of the visible frame; if so, reconstruct into temp buffers
3300 emu[0] = (col + w4) * 8 * bytesperpixel > f->linesize[0] ||
3301 (row + h4) > s->rows;
3302 emu[1] = ((col + w4) * 8 >> s->ss_h) * bytesperpixel > f->linesize[1] ||
3303 (row + h4) > s->rows;
3305 s->dst[0] = s->tmp_y;
3308 s->dst[0] = f->data[0] + yoff;
3309 s->y_stride = f->linesize[0];
3312 s->dst[1] = s->tmp_uv[0];
3313 s->dst[2] = s->tmp_uv[1];
3316 s->dst[1] = f->data[1] + uvoff;
3317 s->dst[2] = f->data[2] + uvoff;
3318 s->uv_stride = f->linesize[1];
// reconstruction proper, dispatched on bit depth
3321 if (s->s.h.bpp > 8) {
3322 intra_recon_16bpp(ctx, yoff, uvoff);
3324 intra_recon_8bpp(ctx, yoff, uvoff);
3327 if (s->s.h.bpp > 8) {
3328 inter_recon_16bpp(ctx);
3330 inter_recon_8bpp(ctx);
// copy the emulated luma block back into the frame in power-of-two-wide
// chunks; mc[n][0][0][0][0] is the plain (unfiltered) copy function
3334 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3336 for (n = 0; o < w; n++) {
3341 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o * bytesperpixel, f->linesize[0],
3342 s->tmp_y + o * bytesperpixel, 128, h, 0, 0);
// same copy-back for both chroma planes, in subsampled dimensions
3348 int w = FFMIN(s->cols - col, w4) * 8 >> s->ss_h;
3349 int h = FFMIN(s->rows - row, h4) * 8 >> s->ss_v, n, o = 0;
3351 for (n = s->ss_h; o < w; n++) {
3356 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o * bytesperpixel, f->linesize[1],
3357 s->tmp_uv[0] + o * bytesperpixel, 128, h, 0, 0);
3358 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o * bytesperpixel, f->linesize[2],
3359 s->tmp_uv[1] + o * bytesperpixel, 128, h, 0, 0);
3365 // pick filter level and find edges to apply filter to
// level is looked up per segment / reference / ZEROMV-ness; 0 means the
// loop filter is disabled for this block
3366 if (s->s.h.filter.level &&
3367 (lvl = s->s.h.segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3368 [b->mode[3] != ZEROMV]) > 0) {
3369 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3370 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
// store the filter level and edge masks for luma (mask[0]) and, when the
// format is subsampled, chroma (mask[1]) with odd-edge clipping at the
// right/bottom frame border
3372 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3373 mask_edges(lflvl->mask[0], 0, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3374 if (s->ss_h || s->ss_v)
3375 mask_edges(lflvl->mask[1], s->ss_h, s->ss_v, row7, col7, x_end, y_end,
3376 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3377 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3378 b->uvtx, skip_inter);
// lazily fill the limit/mblim lookup tables for this level from the
// header's sharpness setting (lim_lut[lvl] == 0 means "not computed yet")
3380 if (!s->filter_lut.lim_lut[lvl]) {
3381 int sharp = s->s.h.filter.sharpness;
3385 limit >>= (sharp + 3) >> 2;
3386 limit = FFMIN(limit, 9 - sharp);
3388 limit = FFMAX(limit, 1);
3390 s->filter_lut.lim_lut[lvl] = limit;
3391 s->filter_lut.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
// second copy of the pointer advance for the code path that reaches here
// without taking the early skip branch above
3397 s->block += w4 * h4 * 64 * bytesperpixel;
3398 s->uvblock[0] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3399 s->uvblock[1] += w4 * h4 * 64 * bytesperpixel >> (s->ss_v + s->ss_h);
3400 s->eob += 4 * w4 * h4;
3401 s->uveob[0] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
3402 s->uveob[1] += 4 * w4 * h4 >> (s->ss_v + s->ss_h);
// Recursively parse the partition tree for one superblock quadrant at level
// bl and decode its blocks. Reads partition symbols from the range coder,
// so it must visit blocks in exact bitstream order.
3406 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3407 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3409 VP9Context *s = ctx->priv_data;
// partition probability context from the above/left partition history
3410 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3411 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
// keyframes/intra-only frames use the fixed default partition probabilities
3412 const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? vp9_default_kf_partition_probs[bl][c] :
3413 s->prob.p.partition[bl][c];
3414 enum BlockPartition bp;
// half block size at this level, in 8-pixel units (4, 2, 1, ...)
3415 ptrdiff_t hbs = 4 >> bl;
3416 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
3417 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3418 int bytesperpixel = s->bytesperpixel;
// fully inside the frame: read the full partition symbol
3421 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3422 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3423 } else if (col + hbs < s->cols) { // FIXME why not <=?
3424 if (row + hbs < s->rows) { // FIXME why not <=?
3425 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3427 case PARTITION_NONE:
3428 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// PARTITION_H: top half, then bottom half one hbs further down
3431 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3432 yoff += hbs * 8 * y_stride;
3433 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3434 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
// PARTITION_V: left half, then right half one hbs to the right
3437 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3438 yoff += hbs * 8 * bytesperpixel;
3439 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3440 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3442 case PARTITION_SPLIT:
// recurse into the four quadrants at the next level
3443 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3444 decode_sb(ctx, row, col + hbs, lflvl,
3445 yoff + 8 * hbs * bytesperpixel,
3446 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3447 yoff += hbs * 8 * y_stride;
3448 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3449 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3450 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3451 yoff + 8 * hbs * bytesperpixel,
3452 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// bottom edge clipped: only "split vertically?" is coded (prob p[1])
3457 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3458 bp = PARTITION_SPLIT;
3459 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3460 decode_sb(ctx, row, col + hbs, lflvl,
3461 yoff + 8 * hbs * bytesperpixel,
3462 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3465 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// right edge clipped: only "split horizontally?" is coded (prob p[2])
3467 } else if (row + hbs < s->rows) { // FIXME why not <=?
3468 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3469 bp = PARTITION_SPLIT;
3470 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3471 yoff += hbs * 8 * y_stride;
3472 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3473 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3476 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// both edges clipped: split is implied, nothing is coded
3479 bp = PARTITION_SPLIT;
3480 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
// tally the partition decision for backward probability adaptation
3482 s->counts.partition[bl][c][bp]++;
// Second-pass variant of decode_sb(): replays the partition structure that
// pass 1 stored in the per-block array (b->bl / b->bp) instead of reading
// partition symbols from the bitstream again.
3485 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3486 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3488 VP9Context *s = ctx->priv_data;
// half block size at this level, in 8-pixel units
3490 ptrdiff_t hbs = 4 >> bl;
3491 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
3492 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3493 int bytesperpixel = s->bytesperpixel;
// smallest level: must be a stored 8x8 leaf
3496 av_assert2(b->bl == BL_8X8);
3497 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3498 } else if (s->b->bl == bl) {
// this level is a leaf; replay it, plus the second half for H/V splits
3499 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3500 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3501 yoff += hbs * 8 * y_stride;
3502 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3503 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3504 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3505 yoff += hbs * 8 * bytesperpixel;
3506 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3507 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// stored block is smaller than this level: recurse into the quadrants
// that lie inside the visible frame
3510 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3511 if (col + hbs < s->cols) { // FIXME why not <=?
3512 if (row + hbs < s->rows) {
3513 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
3514 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
3515 yoff += hbs * 8 * y_stride;
3516 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3517 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3518 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3519 yoff + 8 * hbs * bytesperpixel,
3520 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// bottom edge clipped: only the right quadrant exists
3522 yoff += hbs * 8 * bytesperpixel;
3523 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
3524 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
// right edge clipped: only the bottom quadrant exists
3526 } else if (row + hbs < s->rows) {
3527 yoff += hbs * 8 * y_stride;
3528 uvoff += hbs * 8 * uv_stride >> s->ss_v;
3529 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
// Apply the loop filter to vertical edges (edges *between columns*) of one
// plane inside a 64x64 superblock, driven by the bitmasks built in
// mask_edges(). mask[..][i] selects filter width: 0=16px, 1=8px, 2=4px,
// 3=inner 4px edge; lvl holds the per-8x8 filter level, high nibble = H.
3534 static av_always_inline void filter_plane_cols(VP9Context *s, int col, int ss_h, int ss_v,
3535 uint8_t *lvl, uint8_t (*mask)[4],
3536 uint8_t *dst, ptrdiff_t ls)
3538 int y, x, bytesperpixel = s->bytesperpixel;
3540 // filter edges between columns (e.g. block1 | block2)
// walk in pairs of 8-px rows so two vertically adjacent edges can be
// merged into one 16-px-high filter call
3541 for (y = 0; y < 8; y += 2 << ss_v, dst += 16 * ls, lvl += 16 << ss_v) {
3542 uint8_t *ptr = dst, *l = lvl, *hmask1 = mask[y], *hmask2 = mask[y + 1 + ss_v];
// hm1/hm2: block-edge masks for the two row halves; hm13/hm23: inner edges
3543 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3544 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3545 unsigned hm = hm1 | hm2 | hm13 | hm23;
// iterate edge columns left to right; x is a one-hot column bit
3547 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8 * bytesperpixel >> ss_h) {
3550 int L = *l, H = L >> 4;
3551 int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3553 if (hmask1[0] & x) {
// 16-wide edges in both halves combine into one loop_filter_16 call
3554 if (hmask2[0] & x) {
3555 av_assert2(l[8 << ss_v] == L);
3556 s->dsp.loop_filter_16[0](ptr, ls, E, I, H);
3558 s->dsp.loop_filter_8[2][0](ptr, ls, E, I, H);
3560 } else if (hm2 & x) {
// different levels top/bottom: pack both into E/I (low/high byte)
// and use the mix2 functions
3563 E |= s->filter_lut.mblim_lut[L] << 8;
3564 I |= s->filter_lut.lim_lut[L] << 8;
3565 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3567 [0](ptr, ls, E, I, H);
3569 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3570 [0](ptr, ls, E, I, H);
3572 } else if (hm2 & x) {
// edge only in the lower half: fetch its level one 8-px row down
3573 int L = l[8 << ss_v], H = L >> 4;
3574 int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3576 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3577 [0](ptr + 8 * ls, ls, E, I, H);
// inner 4-px edges (mask index 3), offset half a block to the right
3585 int L = *l, H = L >> 4;
3586 int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3591 E |= s->filter_lut.mblim_lut[L] << 8;
3592 I |= s->filter_lut.lim_lut[L] << 8;
3593 s->dsp.loop_filter_mix2[0][0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3595 s->dsp.loop_filter_8[0][0](ptr + 4 * bytesperpixel, ls, E, I, H);
3597 } else if (hm23 & x) {
3598 int L = l[8 << ss_v], H = L >> 4;
3599 int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3601 s->dsp.loop_filter_8[0][0](ptr + 8 * ls + 4 * bytesperpixel, ls, E, I, H);
// Apply the loop filter to horizontal edges (edges *between rows*) of one
// plane inside a 64x64 superblock; counterpart of filter_plane_cols() with
// the same mask-index convention (0=16px, 1=8px, 2=4px, 3=inner 4px).
3609 static av_always_inline void filter_plane_rows(VP9Context *s, int row, int ss_h, int ss_v,
3610 uint8_t *lvl, uint8_t (*mask)[4],
3611 uint8_t *dst, ptrdiff_t ls)
3613 int y, x, bytesperpixel = s->bytesperpixel;
3616 // filter edges between rows (e.g. ------)
3618 for (y = 0; y < 8; y++, dst += 8 * ls >> ss_v) {
3619 uint8_t *ptr = dst, *l = lvl, *vmask = mask[y];
3620 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
// step two columns at a time so horizontally adjacent edges can merge
// into one 16-px-wide filter call
3622 for (x = 1; vm & ~(x - 1); x <<= (2 << ss_h), ptr += 16 * bytesperpixel, l += 2 << ss_h) {
3625 int L = *l, H = L >> 4;
3626 int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
// 16-wide edge spanning both 8-px columns -> one loop_filter_16 call
3629 if (vmask[0] & (x << (1 + ss_h))) {
3630 av_assert2(l[1 + ss_h] == L);
3631 s->dsp.loop_filter_16[1](ptr, ls, E, I, H);
3633 s->dsp.loop_filter_8[2][1](ptr, ls, E, I, H);
3635 } else if (vm & (x << (1 + ss_h))) {
// different levels left/right: pack both into E/I and use mix2
3638 E |= s->filter_lut.mblim_lut[L] << 8;
3639 I |= s->filter_lut.lim_lut[L] << 8;
3640 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3641 [!!(vmask[1] & (x << (1 + ss_h)))]
3642 [1](ptr, ls, E, I, H);
3644 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3645 [1](ptr, ls, E, I, H);
3647 } else if (vm & (x << (1 + ss_h))) {
// edge only in the right 8-px column
3648 int L = l[1 + ss_h], H = L >> 4;
3649 int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3651 s->dsp.loop_filter_8[!!(vmask[1] & (x << (1 + ss_h)))]
3652 [1](ptr + 8 * bytesperpixel, ls, E, I, H);
// inner 4-px edges (mask index 3), offset half a block down
3657 int L = *l, H = L >> 4;
3658 int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3660 if (vm3 & (x << (1 + ss_h))) {
3663 E |= s->filter_lut.mblim_lut[L] << 8;
3664 I |= s->filter_lut.lim_lut[L] << 8;
3665 s->dsp.loop_filter_mix2[0][0][1](ptr + ls * 4, ls, E, I, H);
3667 s->dsp.loop_filter_8[0][1](ptr + ls * 4, ls, E, I, H);
3669 } else if (vm3 & (x << (1 + ss_h))) {
3670 int L = l[1 + ss_h], H = L >> 4;
3671 int E = s->filter_lut.mblim_lut[L], I = s->filter_lut.lim_lut[L];
3673 s->dsp.loop_filter_8[0][1](ptr + ls * 4 + 8 * bytesperpixel, ls, E, I, H);
// Run the loop filter over one 64x64 superblock: columns then rows for the
// luma plane, then for both chroma planes using the (possibly subsampled)
// chroma masks.
3686 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3687 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3689 VP9Context *s = ctx->priv_data;
3690 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
3691 uint8_t *dst = f->data[0] + yoff;
3692 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
// chroma uses the separate mask[1] set only when the format is subsampled;
// for 4:4:4 chroma shares the luma masks (mask[0])
3693 uint8_t (*uv_masks)[8][4] = lflvl->mask[s->ss_h | s->ss_v];
3696 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3697 // if you think of them as acting on a 8x8 block max, we can interleave
3698 // each v/h within the single x loop, but that only works if we work on
3699 // 8 pixel blocks, and we won't always do that (we want at least 16px
3700 // to use SSE2 optimizations, perhaps 32 for AVX2)
// luma: vertical edges first, then horizontal (spec-mandated order)
3702 filter_plane_cols(s, col, 0, 0, lflvl->level, lflvl->mask[0][0], dst, ls_y);
3703 filter_plane_rows(s, row, 0, 0, lflvl->level, lflvl->mask[0][1], dst, ls_y);
// both chroma planes share levels and masks
3705 for (p = 0; p < 2; p++) {
3706 dst = f->data[1 + p] + uvoff;
3707 filter_plane_cols(s, col, s->ss_h, s->ss_v, lflvl->level, uv_masks[0], dst, ls_uv);
3708 filter_plane_rows(s, row, s->ss_h, s->ss_v, lflvl->level, uv_masks[1], dst, ls_uv);
/**
 * Compute the span covered by tile number idx when n superblocks are split
 * into 1 << log2_n tiles, returning both bounds in 8-pixel block units
 * (i.e. superblock index << 3).
 *
 * @param start  receives the tile's first position (8-px units)
 * @param end    receives the tile's one-past-last position (8-px units)
 * @param idx    tile index
 * @param log2_n log2 of the tile count along this dimension
 * @param n      frame size along this dimension, in superblocks
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int lo = (idx       * n) >> log2_n;
    int hi = ((idx + 1) * n) >> log2_n;

    /* clamp both bounds to the frame size before scaling */
    if (lo > n)
        lo = n;
    if (hi > n)
        hi = n;
    *start = lo << 3;
    *end   = hi << 3;
}
// Backward-adapt one binary probability *p towards the empirical counts
// (ct0 = branch-0 hits, ct1 = branch-1 hits). The blend weight grows with
// the sample count, saturating at max_count; update_factor/256 is the
// maximum weight given to the new empirical estimate.
3720 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3721 int max_count, int update_factor)
3723 unsigned ct = ct0 + ct1, p2, p1;
// scale the blend weight by the (capped) number of observations;
// FASTDIV is a fast unsigned division
3728 update_factor = FASTDIV(update_factor * FFMIN(ct, max_count), max_count);
// p2 = rounded empirical probability of branch 0 in 8-bit fixed point,
// clipped to the valid range 1..255; p1 holds the old value of *p
3730 p2 = ((((int64_t) ct0) << 8) + (ct >> 1)) / ct;
3731 p2 = av_clip(p2, 1, 255);
3733 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3734 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
// End-of-frame backward adaptation: blend every probability table of the
// current frame context towards the symbol counts gathered while decoding
// (s->counts), as mandated by the VP9 spec when error_resilient/parallel
// mode is off.
3737 static void adapt_probs(VP9Context *s)
3740 prob_context *p = &s->prob_ctx[s->s.h.framectxid].p;
// coefficients adapt more conservatively (112/256) right after a keyframe
// or intra-only frame
3741 int uf = (s->s.h.keyframe || s->s.h.intraonly || !s->last_keyframe) ? 112 : 128;
// coefficient probabilities, indexed [tx size][plane type][inter?][band][ctx]
3744 for (i = 0; i < 4; i++)
3745 for (j = 0; j < 2; j++)
3746 for (k = 0; k < 2; k++)
3747 for (l = 0; l < 6; l++)
3748 for (m = 0; m < 6; m++) {
3749 uint8_t *pp = s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m];
3750 unsigned *e = s->counts.eob[i][j][k][l][m];
3751 unsigned *c = s->counts.coef[i][j][k][l][m];
3753 if (l == 0 && m >= 3) // dc only has 3 pt
3756 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3757 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3758 adapt_prob(&pp[2], c[1], c[2], 24, uf);
// intra frames adapt only the coefficient probabilities; the mode/mv
// tables are copied through unchanged
3761 if (s->s.h.keyframe || s->s.h.intraonly) {
3762 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3763 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3764 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3765 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
// skip flag
3770 for (i = 0; i < 3; i++)
3771 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
// intra/inter flag
3774 for (i = 0; i < 4; i++)
3775 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
// compound prediction mode flag (only coded when switchable)
3778 if (s->s.h.comppredmode == PRED_SWITCHABLE) {
3779 for (i = 0; i < 5; i++)
3780 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
// compound reference selection
3784 if (s->s.h.comppredmode != PRED_SINGLEREF) {
3785 for (i = 0; i < 5; i++)
3786 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3787 s->counts.comp_ref[i][1], 20, 128);
// single reference selection (two binary decisions)
3790 if (s->s.h.comppredmode != PRED_COMPREF) {
3791 for (i = 0; i < 5; i++) {
3792 uint8_t *pp = p->single_ref[i];
3793 unsigned (*c)[2] = s->counts.single_ref[i];
3795 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3796 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3800 // block partitioning
3801 for (i = 0; i < 4; i++)
3802 for (j = 0; j < 4; j++) {
3803 uint8_t *pp = p->partition[i][j];
3804 unsigned *c = s->counts.partition[i][j];
3806 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3807 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3808 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// transform size selection (only when coded per block)
3812 if (s->s.h.txfmmode == TX_SWITCHABLE) {
3813 for (i = 0; i < 2; i++) {
3814 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3816 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3817 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3818 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3819 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3820 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3821 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3825 // interpolation filter
3826 if (s->s.h.filtermode == FILTER_SWITCHABLE) {
3827 for (i = 0; i < 4; i++) {
3828 uint8_t *pp = p->filter[i];
3829 unsigned *c = s->counts.filter[i];
3831 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3832 adapt_prob(&pp[1], c[1], c[2], 20, 128);
// inter prediction mode tree (ZEROMV/NEARESTMV/NEARMV/NEWMV)
3837 for (i = 0; i < 7; i++) {
3838 uint8_t *pp = p->mv_mode[i];
3839 unsigned *c = s->counts.mv_mode[i];
3841 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3842 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3843 adapt_prob(&pp[2], c[1], c[3], 20, 128);
// mv joint (which components are nonzero)
3848 uint8_t *pp = p->mv_joint;
3849 unsigned *c = s->counts.mv_joint;
3851 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3852 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3853 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// per-component mv magnitude model (i = 0:vertical, 1:horizontal)
3857 for (i = 0; i < 2; i++) {
3859 unsigned *c, (*c2)[2], sum;
3861 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3862 s->counts.mv_comp[i].sign[1], 20, 128);
// magnitude class tree: each step removes the classes already decided
3864 pp = p->mv_comp[i].classes;
3865 c = s->counts.mv_comp[i].classes;
3866 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3867 adapt_prob(&pp[0], c[0], sum, 20, 128);
3869 adapt_prob(&pp[1], c[1], sum, 20, 128);
3871 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3872 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3874 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3875 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3877 adapt_prob(&pp[6], c[6], sum, 20, 128);
3878 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3879 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3880 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3882 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3883 s->counts.mv_comp[i].class0[1], 20, 128);
// integer offset bits within a class
3884 pp = p->mv_comp[i].bits;
3885 c2 = s->counts.mv_comp[i].bits;
3886 for (j = 0; j < 10; j++)
3887 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
// fractional-pel trees (class0 and general)
3889 for (j = 0; j < 2; j++) {
3890 pp = p->mv_comp[i].class0_fp[j];
3891 c = s->counts.mv_comp[i].class0_fp[j];
3892 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3893 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3894 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3896 pp = p->mv_comp[i].fp;
3897 c = s->counts.mv_comp[i].fp;
3898 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3899 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3900 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// high-precision (1/8-pel) bits only adapt when the frame used them
3902 if (s->s.h.highprecisionmvs) {
3903 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3904 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3905 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3906 s->counts.mv_comp[i].hp[1], 20, 128);
// luma intra mode tree, indexed by block-size group; successive branches
// peel one mode (or mode pair) off the remaining total
3911 for (i = 0; i < 4; i++) {
3912 uint8_t *pp = p->y_mode[i];
3913 unsigned *c = s->counts.y_mode[i], sum, s2;
3915 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3916 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3917 sum -= c[TM_VP8_PRED];
3918 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3919 sum -= c[VERT_PRED];
3920 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3921 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3923 adapt_prob(&pp[3], s2, sum, 20, 128);
3925 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3926 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3927 sum -= c[DIAG_DOWN_LEFT_PRED];
3928 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3929 sum -= c[VERT_LEFT_PRED];
3930 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3931 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// chroma intra mode tree, indexed by the co-located luma mode; identical
// tree shape to the luma one above
3935 for (i = 0; i < 10; i++) {
3936 uint8_t *pp = p->uv_mode[i];
3937 unsigned *c = s->counts.uv_mode[i], sum, s2;
3939 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3940 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3941 sum -= c[TM_VP8_PRED];
3942 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3943 sum -= c[VERT_PRED];
3944 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3945 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3947 adapt_prob(&pp[3], s2, sum, 20, 128);
3949 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3950 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3951 sum -= c[DIAG_DOWN_LEFT_PRED];
3952 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3953 sum -= c[VERT_LEFT_PRED];
3954 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3955 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3959 static void free_buffers(VP9Context *s)
3961 av_freep(&s->intra_pred_data[0]);
3962 av_freep(&s->b_base);
3963 av_freep(&s->block_base);
// Codec close callback: release all internal frames and both reference
// slot arrays. NOTE(review): the tail of this function (scratch-buffer
// cleanup / return) falls outside this line-sampled excerpt.
3966 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3968 VP9Context *s = ctx->priv_data;
// current / segmentation-map / mv-pair internal frames
3971 for (i = 0; i < 3; i++) {
3972 if (s->s.frames[i].tf.f->buf[0])
3973 vp9_unref_frame(ctx, &s->s.frames[i]);
3974 av_frame_free(&s->s.frames[i].tf.f);
// the 8 reference slots, both current (s.refs) and pending (next_refs)
3976 for (i = 0; i < 8; i++) {
3977 if (s->s.refs[i].f->buf[0])
3978 ff_thread_release_buffer(ctx, &s->s.refs[i]);
3979 av_frame_free(&s->s.refs[i].f);
3980 if (s->next_refs[i].f->buf[0])
3981 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3982 av_frame_free(&s->next_refs[i].f);
// Top-level per-packet decode entry point: parse the frame header, manage
// the internal frame/reference buffers, decode all tiles (optionally in two
// passes for frame threading), run the loop filter, adapt probabilities and
// output the frame if it is visible.
// NOTE(review): this excerpt is line-sampled; error-handling lines and some
// declarations between the visible lines are not shown.
3992 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3993 int *got_frame, AVPacket *pkt)
3995 const uint8_t *data = pkt->data;
3996 int size = pkt->size;
3997 VP9Context *s = ctx->priv_data;
3998 int res, tile_row, tile_col, i, ref, row, col;
// the previous segmentation map may be reused if segmentation is off or
// the map is not being updated this frame
3999 int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
4000 (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
4001 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
4005 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
// res == 0: "show existing frame" — no coded data; output reference `ref`
4007 } else if (res == 0) {
4008 if (!s->s.refs[ref].f->buf[0]) {
4009 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
4010 return AVERROR_INVALIDDATA;
4012 if ((res = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
4014 ((AVFrame *)frame)->pts = pkt->pts;
4016 FF_DISABLE_DEPRECATION_WARNINGS
4017 ((AVFrame *)frame)->pkt_pts = pkt->pts;
4018 FF_ENABLE_DEPRECATION_WARNINGS
4020 ((AVFrame *)frame)->pkt_dts = pkt->dts;
// even without decoding, the reference set must be carried forward
4021 for (i = 0; i < 8; i++) {
4022 if (s->next_refs[i].f->buf[0])
4023 ff_thread_release_buffer(ctx, &s->next_refs[i]);
4024 if (s->s.refs[i].f->buf[0] &&
4025 (res = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
// rotate the internal frames: last frame becomes the segmentation-map and
// mv-pair reference (unless the old segmap is retained), then allocate the
// new current frame
4034 if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
4035 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
4036 vp9_unref_frame(ctx, &s->s.frames[REF_FRAME_SEGMAP]);
4037 if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
4038 (res = vp9_ref_frame(ctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
4041 if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
4042 vp9_unref_frame(ctx, &s->s.frames[REF_FRAME_MVPAIR])
4043 if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
4044 (res = vp9_ref_frame(ctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
4046 if (s->s.frames[CUR_FRAME].tf.f->buf[0])
4047 vp9_unref_frame(ctx, &s->s.frames[CUR_FRAME])
4048 if ((res = vp9_alloc_frame(ctx, &s->s.frames[CUR_FRAME])) < 0)
4050 f = s->s.frames[CUR_FRAME].tf.f;
4051 f->key_frame = s->s.h.keyframe;
4052 f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
4053 ls_y = f->linesize[0];
4054 ls_uv =f->linesize[1];
// a retained segmap is only usable if the frame size did not change
4056 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
4057 (s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width ||
4058 s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
4059 vp9_unref_frame(ctx, &s->s.frames[REF_FRAME_SEGMAP]);
// build next_refs: slots named in refreshrefmask point at the new frame,
// the rest carry over the old reference
4063 for (i = 0; i < 8; i++) {
4064 if (s->next_refs[i].f->buf[0])
4065 ff_thread_release_buffer(ctx, &s->next_refs[i]);
4066 if (s->s.h.refreshrefmask & (1 << i)) {
4067 res = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
4068 } else if (s->s.refs[i].f->buf[0]) {
4069 res = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
// hardware-accelerated path: hand the whole packet to the hwaccel
4076 res = ctx->hwaccel->start_frame(ctx, NULL, 0);
4079 res = ctx->hwaccel->decode_slice(ctx, pkt->data, pkt->size);
4082 res = ctx->hwaccel->end_frame(ctx);
4088 // main tile decode loop
// reset the per-frame "above" context rows before the first superblock row
4089 bytesperpixel = s->bytesperpixel;
4090 memset(s->above_partition_ctx, 0, s->cols);
4091 memset(s->above_skip_ctx, 0, s->cols);
4092 if (s->s.h.keyframe || s->s.h.intraonly) {
4093 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
4095 memset(s->above_mode_ctx, NEARESTMV, s->cols);
4097 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
4098 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
4099 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
4100 memset(s->above_segpred_ctx, 0, s->cols);
// frame-threaded decoding with context refresh needs two passes: pass 1
// parses, pass 2 reconstructs from the stored block structures
4101 s->pass = s->s.frames[CUR_FRAME].uses_2pass =
4102 ctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
4103 if ((res = update_block_buffers(ctx)) < 0) {
4104 av_log(ctx, AV_LOG_ERROR,
4105 "Failed to allocate block buffers\n");
// in parallel mode the adapted context must be published before decoding,
// since no backward adaptation will happen afterwards
4108 if (s->s.h.refreshctx && s->s.h.parallelmode) {
4111 for (i = 0; i < 4; i++) {
4112 for (j = 0; j < 2; j++)
4113 for (k = 0; k < 2; k++)
4114 for (l = 0; l < 6; l++)
4115 for (m = 0; m < 6; m++)
4116 memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
4117 s->prob.coef[i][j][k][l][m], 3);
4118 if (s->s.h.txfmmode == i)
4121 s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
4122 ff_thread_finish_setup(ctx);
4123 } else if (!s->s.h.refreshctx) {
4124 ff_thread_finish_setup(ctx);
// per-pass reset of the coefficient/eob write pointers
4130 s->block = s->block_base;
4131 s->uvblock[0] = s->uvblock_base[0];
4132 s->uvblock[1] = s->uvblock_base[1];
4133 s->eob = s->eob_base;
4134 s->uveob[0] = s->uveob_base[0];
4135 s->uveob[1] = s->uveob_base[1];
4137 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
4138 set_tile_offset(&s->tile_row_start, &s->tile_row_end,
4139 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
// set up one range decoder per tile column; every tile except the last is
// preceded by a 32-bit size field
4141 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
4144 if (tile_col == s->s.h.tiling.tile_cols - 1 &&
4145 tile_row == s->s.h.tiling.tile_rows - 1) {
4148 tile_size = AV_RB32(data);
4152 if (tile_size > size) {
4153 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
4154 return AVERROR_INVALIDDATA;
4156 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
4157 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
4158 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
4159 return AVERROR_INVALIDDATA;
// decode one superblock row (64 px) at a time across all tile columns
4166 for (row = s->tile_row_start; row < s->tile_row_end;
4167 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
4168 struct VP9Filter *lflvl_ptr = s->lflvl;
4169 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
4171 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
4172 set_tile_offset(&s->tile_col_start, &s->tile_col_end,
4173 tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
// reset the per-row "left" contexts at each tile-column boundary
4176 memset(s->left_partition_ctx, 0, 8);
4177 memset(s->left_skip_ctx, 0, 8);
4178 if (s->s.h.keyframe || s->s.h.intraonly) {
4179 memset(s->left_mode_ctx, DC_PRED, 16);
4181 memset(s->left_mode_ctx, NEARESTMV, 8);
4183 memset(s->left_y_nnz_ctx, 0, 16);
4184 memset(s->left_uv_nnz_ctx, 0, 32);
4185 memset(s->left_segpred_ctx, 0, 8);
// swap in this tile column's range decoder state
4187 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
4190 for (col = s->tile_col_start;
4191 col < s->tile_col_end;
4192 col += 8, yoff2 += 64 * bytesperpixel,
4193 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4194 // FIXME integrate with lf code (i.e. zero after each
4195 // use, similar to invtxfm coefficients, or similar)
4197 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
// pass 2 replays the stored partition tree; pass 0/1 parses it
4201 decode_sb_mem(ctx, row, col, lflvl_ptr,
4202 yoff2, uvoff2, BL_64X64);
4204 decode_sb(ctx, row, col, lflvl_ptr,
4205 yoff2, uvoff2, BL_64X64);
// save the range decoder state back for the next superblock row
4209 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
4217 // backup pre-loopfilter reconstruction data for intra
4218 // prediction of next row of sb64s
4219 if (row + 8 < s->rows) {
4220 memcpy(s->intra_pred_data[0],
4221 f->data[0] + yoff + 63 * ls_y,
4222 8 * s->cols * bytesperpixel);
4223 memcpy(s->intra_pred_data[1],
4224 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4225 8 * s->cols * bytesperpixel >> s->ss_h);
4226 memcpy(s->intra_pred_data[2],
4227 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
4228 8 * s->cols * bytesperpixel >> s->ss_h);
4231 // loopfilter one row
4232 if (s->s.h.filter.level) {
4235 lflvl_ptr = s->lflvl;
4236 for (col = 0; col < s->cols;
4237 col += 8, yoff2 += 64 * bytesperpixel,
4238 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
4239 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
4243 // FIXME maybe we can make this more finegrained by running the
4244 // loopfilter per-block instead of after each sbrow
4245 // In fact that would also make intra pred left preparation easier?
4246 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
// backward-adapt probabilities after pass 1 when not in parallel mode
4250 if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
4252 ff_thread_finish_setup(ctx);
4254 } while (s->pass++ == 1);
4255 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
// commit next_refs into the visible reference slots
4259 for (i = 0; i < 8; i++) {
4260 if (s->s.refs[i].f->buf[0])
4261 ff_thread_release_buffer(ctx, &s->s.refs[i]);
4262 if (s->next_refs[i].f->buf[0] &&
4263 (res = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
// only visible frames are returned to the caller
4267 if (!s->s.h.invisible) {
4268 if ((res = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
4276 static void vp9_decode_flush(AVCodecContext *ctx)
4278 VP9Context *s = ctx->priv_data;
4281 for (i = 0; i < 3; i++)
4282 vp9_unref_frame(ctx, &s->s.frames[i]);
4283 for (i = 0; i < 8; i++)
4284 ff_thread_release_buffer(ctx, &s->s.refs[i]);
4287 static int init_frames(AVCodecContext *ctx)
4289 VP9Context *s = ctx->priv_data;
4292 for (i = 0; i < 3; i++) {
4293 s->s.frames[i].tf.f = av_frame_alloc();
4294 if (!s->s.frames[i].tf.f) {
4295 vp9_decode_free(ctx);
4296 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4297 return AVERROR(ENOMEM);
4300 for (i = 0; i < 8; i++) {
4301 s->s.refs[i].f = av_frame_alloc();
4302 s->next_refs[i].f = av_frame_alloc();
4303 if (!s->s.refs[i].f || !s->next_refs[i].f) {
4304 vp9_decode_free(ctx);
4305 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4306 return AVERROR(ENOMEM);
4313 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4315 VP9Context *s = ctx->priv_data;
4317 ctx->internal->allocate_progress = 1;
4319 s->s.h.filter.sharpness = -1;
4321 return init_frames(ctx);
4325 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4327 return init_frames(avctx);
4330 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4333 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4335 for (i = 0; i < 3; i++) {
4336 if (s->s.frames[i].tf.f->buf[0])
4337 vp9_unref_frame(dst, &s->s.frames[i]);
4338 if (ssrc->s.frames[i].tf.f->buf[0]) {
4339 if ((res = vp9_ref_frame(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
4343 for (i = 0; i < 8; i++) {
4344 if (s->s.refs[i].f->buf[0])
4345 ff_thread_release_buffer(dst, &s->s.refs[i]);
4346 if (ssrc->next_refs[i].f->buf[0]) {
4347 if ((res = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
4352 s->s.h.invisible = ssrc->s.h.invisible;
4353 s->s.h.keyframe = ssrc->s.h.keyframe;
4354 s->s.h.intraonly = ssrc->s.h.intraonly;
4355 s->ss_v = ssrc->ss_v;
4356 s->ss_h = ssrc->ss_h;
4357 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
4358 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
4359 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
4360 s->bytesperpixel = ssrc->bytesperpixel;
4361 s->gf_fmt = ssrc->gf_fmt;
4364 s->s.h.bpp = ssrc->s.h.bpp;
4365 s->bpp_index = ssrc->bpp_index;
4366 s->pix_fmt = ssrc->pix_fmt;
4367 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4368 memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
4369 memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
4370 sizeof(s->s.h.segmentation.feat));
4376 AVCodec ff_vp9_decoder = {
4378 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4379 .type = AVMEDIA_TYPE_VIDEO,
4380 .id = AV_CODEC_ID_VP9,
4381 .priv_data_size = sizeof(VP9Context),
4382 .init = vp9_decode_init,
4383 .close = vp9_decode_free,
4384 .decode = vp9_decode_frame,
4385 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
4386 .flush = vp9_decode_flush,
4387 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4388 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
4389 .profiles = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),