2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
35 #define VP9_SYNCCODE 0x498342
/* Per-frame decoder state: one AVBufferRef holds both the per-8x8-block
 * segmentation map and the motion-vector/reference pairs (see
 * vp9_alloc_frame below, which carves both out of f->extradata).
 * NOTE(review): sampled excerpt — the picture field (tf) and the closing
 * "} VP9Frame;" are not visible in this chunk. */
72 typedef struct VP9Frame {
74 AVBufferRef *extradata;
75 uint8_t *segmentation_map;
76 struct VP9mvrefPair *mv;
82 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
83 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
/* Per-block (mode/partition) decode state: segment id, intra/inter and
 * compound flags, up to 2 references, per-sub-block modes, interpolation
 * filter, up to 4 MVs per reference, and transform sizes.
 * NOTE(review): sampled excerpt — some fields and the closing
 * "} VP9Block;" are not visible here. */
86 typedef struct VP9Block {
87 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
88 enum FilterMode filter;
89 VP56mv mv[4 /* b_idx */][2 /* ref */];
91 enum TxfmMode tx, uvtx;
93 enum BlockPartition bp;
/* Main decoder context: frame-header fields, probability tables and
 * symbol counts, tiling info, and the left/above contextual caches used
 * during block decoding.
 * NOTE(review): sampled excerpt — many members, the nested struct
 * wrappers around the prob/counts tables, and the closing
 * "} VP9Context;" are missing from this view; do not compile as-is. */
96 typedef struct VP9Context {
103 VP9Block *b_base, *b;
105 int row, row7, col, col7;
107 ptrdiff_t y_stride, uv_stride;
/* frame-header flags parsed in decode_frame_header() */
111 uint8_t keyframe, last_keyframe;
113 uint8_t use_last_frame_mvs;
119 uint8_t refreshrefmask;
120 uint8_t highprecisionmvs;
121 enum FilterMode filtermode;
122 uint8_t allowcompinter;
125 uint8_t parallelmode;
129 uint8_t varcompref[2];
130 ThreadFrame refs[8], next_refs[8];
/* indices into the (unseen here) frames[] array for the previous frame's
 * MV pairs and segmentation map */
132 #define REF_FRAME_MVPAIR 1
133 #define REF_FRAME_SEGMAP 2
140 uint8_t mblim_lut[64];
148 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
150 #define MAX_SEGMENT 8
154 uint8_t absolute_vals;
160 uint8_t skip_enabled;
/* tiling layout — presumably inside a nested "tiling" struct, given the
 * s->tiling.* accesses elsewhere in this file (TODO confirm) */
169 unsigned log2_tile_cols, log2_tile_rows;
170 unsigned tile_cols, tile_rows;
171 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
173 unsigned sb_cols, sb_rows, rows, cols;
/* coefficient probability tables: 3-node model in prob_ctx, 11-entry
 * expanded form in the working set (per decode_frame_header usage) */
176 uint8_t coef[4][2][2][6][6][3];
180 uint8_t coef[4][2][2][6][6][11];
/* symbol counts for backward adaptation (s->counts.* elsewhere) */
185 unsigned y_mode[4][10];
186 unsigned uv_mode[10][10];
187 unsigned filter[4][3];
188 unsigned mv_mode[7][4];
189 unsigned intra[4][2];
191 unsigned single_ref[5][2][2];
192 unsigned comp_ref[5][2];
193 unsigned tx32p[2][4];
194 unsigned tx16p[2][3];
197 unsigned mv_joint[4];
200 unsigned classes[11];
202 unsigned bits[10][2];
203 unsigned class0_fp[2][4];
205 unsigned class0_hp[2];
208 unsigned partition[4][4][4];
209 unsigned coef[4][2][2][6][6][3];
210 unsigned eob[4][2][2][6][6][2];
212 enum TxfmMode txfmmode;
213 enum CompPredMode comppredmode;
215 // contextual (left/above) cache
216 DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
217 DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
218 DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
219 DECLARE_ALIGNED(8, uint8_t, left_uv_nnz_ctx)[2][8];
220 DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
221 DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
222 DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
223 DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
224 DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
225 DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
226 DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
227 DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
/* above-row context arrays, allocated in one slab by update_size() */
228 uint8_t *above_partition_ctx;
229 uint8_t *above_mode_ctx;
230 // FIXME maybe merge some of the below in a flags field?
231 uint8_t *above_y_nnz_ctx;
232 uint8_t *above_uv_nnz_ctx[2];
233 uint8_t *above_skip_ctx; // 1bit
234 uint8_t *above_txfm_ctx; // 2bit
235 uint8_t *above_segpred_ctx; // 1bit
236 uint8_t *above_intra_ctx; // 1bit
237 uint8_t *above_comp_ctx; // 1bit
238 uint8_t *above_ref_ctx; // 2bit
239 uint8_t *above_filter_ctx;
240 VP56mv (*above_mv_ctx)[2];
243 uint8_t *intra_pred_data[3];
244 struct VP9Filter *lflvl;
245 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71*80];
247 // block reconstruction intermediates
248 int block_alloc_using_2pass;
249 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
250 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
251 struct { int x, y; } min_mv, max_mv;
252 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
253 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
/* Block width/height lookup per block size, in 8x8-block units (row 0)
 * and 4x4 units halved (row 1), indexed by BS_* enum.
 * NOTE(review): sampled excerpt — the inner "{" / "}," group delimiters
 * and the closing "};" are not visible here. */
256 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
258 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
259 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
261 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
262 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
/* Allocate a frame: picture buffer via the threading API, plus one
 * zero-initialized extradata buffer holding the segmentation map
 * (1 byte per 8x8 block) followed by the VP9mvrefPair array.
 * Returns 0 on success (unseen here), a negative AVERROR on failure.
 * NOTE(review): sampled excerpt — opening/closing braces and the final
 * return are not visible. */
266 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
268 VP9Context *s = ctx->priv_data;
271 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
/* 64 8x8 blocks per 64x64 superblock */
273 sz = 64 * s->sb_cols * s->sb_rows;
274 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
/* undo the picture allocation on OOM so the frame stays consistent */
275 ff_thread_release_buffer(ctx, &f->tf);
276 return AVERROR(ENOMEM);
279 f->segmentation_map = f->extradata->data;
280 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
/* Release a frame: drop the picture buffer and the shared
 * segmentation-map/MV extradata buffer (av_buffer_unref also NULLs
 * f->extradata). */
285 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
287 ff_thread_release_buffer(ctx, &f->tf);
288 av_buffer_unref(&f->extradata);
/* Create a new reference to src in dst: ref-count the picture and the
 * extradata buffer, then copy the derived pointers/flags. On a partial
 * failure, dst is fully unreferenced so it never holds half a frame.
 * NOTE(review): sampled excerpt — braces, the error path after
 * ff_thread_ref_frame, dst->mv assignment and the final return are not
 * visible here. */
291 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
295 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
297 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
298 vp9_unref_frame(ctx, dst);
299 return AVERROR(ENOMEM);
/* pointers alias into the now-shared extradata buffer */
302 dst->segmentation_map = src->segmentation_map;
304 dst->uses_2pass = src->uses_2pass;
/* (Re)initialize size-dependent state for a w x h frame: superblock and
 * 8x8-block grid dimensions, plus one malloc slab that is carved into
 * the intra-prediction edge rows, all above_* context arrays and the
 * per-superblock loop-filter masks. Skips work if the size is unchanged
 * and the slab already exists. Returns negative AVERROR on failure.
 * NOTE(review): sampled excerpt — braces, the ff_set_dimensions call and
 * the final return are not visible here. */
309 static int update_size(AVCodecContext *ctx, int w, int h)
311 VP9Context *s = ctx->priv_data;
314 av_assert0(w > 0 && h > 0);
/* fast path: already allocated at this size */
316 if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height)
321 s->sb_cols = (w + 63) >> 6;
322 s->sb_rows = (h + 63) >> 6;
323 s->cols = (w + 7) >> 3;
324 s->rows = (h + 7) >> 3;
/* carve `n` bytes/elements per superblock column out of the slab `p` */
326 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
327 av_freep(&s->intra_pred_data[0]);
/* 240 = sum of the per-sb-col byte counts assigned below (64+32+32+16+16+11*8);
 * the mv ctx and lflvl entries are sized separately — TODO confirm */
328 p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
330 return AVERROR(ENOMEM);
331 assign(s->intra_pred_data[0], uint8_t *, 64);
332 assign(s->intra_pred_data[1], uint8_t *, 32);
333 assign(s->intra_pred_data[2], uint8_t *, 32);
334 assign(s->above_y_nnz_ctx, uint8_t *, 16);
335 assign(s->above_mode_ctx, uint8_t *, 16);
336 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
337 assign(s->above_partition_ctx, uint8_t *, 8);
338 assign(s->above_skip_ctx, uint8_t *, 8);
339 assign(s->above_txfm_ctx, uint8_t *, 8);
340 assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
341 assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
342 assign(s->above_segpred_ctx, uint8_t *, 8);
343 assign(s->above_intra_ctx, uint8_t *, 8);
344 assign(s->above_comp_ctx, uint8_t *, 8);
345 assign(s->above_ref_ctx, uint8_t *, 8);
346 assign(s->above_filter_ctx, uint8_t *, 8);
347 assign(s->lflvl, struct VP9Filter *, 1);
350 // these will be re-allocated a little later
351 av_freep(&s->b_base);
352 av_freep(&s->block_base);
/* (Re)allocate the block-mode and coefficient buffers. In 2-pass mode
 * every superblock needs its own storage (pass 1 stores, pass 2
 * consumes); in 1-pass mode a single reusable block's worth suffices.
 * No-op when the existing allocation already matches the current mode.
 * Returns negative AVERROR(ENOMEM) on failure, 0 otherwise (unseen).
 * NOTE(review): sampled excerpt — braces, the else keyword between the
 * two branches and the final return are not visible here. */
357 static int update_block_buffers(AVCodecContext *ctx)
359 VP9Context *s = ctx->priv_data;
361 if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
365 av_free(s->block_base);
366 if (s->frames[CUR_FRAME].uses_2pass) {
367 int sbs = s->sb_cols * s->sb_rows;
369 s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
/* 64*64 luma + 2*32*32 chroma coeffs (*2 bytes = *3 halves packing),
 * 128 = eob bytes per sb (256 luma + 2*64 chroma) / 2 — TODO confirm */
370 s->block_base = av_mallocz((64 * 64 + 128) * sbs * 3);
371 if (!s->b_base || !s->block_base)
372 return AVERROR(ENOMEM);
373 s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
374 s->uvblock_base[1] = s->uvblock_base[0] + sbs * 32 * 32;
375 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * 32 * 32);
376 s->uveob_base[0] = s->eob_base + 256 * sbs;
377 s->uveob_base[1] = s->uveob_base[0] + 64 * sbs;
/* 1-pass: single reusable block, same layout without the sbs factor */
379 s->b_base = av_malloc(sizeof(VP9Block));
380 s->block_base = av_mallocz((64 * 64 + 128) * 3);
381 if (!s->b_base || !s->block_base)
382 return AVERROR(ENOMEM);
383 s->uvblock_base[0] = s->block_base + 64 * 64;
384 s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
385 s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
386 s->uveob_base[0] = s->eob_base + 256;
387 s->uveob_base[1] = s->uveob_base[0] + 64;
389 s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;
394 // for some reason the sign bit is at the end, not the start, of a bit sequence
/* Read an n-bit magnitude followed by a 1-bit sign (1 = negative). */
395 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
397 int v = get_bits(gb, n);
398 return get_bits1(gb) ? -v : v;
/* Inverse of the "recenter" mapping used by the probability-update VLC:
 * values <= 2m are folded back around m (odd = below, even = above),
 * larger values pass through unchanged. */
401 static av_always_inline int inv_recenter_nonneg(int v, int m)
403 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
406 // differential forward probability updates
/* Decode a forward-coded probability update: read a variable-length
 * delta index d (three escape levels of 4/4/5 bits, then a 7-bit tail
 * that is doubled and offset by a trailing bit), map it through
 * inv_map_table[] and re-center around the current probability p.
 * Result stays in [1, 255].
 * NOTE(review): sampled excerpt — braces, the table terminator and the
 * declaration of d are not visible here. */
407 static int update_prob(VP56RangeCoder *c, int p)
409 static const int inv_map_table[254] = {
410 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
411 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
412 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
413 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
414 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
415 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
416 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
417 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
418 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
419 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
420 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
421 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
422 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
423 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
424 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
425 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
426 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
427 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
432 /* This code is trying to do a differential probability update. For a
433 * current probability A in the range [1, 255], the difference to a new
434 * probability of any value can be expressed differentially as 1-A,255-A
435 * where some part of this (absolute range) exists both in positive as
436 * well as the negative part, whereas another part only exists in one
437 * half. We're trying to code this shared part differentially, i.e.
438 * times two where the value of the lowest bit specifies the sign, and
439 * the single part is then coded on top of this. This absolute difference
440 * then again has a value of [0,254], but a bigger value in this range
441 * indicates that we're further away from the original value A, so we
442 * can code this as a VLC code, since higher values are increasingly
443 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
444 * updates vs. the 'fine, exact' updates further down the range, which
445 * adds one extra dimension to this differential update model. */
447 if (!vp8_rac_get(c)) {
448 d = vp8_rac_get_uint(c, 4) + 0;
449 } else if (!vp8_rac_get(c)) {
450 d = vp8_rac_get_uint(c, 4) + 16;
451 } else if (!vp8_rac_get(c)) {
452 d = vp8_rac_get_uint(c, 5) + 32;
454 d = vp8_rac_get_uint(c, 7);
/* fold the 7-bit tail into the [64, 253] range of the table */
456 d = (d << 1) - 65 + vp8_rac_get(c);
/* re-center around p; mirrored for p > 128 to keep the result in range */
460 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
461 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
/* Parse the uncompressed VP9 frame header (plain bits) followed by the
 * compressed header (range-coded probability updates). Fills in frame
 * type/size/reference info, loop-filter, quantizer and segmentation
 * state, computes per-segment qmul/lflvl tables, sizes the tile grid,
 * and applies all forward probability updates into s->prob.
 * On success returns the total header size in bytes; on error a
 * negative AVERROR. For superframe-index "show existing frame" packets
 * *ref is set and (per the unseen early-return) no frame is decoded.
 * NOTE(review): sampled excerpt — many lines (braces, else branches,
 * several field reads) are missing from this view; the structure below
 * follows the visible lines only. */
464 static int decode_frame_header(AVCodecContext *ctx,
465 const uint8_t *data, int size, int *ref)
467 VP9Context *s = ctx->priv_data;
468 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
470 const uint8_t *data2;
/* uncompressed (bit-level) header */
473 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
474 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
477 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
478 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
479 return AVERROR_INVALIDDATA;
481 s->profile = get_bits1(&s->gb);
482 if (get_bits1(&s->gb)) { // reserved bit
483 av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
484 return AVERROR_INVALIDDATA;
/* show-existing-frame: just report which reference to display */
486 if (get_bits1(&s->gb)) {
487 *ref = get_bits(&s->gb, 3);
490 s->last_keyframe = s->keyframe;
491 s->keyframe = !get_bits1(&s->gb);
492 last_invisible = s->invisible;
493 s->invisible = !get_bits1(&s->gb);
494 s->errorres = get_bits1(&s->gb);
495 s->use_last_frame_mvs = !s->errorres && !last_invisible;
/* keyframe path: sync code, colorspace, full frame size */
497 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
498 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
499 return AVERROR_INVALIDDATA;
501 s->colorspace = get_bits(&s->gb, 3);
502 if (s->colorspace == 7) { // RGB = profile 1
503 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
504 return AVERROR_INVALIDDATA;
506 s->fullrange = get_bits1(&s->gb);
507 // for profile 1, here follows the subsampling bits
508 s->refreshrefmask = 0xff;
509 w = get_bits(&s->gb, 16) + 1;
510 h = get_bits(&s->gb, 16) + 1;
511 if (get_bits1(&s->gb)) // display size
512 skip_bits(&s->gb, 32);
/* non-keyframe path */
514 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
515 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
/* intra-only sub-path: its own sync code + explicit size */
517 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
518 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
519 return AVERROR_INVALIDDATA;
521 s->refreshrefmask = get_bits(&s->gb, 8);
522 w = get_bits(&s->gb, 16) + 1;
523 h = get_bits(&s->gb, 16) + 1;
524 if (get_bits1(&s->gb)) // display size
525 skip_bits(&s->gb, 32);
/* inter sub-path: 3 reference slots with sign biases */
527 s->refreshrefmask = get_bits(&s->gb, 8);
528 s->refidx[0] = get_bits(&s->gb, 3);
529 s->signbias[0] = get_bits1(&s->gb);
530 s->refidx[1] = get_bits(&s->gb, 3);
531 s->signbias[1] = get_bits1(&s->gb);
532 s->refidx[2] = get_bits(&s->gb, 3);
533 s->signbias[2] = get_bits1(&s->gb);
534 if (!s->refs[s->refidx[0]].f->data[0] ||
535 !s->refs[s->refidx[1]].f->data[0] ||
536 !s->refs[s->refidx[2]].f->data[0]) {
537 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
538 return AVERROR_INVALIDDATA;
/* frame size: either copied from one of the refs or coded explicitly */
540 if (get_bits1(&s->gb)) {
541 w = s->refs[s->refidx[0]].f->width;
542 h = s->refs[s->refidx[0]].f->height;
543 } else if (get_bits1(&s->gb)) {
544 w = s->refs[s->refidx[1]].f->width;
545 h = s->refs[s->refidx[1]].f->height;
546 } else if (get_bits1(&s->gb)) {
547 w = s->refs[s->refidx[2]].f->width;
548 h = s->refs[s->refidx[2]].f->height;
550 w = get_bits(&s->gb, 16) + 1;
551 h = get_bits(&s->gb, 16) + 1;
553 // Note that in this code, "CUR_FRAME" is actually before we
554 // have formally allocated a frame, and thus actually represents
556 s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
557 s->frames[CUR_FRAME].tf.f->height == h;
558 if (get_bits1(&s->gb)) // display size
559 skip_bits(&s->gb, 32);
560 s->highprecisionmvs = get_bits1(&s->gb);
561 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
/* compound prediction only allowed when the refs disagree on sign bias;
 * fixed ref is the odd one out, variable refs are the matching pair */
563 s->allowcompinter = s->signbias[0] != s->signbias[1] ||
564 s->signbias[0] != s->signbias[2];
565 if (s->allowcompinter) {
566 if (s->signbias[0] == s->signbias[1]) {
568 s->varcompref[0] = 0;
569 s->varcompref[1] = 1;
570 } else if (s->signbias[0] == s->signbias[2]) {
572 s->varcompref[0] = 0;
573 s->varcompref[1] = 2;
576 s->varcompref[0] = 1;
577 s->varcompref[1] = 2;
582 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
583 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
584 s->framectxid = c = get_bits(&s->gb, 2);
586 /* loopfilter header data */
587 s->filter.level = get_bits(&s->gb, 6);
588 sharp = get_bits(&s->gb, 3);
589 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
590 // the old cache values since they are still valid
591 if (s->filter.sharpness != sharp)
592 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
593 s->filter.sharpness = sharp;
594 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
595 if (get_bits1(&s->gb)) {
596 for (i = 0; i < 4; i++)
597 if (get_bits1(&s->gb))
598 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
599 for (i = 0; i < 2; i++)
600 if (get_bits1(&s->gb))
601 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
604 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
607 /* quantization header data */
608 s->yac_qi = get_bits(&s->gb, 8);
609 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
610 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
611 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
612 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
613 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
615 /* segmentation header info */
616 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
617 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
618 for (i = 0; i < 7; i++)
619 s->prob.seg[i] = get_bits1(&s->gb) ?
620 get_bits(&s->gb, 8) : 255;
621 if ((s->segmentation.temporal = get_bits1(&s->gb))) {
622 for (i = 0; i < 3; i++)
623 s->prob.segpred[i] = get_bits1(&s->gb) ?
624 get_bits(&s->gb, 8) : 255;
/* a segmap carried over from the previous frame is invalid if the
 * frame size changed */
627 if ((!s->segmentation.update_map || s->segmentation.temporal) &&
628 (w != s->frames[CUR_FRAME].tf.f->width ||
629 h != s->frames[CUR_FRAME].tf.f->height)) {
630 av_log(ctx, AV_LOG_ERROR,
631 "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
632 s->segmentation.temporal, s->segmentation.update_map);
633 return AVERROR_INVALIDDATA;
636 if (get_bits1(&s->gb)) {
637 s->segmentation.absolute_vals = get_bits1(&s->gb);
638 for (i = 0; i < 8; i++) {
639 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
640 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
641 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
642 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
643 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
644 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
645 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
/* segmentation disabled: neutralize feature set 0 */
649 s->segmentation.feat[0].q_enabled = 0;
650 s->segmentation.feat[0].lf_enabled = 0;
651 s->segmentation.feat[0].skip_enabled = 0;
652 s->segmentation.feat[0].ref_enabled = 0;
655 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
656 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
657 int qyac, qydc, quvac, quvdc, lflvl, sh;
659 if (s->segmentation.feat[i].q_enabled) {
660 if (s->segmentation.absolute_vals)
661 qyac = s->segmentation.feat[i].q_val;
663 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
667 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
668 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
669 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
670 qyac = av_clip_uintp2(qyac, 8);
672 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
673 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
674 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
675 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];
/* loop-filter level per segment, with ref/mode deltas scaled when
 * the base level is >= 32 */
677 sh = s->filter.level >= 32;
678 if (s->segmentation.feat[i].lf_enabled) {
679 if (s->segmentation.absolute_vals)
680 lflvl = s->segmentation.feat[i].lf_val;
682 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
684 lflvl = s->filter.level;
686 s->segmentation.feat[i].lflvl[0][0] =
687 s->segmentation.feat[i].lflvl[0][1] =
688 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
689 for (j = 1; j < 4; j++) {
690 s->segmentation.feat[i].lflvl[j][0] =
691 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
692 s->lf_delta.mode[0]) * (1 << sh)), 6);
693 s->segmentation.feat[i].lflvl[j][1] =
694 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
695 s->lf_delta.mode[1]) * (1 << sh)), 6);
/* tiling info */
700 if ((res = update_size(ctx, w, h)) < 0) {
701 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
704 for (s->tiling.log2_tile_cols = 0;
705 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
706 s->tiling.log2_tile_cols++) ;
707 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
708 max = FFMAX(0, max - 1);
709 while (max > s->tiling.log2_tile_cols) {
710 if (get_bits1(&s->gb))
711 s->tiling.log2_tile_cols++;
715 s->tiling.log2_tile_rows = decode012(&s->gb);
716 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
717 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
718 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
/* one range coder per tile column */
719 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
720 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
722 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
723 return AVERROR(ENOMEM);
/* keyframes / error-resilient frames reset all probability contexts */
727 if (s->keyframe || s->errorres || s->intraonly) {
728 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
729 s->prob_ctx[3].p = vp9_default_probs;
730 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
731 sizeof(vp9_default_coef_probs));
732 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
733 sizeof(vp9_default_coef_probs));
734 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
735 sizeof(vp9_default_coef_probs));
736 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
737 sizeof(vp9_default_coef_probs));
740 // next 16 bits is size of the rest of the header (arith-coded)
741 size2 = get_bits(&s->gb, 16);
742 data2 = align_get_bits(&s->gb);
743 if (size2 > size - (data2 - data)) {
744 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
745 return AVERROR_INVALIDDATA;
747 ff_vp56_init_range_decoder(&s->c, data2, size2);
748 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
749 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
750 return AVERROR_INVALIDDATA;
/* reset adaptation counts; keyframes only need coef/eob cleared
 * (they are adjacent in the struct, hence the summed sizeof) */
753 if (s->keyframe || s->intraonly) {
754 memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
756 memset(&s->counts, 0, sizeof(s->counts));
758 // FIXME is it faster to not copy here, but do it down in the fw updates
759 // as explicit copies if the fw update is missing (and skip the copy upon
761 s->prob.p = s->prob_ctx[c].p;
/* txfm updates */
765 s->txfmmode = TX_4X4;
767 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
768 if (s->txfmmode == 3)
769 s->txfmmode += vp8_rac_get(&s->c);
771 if (s->txfmmode == TX_SWITCHABLE) {
772 for (i = 0; i < 2; i++)
773 if (vp56_rac_get_prob_branchy(&s->c, 252))
774 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
775 for (i = 0; i < 2; i++)
776 for (j = 0; j < 2; j++)
777 if (vp56_rac_get_prob_branchy(&s->c, 252))
778 s->prob.p.tx16p[i][j] =
779 update_prob(&s->c, s->prob.p.tx16p[i][j]);
780 for (i = 0; i < 2; i++)
781 for (j = 0; j < 3; j++)
782 if (vp56_rac_get_prob_branchy(&s->c, 252))
783 s->prob.p.tx32p[i][j] =
784 update_prob(&s->c, s->prob.p.tx32p[i][j]);
/* coef probability updates, per transform size up to txfmmode */
789 for (i = 0; i < 4; i++) {
790 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
791 if (vp8_rac_get(&s->c)) {
792 for (j = 0; j < 2; j++)
793 for (k = 0; k < 2; k++)
794 for (l = 0; l < 6; l++)
795 for (m = 0; m < 6; m++) {
796 uint8_t *p = s->prob.coef[i][j][k][l][m];
797 uint8_t *r = ref[j][k][l][m];
798 if (m >= 3 && l == 0) // dc only has 3 pt
800 for (n = 0; n < 3; n++) {
801 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
802 p[n] = update_prob(&s->c, r[n]);
/* no update flag: copy the stored context unchanged */
810 for (j = 0; j < 2; j++)
811 for (k = 0; k < 2; k++)
812 for (l = 0; l < 6; l++)
813 for (m = 0; m < 6; m++) {
814 uint8_t *p = s->prob.coef[i][j][k][l][m];
815 uint8_t *r = ref[j][k][l][m];
816 if (m > 3 && l == 0) // dc only has 3 pt
822 if (s->txfmmode == i)
/* mode/ref/filter updates */
827 for (i = 0; i < 3; i++)
828 if (vp56_rac_get_prob_branchy(&s->c, 252))
829 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
830 if (!s->keyframe && !s->intraonly) {
831 for (i = 0; i < 7; i++)
832 for (j = 0; j < 3; j++)
833 if (vp56_rac_get_prob_branchy(&s->c, 252))
834 s->prob.p.mv_mode[i][j] =
835 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
837 if (s->filtermode == FILTER_SWITCHABLE)
838 for (i = 0; i < 4; i++)
839 for (j = 0; j < 2; j++)
840 if (vp56_rac_get_prob_branchy(&s->c, 252))
841 s->prob.p.filter[i][j] =
842 update_prob(&s->c, s->prob.p.filter[i][j]);
844 for (i = 0; i < 4; i++)
845 if (vp56_rac_get_prob_branchy(&s->c, 252))
846 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
848 if (s->allowcompinter) {
849 s->comppredmode = vp8_rac_get(&s->c);
851 s->comppredmode += vp8_rac_get(&s->c);
852 if (s->comppredmode == PRED_SWITCHABLE)
853 for (i = 0; i < 5; i++)
854 if (vp56_rac_get_prob_branchy(&s->c, 252))
856 update_prob(&s->c, s->prob.p.comp[i]);
858 s->comppredmode = PRED_SINGLEREF;
861 if (s->comppredmode != PRED_COMPREF) {
862 for (i = 0; i < 5; i++) {
863 if (vp56_rac_get_prob_branchy(&s->c, 252))
864 s->prob.p.single_ref[i][0] =
865 update_prob(&s->c, s->prob.p.single_ref[i][0]);
866 if (vp56_rac_get_prob_branchy(&s->c, 252))
867 s->prob.p.single_ref[i][1] =
868 update_prob(&s->c, s->prob.p.single_ref[i][1]);
872 if (s->comppredmode != PRED_SINGLEREF) {
873 for (i = 0; i < 5; i++)
874 if (vp56_rac_get_prob_branchy(&s->c, 252))
875 s->prob.p.comp_ref[i] =
876 update_prob(&s->c, s->prob.p.comp_ref[i]);
879 for (i = 0; i < 4; i++)
880 for (j = 0; j < 9; j++)
881 if (vp56_rac_get_prob_branchy(&s->c, 252))
882 s->prob.p.y_mode[i][j] =
883 update_prob(&s->c, s->prob.p.y_mode[i][j]);
885 for (i = 0; i < 4; i++)
886 for (j = 0; j < 4; j++)
887 for (k = 0; k < 3; k++)
888 if (vp56_rac_get_prob_branchy(&s->c, 252))
889 s->prob.p.partition[3 - i][j][k] =
890 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
892 // mv fields don't use the update_prob subexp model for some reason
893 for (i = 0; i < 3; i++)
894 if (vp56_rac_get_prob_branchy(&s->c, 252))
895 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
897 for (i = 0; i < 2; i++) {
898 if (vp56_rac_get_prob_branchy(&s->c, 252))
899 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
901 for (j = 0; j < 10; j++)
902 if (vp56_rac_get_prob_branchy(&s->c, 252))
903 s->prob.p.mv_comp[i].classes[j] =
904 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
906 if (vp56_rac_get_prob_branchy(&s->c, 252))
907 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
909 for (j = 0; j < 10; j++)
910 if (vp56_rac_get_prob_branchy(&s->c, 252))
911 s->prob.p.mv_comp[i].bits[j] =
912 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
915 for (i = 0; i < 2; i++) {
916 for (j = 0; j < 2; j++)
917 for (k = 0; k < 3; k++)
918 if (vp56_rac_get_prob_branchy(&s->c, 252))
919 s->prob.p.mv_comp[i].class0_fp[j][k] =
920 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
922 for (j = 0; j < 3; j++)
923 if (vp56_rac_get_prob_branchy(&s->c, 252))
924 s->prob.p.mv_comp[i].fp[j] =
925 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
928 if (s->highprecisionmvs) {
929 for (i = 0; i < 2; i++) {
930 if (vp56_rac_get_prob_branchy(&s->c, 252))
931 s->prob.p.mv_comp[i].class0_hp =
932 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
934 if (vp56_rac_get_prob_branchy(&s->c, 252))
935 s->prob.p.mv_comp[i].hp =
936 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
/* total header size: uncompressed part + compressed part */
941 return (data2 - data) + size2;
/* Clamp a motion vector into the per-block legal range kept in
 * s->min_mv / s->max_mv.
 * NOTE(review): sampled excerpt — the trailing parameter (presumably
 * VP9Context *s, per the body) and braces are not visible here. */
944 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
947 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
948 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
/* Build the MV prediction for reference `ref` (z selects the MV slot,
 * idx which candidate to return, sb the sub-block index): scan
 * already-decoded sub-block MVs, the above/left neighbour contexts, the
 * spatial neighbourhood given by mv_ref_blk_off[], and the co-located
 * MV of the previous frame — first with the same reference, then with
 * any other reference (sign-flipped when the sign biases differ).
 * The RETURN_* macros jump out (to an unseen label) as soon as the
 * requested candidate is found; `mem` de-duplicates candidates.
 * NOTE(review): sampled excerpt — several macro bodies, braces and the
 * function tail are missing from this view. */
951 static void find_ref_mvs(VP9Context *s,
952 VP56mv *pmv, int ref, int z, int idx, int sb)
/* candidate block offsets {col,row} per block size, nearest first */
954 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
955 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
956 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
957 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
958 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
959 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
960 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
961 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
962 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
963 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
964 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
965 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
966 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
967 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
968 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
969 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
970 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
971 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
972 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
973 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
974 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
975 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
976 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
977 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
978 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
979 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
980 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
983 int row = s->row, col = s->col, row7 = s->row7;
984 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
985 #define INVALID_MV 0x80008000U
986 uint32_t mem = INVALID_MV;
/* return an already-clamped MV candidate without re-clamping */
989 #define RETURN_DIRECT_MV(mv) \
991 uint32_t m = AV_RN32A(&mv); \
995 } else if (mem == INVALID_MV) { \
997 } else if (m != mem) { \
/* sub-blocks after the first can reuse earlier sub-block MVs */
1004 if (sb == 2 || sb == 1) {
1005 RETURN_DIRECT_MV(b->mv[0][z]);
1006 } else if (sb == 3) {
1007 RETURN_DIRECT_MV(b->mv[2][z]);
1008 RETURN_DIRECT_MV(b->mv[1][z]);
1009 RETURN_DIRECT_MV(b->mv[0][z]);
/* return a candidate after clamping it to the legal MV range */
1012 #define RETURN_MV(mv) \
1017 clamp_mv(&tmp, &mv, s); \
1018 m = AV_RN32A(&tmp); \
1022 } else if (mem == INVALID_MV) { \
1024 } else if (m != mem) { \
1029 uint32_t m = AV_RN32A(&mv); \
1031 clamp_mv(pmv, &mv, s); \
1033 } else if (mem == INVALID_MV) { \
1035 } else if (m != mem) { \
1036 clamp_mv(pmv, &mv, s); \
/* above neighbour (same frame) */
1043 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1044 if (mv->ref[0] == ref) {
1045 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1046 } else if (mv->ref[1] == ref) {
1047 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
/* left neighbour, only within the current tile */
1050 if (col > s->tiling.tile_col_start) {
1051 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1052 if (mv->ref[0] == ref) {
1053 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1054 } else if (mv->ref[1] == ref) {
1055 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1063 // previously coded MVs in this neighbourhood, using same reference frame
1064 for (; i < 8; i++) {
1065 int c = p[i][0] + col, r = p[i][1] + row;
1067 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1068 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1070 if (mv->ref[0] == ref) {
1071 RETURN_MV(mv->mv[0]);
1072 } else if (mv->ref[1] == ref) {
1073 RETURN_MV(mv->mv[1]);
1078 // MV at this position in previous frame, using same reference frame
1079 if (s->use_last_frame_mvs) {
1080 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
/* frame-threading: wait for the previous frame's row to be decoded */
1082 if (!s->frames[REF_FRAME_MVPAIR].uses_2pass)
1083 ff_thread_await_progress(&s->frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
1084 if (mv->ref[0] == ref) {
1085 RETURN_MV(mv->mv[0]);
1086 } else if (mv->ref[1] == ref) {
1087 RETURN_MV(mv->mv[1]);
/* like RETURN_MV but negates the MV when `scale` (sign-bias mismatch) */
1091 #define RETURN_SCALE_MV(mv, scale) \
1094 VP56mv mv_temp = { -mv.x, -mv.y }; \
1095 RETURN_MV(mv_temp); \
1101 // previously coded MVs in this neighbourhood, using different reference frame
1102 for (i = 0; i < 8; i++) {
1103 int c = p[i][0] + col, r = p[i][1] + row;
1105 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1106 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1108 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1109 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1111 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1112 // BUG - libvpx has this condition regardless of whether
1113 // we used the first ref MV and pre-scaling
1114 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1115 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1120 // MV at this position in previous frame, using different reference frame
1121 if (s->use_last_frame_mvs) {
1122 struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];
1124 // no need to await_progress, because we already did that above
1125 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1126 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1128 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1129 // BUG - libvpx has this condition regardless of whether
1130 // we used the first ref MV and pre-scaling
1131 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1132 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1139 #undef RETURN_SCALE_MV
// Decode one motion-vector component (idx 0 = row/y, idx 1 = col/x) from the
// range coder: sign bit, magnitude class, then either the class0 short form
// or the per-bit long form, followed by fractional (fp) and, when 'hp' is
// set, high-precision bits. Adaptive entropy counters in s->counts are
// updated alongside every symbol for backward probability adaptation.
// Returns the signed component value.
// NOTE(review): this dump elides several original lines (the class0 branch
// condition, hp gating and closing braces) between the numbered statements.
1142 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1144 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1145 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1146 s->prob.p.mv_comp[idx].classes);
1148 s->counts.mv_comp[idx].sign[sign]++;
1149 s->counts.mv_comp[idx].classes[c]++;
// Long form: one bit per magnitude-class bit position.
1153 for (n = 0, m = 0; m < c; m++) {
1154 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1156 s->counts.mv_comp[idx].bits[m][bit]++;
// Fractional (1/8-pel) part via the fp tree.
1159 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1161 s->counts.mv_comp[idx].fp[bit]++;
1163 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1164 s->counts.mv_comp[idx].hp[bit]++;
1168 // bug in libvpx - we count for bw entropy purposes even if the
1170 s->counts.mv_comp[idx].hp[1]++;
// Short (class0) form: single magnitude bit plus its own fp/hp tables.
1174 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1175 s->counts.mv_comp[idx].class0[n]++;
1176 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1177 s->prob.p.mv_comp[idx].class0_fp[n]);
1178 s->counts.mv_comp[idx].class0_fp[n][bit]++;
1179 n = (n << 3) | (bit << 1);
1181 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1182 s->counts.mv_comp[idx].class0_hp[bit]++;
1186 // bug in libvpx - we count for bw entropy purposes even if the
1188 s->counts.mv_comp[idx].class0_hp[1]++;
// Apply the sign last; magnitude is stored offset by one.
1192 return sign ? -(n + 1) : (n + 1);
// Fill mv[0] (and mv[1] for compound prediction) for one (sub)block:
// look up the reference MV via find_ref_mvs(), clamp/round when high
// precision is disallowed, and for NEWMV add the decoded MV residual
// (joint code selects which components are coded).
// NOTE(review): the ZEROMV early-out body, the rounding statements after
// the hp checks, and the compound-prediction guard are elided in this dump.
1195 static void fill_mv(VP9Context *s,
1196 VP56mv *mv, int mode, int sb)
1200 if (mode == ZEROMV) {
// First reference.
1205 // FIXME cache this value and reuse for other subblocks
1206 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1207 mode == NEWMV ? -1 : sb);
1208 // FIXME maybe move this code into find_ref_mvs()
// hp is only kept when allowed by the header AND the predictor is small.
1209 if ((mode == NEWMV || sb == -1) &&
1210 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1224 if (mode == NEWMV) {
1225 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1226 s->prob.p.mv_joint);
1228 s->counts.mv_joint[j]++;
1229 if (j >= MV_JOINT_V)
1230 mv[0].y += read_mv_component(s, 0, hp);
1232 mv[0].x += read_mv_component(s, 1, hp);
// Second reference (compound prediction) — same procedure as above.
1236 // FIXME cache this value and reuse for other subblocks
1237 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1238 mode == NEWMV ? -1 : sb);
1239 if ((mode == NEWMV || sb == -1) &&
1240 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1254 if (mode == NEWMV) {
1255 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1256 s->prob.p.mv_joint);
1258 s->counts.mv_joint[j]++;
1259 if (j >= MV_JOINT_V)
1260 mv[1].y += read_mv_component(s, 0, hp);
1262 mv[1].x += read_mv_component(s, 1, hp);
// Splat the byte value 'v' over a w x h rectangle of a context plane with
// the given stride, using replicated 16/32/64-bit aligned stores sized to
// the block width.
// NOTE(review): the switch on 'w' and the per-row store loops are almost
// entirely elided in this dump; only the replicated-constant setups and one
// store remain visible.
1268 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1269 ptrdiff_t stride, int v)
1279 int v16 = v * 0x0101;
1287 uint32_t v32 = v * 0x01010101;
1296 uint64_t v64 = v * 0x0101010101010101ULL;
1302 uint32_t v32 = v * 0x01010101;
1305 AV_WN32A(ptr + 4, v32);
// Decode all per-block mode information for the current block (s->b at
// s->row/s->col): segment id, skip flag, intra/inter decision, transform
// size, intra prediction modes or reference frames + inter modes + filter
// + motion vectors; then update the left/above context planes and store
// refs/MVs into the current frame's mv plane for later frames.
// NOTE(review): this dump elides many original lines (else branches, lone
// braces, some assignments) throughout; comments below only describe what
// the visible statements establish.
1314 static void decode_mode(AVCodecContext *ctx)
1316 static const uint8_t left_ctx[N_BS_SIZES] = {
1317 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1319 static const uint8_t above_ctx[N_BS_SIZES] = {
1320 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
1322 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1323 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1324 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1326 VP9Context *s = ctx->priv_data;
1328 int row = s->row, col = s->col, row7 = s->row7;
1329 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
// w4/h4: block size in 8x8 units, clipped against the frame edge.
1330 int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
1331 int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
1332 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1333 int vref, filter_id;
// --- segment id: explicit, temporally predicted, or inherited ---
1335 if (!s->segmentation.enabled) {
1337 } else if (s->keyframe || s->intraonly) {
1338 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
1339 } else if (!s->segmentation.update_map ||
1340 (s->segmentation.temporal &&
1341 vp56_rac_get_prob_branchy(&s->c,
1342 s->prob.segpred[s->above_segpred_ctx[col] +
1343 s->left_segpred_ctx[row7]]))) {
// Predict segment id from the reference frame's segmentation map
// (minimum over the covered region); wait for that frame's rows first.
1346 uint8_t *refsegmap = s->frames[REF_FRAME_SEGMAP].segmentation_map;
1348 if (!s->frames[REF_FRAME_SEGMAP].uses_2pass)
1349 ff_thread_await_progress(&s->frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
1350 for (y = 0; y < h4; y++) {
1351 int idx_base = (y + row) * 8 * s->sb_cols + col;
1352 for (x = 0; x < w4; x++)
1353 pred = FFMIN(pred, refsegmap[idx_base + x]);
1355 av_assert1(pred < 8);
1361 memset(&s->above_segpred_ctx[col], 1, w4);
1362 memset(&s->left_segpred_ctx[row7], 1, h4);
1364 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1367 memset(&s->above_segpred_ctx[col], 0, w4);
1368 memset(&s->left_segpred_ctx[row7], 0, h4);
1370 if (s->segmentation.enabled &&
1371 (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1372 setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1373 w4, h4, 8 * s->sb_cols, b->seg_id);
// --- skip flag: forced by segment feature or coded with l+a context ---
1376 b->skip = s->segmentation.enabled &&
1377 s->segmentation.feat[b->seg_id].skip_enabled;
1379 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1380 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1381 s->counts.skip[c][b->skip]++;
// --- intra/inter decision ---
1384 if (s->keyframe || s->intraonly) {
1386 } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1387 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1391 if (have_a && have_l) {
1392 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1395 c = have_a ? 2 * s->above_intra_ctx[col] :
1396 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1398 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1399 s->counts.intra[c][bit]++;
// --- transform size: coded when switchable, else clamped to max_tx ---
1403 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1407 c = (s->above_skip_ctx[col] ? max_tx :
1408 s->above_txfm_ctx[col]) +
1409 (s->left_skip_ctx[row7] ? max_tx :
1410 s->left_txfm_ctx[row7]) > max_tx;
1412 c = s->above_skip_ctx[col] ? 1 :
1413 (s->above_txfm_ctx[col] * 2 > max_tx);
1415 } else if (have_l) {
1416 c = s->left_skip_ctx[row7] ? 1 :
1417 (s->left_txfm_ctx[row7] * 2 > max_tx);
// Unary-coded tx size, table chosen by max_tx.
1423 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1425 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1427 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1429 s->counts.tx32p[c][b->tx]++;
1432 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1434 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1435 s->counts.tx16p[c][b->tx]++;
1438 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1439 s->counts.tx8p[c][b->tx]++;
1446 b->tx = FFMIN(max_tx, s->txfmmode);
// --- intra modes on keyframe/intra-only: context from above/left modes ---
1449 if (s->keyframe || s->intraonly) {
1450 uint8_t *a = &s->above_mode_ctx[col * 2];
1451 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1454 if (b->bs > BS_8x8) {
1455 // FIXME the memory storage intermediates here aren't really
1456 // necessary, they're just there to make the code slightly
1458 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1459 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1460 if (b->bs != BS_8x4) {
1461 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1462 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1463 l[0] = a[1] = b->mode[1];
1465 l[0] = a[1] = b->mode[1] = b->mode[0];
1467 if (b->bs != BS_4x8) {
1468 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1469 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1470 if (b->bs != BS_8x4) {
1471 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1472 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1473 l[1] = a[1] = b->mode[3];
1475 l[1] = a[1] = b->mode[3] = b->mode[2];
1478 b->mode[2] = b->mode[0];
1479 l[1] = a[1] = b->mode[3] = b->mode[1];
1482 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1483 vp9_default_kf_ymode_probs[*a][*l]);
1484 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1485 // FIXME this can probably be optimized
1486 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1487 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1489 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1490 vp9_default_kf_uvmode_probs[b->mode[3]]);
// --- intra modes on inter frames: adaptive y_mode/uv_mode probabilities ---
1491 } else if (b->intra) {
1493 if (b->bs > BS_8x8) {
1494 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1495 s->prob.p.y_mode[0]);
1496 s->counts.y_mode[0][b->mode[0]]++;
1497 if (b->bs != BS_8x4) {
1498 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1499 s->prob.p.y_mode[0]);
1500 s->counts.y_mode[0][b->mode[1]]++;
1502 b->mode[1] = b->mode[0];
1504 if (b->bs != BS_4x8) {
1505 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1506 s->prob.p.y_mode[0]);
1507 s->counts.y_mode[0][b->mode[2]]++;
1508 if (b->bs != BS_8x4) {
1509 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1510 s->prob.p.y_mode[0]);
1511 s->counts.y_mode[0][b->mode[3]]++;
1513 b->mode[3] = b->mode[2];
1516 b->mode[2] = b->mode[0];
1517 b->mode[3] = b->mode[1];
1520 static const uint8_t size_group[10] = {
1521 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1523 int sz = size_group[b->bs];
1525 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1526 s->prob.p.y_mode[sz]);
1527 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1528 s->counts.y_mode[sz][b->mode[3]]++;
1530 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1531 s->prob.p.uv_mode[b->mode[3]]);
1532 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
// --- inter block: references, modes, filter, MVs ---
1534 static const uint8_t inter_mode_ctx_lut[14][14] = {
1535 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1536 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1537 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1538 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1539 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1540 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1541 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1542 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1543 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1544 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1545 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1546 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1547 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1548 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
// Segment feature may pin the reference frame directly.
1551 if (s->segmentation.feat[b->seg_id].ref_enabled) {
1552 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1554 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1556 // read comp_pred flag
1557 if (s->comppredmode != PRED_SWITCHABLE) {
1558 b->comp = s->comppredmode == PRED_COMPREF;
1562 // FIXME add intra as ref=0xff (or -1) to make these easier?
1565 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1567 } else if (s->above_comp_ctx[col]) {
1568 c = 2 + (s->left_intra_ctx[row7] ||
1569 s->left_ref_ctx[row7] == s->fixcompref);
1570 } else if (s->left_comp_ctx[row7]) {
1571 c = 2 + (s->above_intra_ctx[col] ||
1572 s->above_ref_ctx[col] == s->fixcompref);
1574 c = (!s->above_intra_ctx[col] &&
1575 s->above_ref_ctx[col] == s->fixcompref) ^
1576 (!s->left_intra_ctx[row7] &&
// NOTE(review): 'row & 7' equals row7 here, but is written
// inconsistently with the neighbouring accesses — confirm.
1577 s->left_ref_ctx[row & 7] == s->fixcompref);
1580 c = s->above_comp_ctx[col] ? 3 :
1581 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1583 } else if (have_l) {
1584 c = s->left_comp_ctx[row7] ? 3 :
1585 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1589 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1590 s->counts.comp[c][b->comp]++;
1593 // read actual references
1594 // FIXME probably cache a few variables here to prevent repetitive
1595 // memory accesses below
1596 if (b->comp) /* two references */ {
1597 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1599 b->ref[fix_idx] = s->fixcompref;
1600 // FIXME can this codeblob be replaced by some sort of LUT?
1603 if (s->above_intra_ctx[col]) {
1604 if (s->left_intra_ctx[row7]) {
1607 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1609 } else if (s->left_intra_ctx[row7]) {
1610 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1612 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1614 if (refl == refa && refa == s->varcompref[1]) {
1616 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1617 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1618 (refl == s->fixcompref && refa == s->varcompref[0])) {
1621 c = (refa == refl) ? 3 : 1;
1623 } else if (!s->left_comp_ctx[row7]) {
1624 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1627 c = (refl == s->varcompref[1] &&
1628 refa != s->varcompref[1]) ? 2 : 4;
1630 } else if (!s->above_comp_ctx[col]) {
1631 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1634 c = (refa == s->varcompref[1] &&
1635 refl != s->varcompref[1]) ? 2 : 4;
1638 c = (refl == refa) ? 4 : 2;
1642 if (s->above_intra_ctx[col]) {
1644 } else if (s->above_comp_ctx[col]) {
1645 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1647 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1650 } else if (have_l) {
1651 if (s->left_intra_ctx[row7]) {
1653 } else if (s->left_comp_ctx[row7]) {
1654 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1656 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1661 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1662 b->ref[var_idx] = s->varcompref[bit];
1663 s->counts.comp_ref[c][bit]++;
1664 } else /* single reference */ {
// First single_ref bit: LAST vs (GOLDEN|ALTREF).
1667 if (have_a && !s->above_intra_ctx[col]) {
1668 if (have_l && !s->left_intra_ctx[row7]) {
1669 if (s->left_comp_ctx[row7]) {
1670 if (s->above_comp_ctx[col]) {
1671 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1672 !s->above_ref_ctx[col]);
1674 c = (3 * !s->above_ref_ctx[col]) +
1675 (!s->fixcompref || !s->left_ref_ctx[row7]);
1677 } else if (s->above_comp_ctx[col]) {
1678 c = (3 * !s->left_ref_ctx[row7]) +
1679 (!s->fixcompref || !s->above_ref_ctx[col]);
1681 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1683 } else if (s->above_intra_ctx[col]) {
1685 } else if (s->above_comp_ctx[col]) {
1686 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1688 c = 4 * (!s->above_ref_ctx[col]);
1690 } else if (have_l && !s->left_intra_ctx[row7]) {
1691 if (s->left_intra_ctx[row7]) {
1693 } else if (s->left_comp_ctx[row7]) {
1694 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1696 c = 4 * (!s->left_ref_ctx[row7]);
1701 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1702 s->counts.single_ref[c][0][bit]++;
// Second single_ref bit: GOLDEN vs ALTREF.
1706 // FIXME can this codeblob be replaced by some sort of LUT?
1709 if (s->left_intra_ctx[row7]) {
1710 if (s->above_intra_ctx[col]) {
1712 } else if (s->above_comp_ctx[col]) {
1713 c = 1 + 2 * (s->fixcompref == 1 ||
1714 s->above_ref_ctx[col] == 1);
1715 } else if (!s->above_ref_ctx[col]) {
1718 c = 4 * (s->above_ref_ctx[col] == 1);
1720 } else if (s->above_intra_ctx[col]) {
1721 if (s->left_intra_ctx[row7]) {
1723 } else if (s->left_comp_ctx[row7]) {
1724 c = 1 + 2 * (s->fixcompref == 1 ||
1725 s->left_ref_ctx[row7] == 1);
1726 } else if (!s->left_ref_ctx[row7]) {
1729 c = 4 * (s->left_ref_ctx[row7] == 1);
1731 } else if (s->above_comp_ctx[col]) {
1732 if (s->left_comp_ctx[row7]) {
1733 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1734 c = 3 * (s->fixcompref == 1 ||
1735 s->left_ref_ctx[row7] == 1);
1739 } else if (!s->left_ref_ctx[row7]) {
1740 c = 1 + 2 * (s->fixcompref == 1 ||
1741 s->above_ref_ctx[col] == 1);
1743 c = 3 * (s->left_ref_ctx[row7] == 1) +
1744 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1746 } else if (s->left_comp_ctx[row7]) {
1747 if (!s->above_ref_ctx[col]) {
1748 c = 1 + 2 * (s->fixcompref == 1 ||
1749 s->left_ref_ctx[row7] == 1);
1751 c = 3 * (s->above_ref_ctx[col] == 1) +
1752 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1754 } else if (!s->above_ref_ctx[col]) {
1755 if (!s->left_ref_ctx[row7]) {
1758 c = 4 * (s->left_ref_ctx[row7] == 1);
1760 } else if (!s->left_ref_ctx[row7]) {
1761 c = 4 * (s->above_ref_ctx[col] == 1);
1763 c = 2 * (s->left_ref_ctx[row7] == 1) +
1764 2 * (s->above_ref_ctx[col] == 1);
1767 if (s->above_intra_ctx[col] ||
1768 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1770 } else if (s->above_comp_ctx[col]) {
1771 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1773 c = 4 * (s->above_ref_ctx[col] == 1);
1776 } else if (have_l) {
1777 if (s->left_intra_ctx[row7] ||
1778 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1780 } else if (s->left_comp_ctx[row7]) {
1781 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1783 c = 4 * (s->left_ref_ctx[row7] == 1);
1788 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1789 s->counts.single_ref[c][1][bit]++;
1790 b->ref[0] = 1 + bit;
// --- inter mode + interpolation filter for <= 8x8 blocks ---
1795 if (b->bs <= BS_8x8) {
1796 if (s->segmentation.feat[b->seg_id].skip_enabled) {
1797 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1799 static const uint8_t off[10] = {
1800 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1803 // FIXME this needs to use the LUT tables from find_ref_mvs
1804 // because not all are -1,0/0,-1
1805 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1806 [s->left_mode_ctx[row7 + off[b->bs]]];
1808 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1809 s->prob.p.mv_mode[c]);
1810 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1811 s->counts.mv_mode[c][b->mode[0] - 10]++;
// Interpolation filter: coded only when switchable, context from
// the neighbours' filters where they were inter-coded.
1815 if (s->filtermode == FILTER_SWITCHABLE) {
1818 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1819 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1820 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1821 s->left_filter_ctx[row7] : 3;
1823 c = s->above_filter_ctx[col];
1825 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1826 c = s->left_filter_ctx[row7];
1831 filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1832 s->prob.p.filter[c]);
1833 s->counts.filter[c][filter_id]++;
1834 b->filter = vp9_filter_lut[filter_id];
1836 b->filter = s->filtermode;
// --- motion vectors: per-subblock for > 8x8, single set otherwise ---
1839 if (b->bs > BS_8x8) {
1840 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1842 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1843 s->prob.p.mv_mode[c]);
1844 s->counts.mv_mode[c][b->mode[0] - 10]++;
1845 fill_mv(s, b->mv[0], b->mode[0], 0);
1847 if (b->bs != BS_8x4) {
1848 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1849 s->prob.p.mv_mode[c]);
1850 s->counts.mv_mode[c][b->mode[1] - 10]++;
1851 fill_mv(s, b->mv[1], b->mode[1], 1);
1853 b->mode[1] = b->mode[0];
1854 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1855 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1858 if (b->bs != BS_4x8) {
1859 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1860 s->prob.p.mv_mode[c]);
1861 s->counts.mv_mode[c][b->mode[2] - 10]++;
1862 fill_mv(s, b->mv[2], b->mode[2], 2);
1864 if (b->bs != BS_8x4) {
1865 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1866 s->prob.p.mv_mode[c]);
1867 s->counts.mv_mode[c][b->mode[3] - 10]++;
1868 fill_mv(s, b->mv[3], b->mode[3], 3);
1870 b->mode[3] = b->mode[2];
1871 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1872 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1875 b->mode[2] = b->mode[0];
1876 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1877 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1878 b->mode[3] = b->mode[1];
1879 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1880 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
1883 fill_mv(s, b->mv[0], b->mode[0], -1);
1884 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1885 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1886 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
1887 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1888 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1889 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
1892 vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
// --- splat decoded block state into above/left context planes ---
// SPLAT_CTX has two variants depending on 64-bit store availability.
1896 #define SPLAT_CTX(var, val, n) \
1898 case 1: var = val; break; \
1899 case 2: AV_WN16A(&var, val * 0x0101); break; \
1900 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1901 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
1903 uint64_t v64 = val * 0x0101010101010101ULL; \
1904 AV_WN64A( &var, v64); \
1905 AV_WN64A(&((uint8_t *) &var)[8], v64); \
1910 #define SPLAT_CTX(var, val, n) \
1912 case 1: var = val; break; \
1913 case 2: AV_WN16A(&var, val * 0x0101); break; \
1914 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1916 uint32_t v32 = val * 0x01010101; \
1917 AV_WN32A( &var, v32); \
1918 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1922 uint32_t v32 = val * 0x01010101; \
1923 AV_WN32A( &var, v32); \
1924 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1925 AV_WN32A(&((uint8_t *) &var)[8], v32); \
1926 AV_WN32A(&((uint8_t *) &var)[12], v32); \
1932 switch (bwh_tab[1][b->bs][0]) {
1933 #define SET_CTXS(dir, off, n) \
1935 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
1936 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
1937 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
1938 if (!s->keyframe && !s->intraonly) { \
1939 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
1940 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
1941 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
1943 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
1944 if (s->filtermode == FILTER_SWITCHABLE) { \
1945 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
1950 case 1: SET_CTXS(above, col, 1); break;
1951 case 2: SET_CTXS(above, col, 2); break;
1952 case 4: SET_CTXS(above, col, 4); break;
1953 case 8: SET_CTXS(above, col, 8); break;
1955 switch (bwh_tab[1][b->bs][1]) {
1956 case 1: SET_CTXS(left, row7, 1); break;
1957 case 2: SET_CTXS(left, row7, 2); break;
1958 case 4: SET_CTXS(left, row7, 4); break;
1959 case 8: SET_CTXS(left, row7, 8); break;
// MV context planes used by find_ref_mvs() for subsequent blocks.
1964 if (!s->keyframe && !s->intraonly) {
1965 if (b->bs > BS_8x8) {
1966 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1968 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
1969 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
1970 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
1971 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
1972 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
1973 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
1974 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
1975 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
1977 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1979 for (n = 0; n < w4 * 2; n++) {
1980 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
1981 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
1983 for (n = 0; n < h4 * 2; n++) {
1984 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
1985 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
// --- store refs/MVs into the frame-wide mv plane (for future frames) ---
1991 for (y = 0; y < h4; y++) {
1992 int x, o = (row + y) * s->sb_cols * 8 + col;
1993 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
1996 for (x = 0; x < w4; x++) {
2000 } else if (b->comp) {
2001 for (x = 0; x < w4; x++) {
2002 mv[x].ref[0] = b->ref[0];
2003 mv[x].ref[1] = b->ref[1];
2004 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2005 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2008 for (x = 0; x < w4; x++) {
2009 mv[x].ref[0] = b->ref[0];
2011 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2017 // FIXME merge cnt/eob arguments?
// Decode one transform block's coefficient tokens from the range coder
// into 'coef' (dequantizing as it goes), updating the per-band/context
// token counters 'cnt' and end-of-block counters 'eob'. 'nnz' is the
// initial nonzero context, 'scan'/'nb' the scan order and its neighbour
// table, 'band_counts' the coefficients-per-band table, 'qmul' the
// DC/AC quantizer pair. is_tx32x32 selects the halved-dequant path.
// Returns the number of decoded coefficients (visible lines elided).
2018 static av_always_inline int
2019 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2020 int is_tx32x32, unsigned (*cnt)[6][3],
2021 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2022 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2023 const int16_t *band_counts, const int16_t *qmul)
2025 int i = 0, band = 0, band_left = band_counts[band];
2026 uint8_t *tp = p[0][nnz];
// cache[] holds clamped token magnitudes for deriving neighbour contexts.
2027 uint8_t cache[1024];
2032 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2033 eob[band][nnz][val]++;
2038 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2039 cnt[band][nnz][0]++;
2041 band_left = band_counts[++band];
// Next context = rounded average of the two scan neighbours' tokens.
2043 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2045 if (++i == n_coeffs)
2046 break; //invalid input; blocks should end with EOB
2051 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2052 cnt[band][nnz][1]++;
2056 // fill in p[3-10] (model fill) - only once per frame for each pos
// Lazily expand the 2-node model into full token probabilities.
2058 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2060 cnt[band][nnz][2]++;
2061 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2062 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2063 cache[rc] = val = 2;
2065 val = 3 + vp56_rac_get_prob(c, tp[5]);
2068 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2070 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2071 val = 5 + vp56_rac_get_prob(c, 159);
2073 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2074 val += vp56_rac_get_prob(c, 145);
// cat3-6: progressively longer extra-bit suffixes with fixed probs.
2078 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2079 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2080 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2081 val += (vp56_rac_get_prob(c, 148) << 1);
2082 val += vp56_rac_get_prob(c, 140);
2084 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2085 val += (vp56_rac_get_prob(c, 155) << 2);
2086 val += (vp56_rac_get_prob(c, 140) << 1);
2087 val += vp56_rac_get_prob(c, 135);
2089 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2090 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2091 val += (vp56_rac_get_prob(c, 157) << 3);
2092 val += (vp56_rac_get_prob(c, 141) << 2);
2093 val += (vp56_rac_get_prob(c, 134) << 1);
2094 val += vp56_rac_get_prob(c, 130);
2096 val = 67 + (vp56_rac_get_prob(c, 254) << 13);
2097 val += (vp56_rac_get_prob(c, 254) << 12);
2098 val += (vp56_rac_get_prob(c, 254) << 11);
2099 val += (vp56_rac_get_prob(c, 252) << 10);
2100 val += (vp56_rac_get_prob(c, 249) << 9);
2101 val += (vp56_rac_get_prob(c, 243) << 8);
2102 val += (vp56_rac_get_prob(c, 230) << 7);
2103 val += (vp56_rac_get_prob(c, 196) << 6);
2104 val += (vp56_rac_get_prob(c, 177) << 5);
2105 val += (vp56_rac_get_prob(c, 153) << 4);
2106 val += (vp56_rac_get_prob(c, 140) << 3);
2107 val += (vp56_rac_get_prob(c, 133) << 2);
2108 val += (vp56_rac_get_prob(c, 130) << 1);
2109 val += vp56_rac_get_prob(c, 129);
2114 band_left = band_counts[++band];
// Sign bit, then dequant; tx32x32 stores coefficients at half scale.
2116 coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
2118 coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
2119 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2121 } while (++i < n_coeffs);
// Thin wrapper: coefficient decode for all non-32x32 transforms
// (is_tx32x32 = 0, i.e. full-scale dequantization).
2126 static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2127 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2128 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2129 const int16_t (*nb)[2], const int16_t *band_counts,
2130 const int16_t *qmul)
2132 return decode_coeffs_b_generic(c, coef, n_coeffs, 0, cnt, eob, p,
2133 nnz, scan, nb, band_counts, qmul);
// Thin wrapper: coefficient decode for 32x32 transforms
// (is_tx32x32 = 1, i.e. halved dequantization in the generic path).
2136 static int decode_coeffs_b32(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2137 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2138 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2139 const int16_t (*nb)[2], const int16_t *band_counts,
2140 const int16_t *qmul)
2142 return decode_coeffs_b_generic(c, coef, n_coeffs, 1, cnt, eob, p,
2143 nnz, scan, nb, band_counts, qmul);
// Decode all residual coefficients for the current block: luma first
// (per-tx-type scan tables), then both chroma planes (DCT_DCT scan),
// maintaining the above/left nonzero-context bytes per 4x4 column/row.
// The MERGE/SPLAT macros collapse and re-expand those context bytes when
// the transform covers multiple 4x4 units.
// NOTE(review): the switch statements dispatching on b->tx / b->uvtx and
// several closing braces are elided in this dump.
2146 static void decode_coeffs(AVCodecContext *ctx)
2148 VP9Context *s = ctx->priv_data;
2150 int row = s->row, col = s->col;
2151 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2152 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2153 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2154 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
// Clip the coefficient area against the frame edge (in 4x4 units).
2155 int end_x = FFMIN(2 * (s->cols - col), w4);
2156 int end_y = FFMIN(2 * (s->rows - row), h4);
2157 int n, pl, x, y, res;
2158 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
2159 int tx = 4 * s->lossless + b->tx;
2160 const int16_t * const *yscans = vp9_scans[tx];
2161 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2162 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2163 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2164 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2165 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
// Coefficients per band for 4x4/8x8/16x16/32x32 transforms.
2166 static const int16_t band_counts[4][8] = {
2167 { 1, 2, 3, 4, 3, 16 - 13 },
2168 { 1, 2, 3, 4, 11, 64 - 21 },
2169 { 1, 2, 3, 4, 11, 256 - 21 },
2170 { 1, 2, 3, 4, 11, 1024 - 21 },
2172 const int16_t *y_band_counts = band_counts[b->tx];
2173 const int16_t *uv_band_counts = band_counts[b->uvtx];
2175 #define MERGE(la, end, step, rd) \
2176 for (n = 0; n < end; n += step) \
2177 la[n] = !!rd(&la[n])
2178 #define MERGE_CTX(step, rd) \
2180 MERGE(l, end_y, step, rd); \
2181 MERGE(a, end_x, step, rd); \
2184 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2185 for (n = 0, y = 0; y < end_y; y += step) { \
2186 for (x = 0; x < end_x; x += step, n += step * step) { \
2187 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2188 res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
2189 c, e, p, a[x] + l[y], yscans[txtp], \
2190 ynbs[txtp], y_band_counts, qmul[0]); \
2191 a[x] = l[y] = !!res; \
2193 AV_WN16A(&s->eob[n], res); \
2200 #define SPLAT(la, end, step, cond) \
2202 for (n = 1; n < end; n += step) \
2203 la[n] = la[n - 1]; \
2204 } else if (step == 4) { \
2206 for (n = 0; n < end; n += step) \
2207 AV_WN32A(&la[n], la[n] * 0x01010101); \
2209 for (n = 0; n < end; n += step) \
2210 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2212 } else /* step == 8 */ { \
2214 if (HAVE_FAST_64BIT) { \
2215 for (n = 0; n < end; n += step) \
2216 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2218 for (n = 0; n < end; n += step) { \
2219 uint32_t v32 = la[n] * 0x01010101; \
2220 AV_WN32A(&la[n], v32); \
2221 AV_WN32A(&la[n + 4], v32); \
2225 for (n = 0; n < end; n += step) \
2226 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2229 #define SPLAT_CTX(step) \
2231 SPLAT(a, end_x, step, end_x == w4); \
2232 SPLAT(l, end_y, step, end_y == h4); \
// Luma: dispatch by transform size (dispatching switch elided).
2238 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2241 MERGE_CTX(2, AV_RN16A);
2242 DECODE_Y_COEF_LOOP(2, 0,);
2246 MERGE_CTX(4, AV_RN32A);
2247 DECODE_Y_COEF_LOOP(4, 0,);
2251 MERGE_CTX(8, AV_RN64A);
2252 DECODE_Y_COEF_LOOP(8, 0, 32);
2257 #define DECODE_UV_COEF_LOOP(step) \
2258 for (n = 0, y = 0; y < end_y; y += step) { \
2259 for (x = 0; x < end_x; x += step, n += step * step) { \
2260 res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n, \
2261 16 * step * step, c, e, p, a[x] + l[y], \
2262 uvscan, uvnb, uv_band_counts, qmul[1]); \
2263 a[x] = l[y] = !!res; \
2265 AV_WN16A(&s->uveob[pl][n], res); \
2267 s->uveob[pl][n] = res; \
// Chroma: switch probability/counter tables to the uv set.
2272 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2273 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2274 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2279 for (pl = 0; pl < 2; pl++) {
2280 a = &s->above_uv_nnz_ctx[pl][col];
2281 l = &s->left_uv_nnz_ctx[pl][row & 7];
2284 DECODE_UV_COEF_LOOP(1);
2287 MERGE_CTX(2, AV_RN16A);
2288 DECODE_UV_COEF_LOOP(2);
2292 MERGE_CTX(4, AV_RN32A);
2293 DECODE_UV_COEF_LOOP(4);
2297 MERGE_CTX(8, AV_RN64A);
2298 // a 64x64 (max) uv block can ever only contain 1 tx32x32 block
2299 // so there is no need to loop
2300 res = decode_coeffs_b32(&s->c, s->uvblock[pl],
2301 1024, c, e, p, a[0] + l[0],
2302 uvscan, uvnb, uv_band_counts, qmul[1]);
2303 a[0] = l[0] = !!res;
2304 AV_WN16A(&s->uveob[pl][0], res);
/*
 * Prepare the top (*a) and left (l) edge-pixel arrays used by the intra
 * predictor for one transform block, and substitute the requested intra
 * mode with a DC_127/DC_128/DC_129 or edge-DC variant when the
 * neighbouring pixels it needs are unavailable (top of frame, left tile
 * border, right frame edge).  Edge pixels are copied from neighbouring
 * reconstructed data (or from s->intra_pred_data[] at the top of an
 * sb64 row, which holds pre-loopfilter pixels), replicated when only
 * partially available, or filled with the constants 127/129.
 * Returns the (possibly substituted) prediction mode.
 * NOTE(review): several interior lines are elided in this excerpt; the
 * comments only describe the code that is visible here.
 */
2311 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2312 uint8_t *dst_edge, ptrdiff_t stride_edge,
2313 uint8_t *dst_inner, ptrdiff_t stride_inner,
2314 uint8_t *l, int col, int x, int w,
2315 int row, int y, enum TxfmMode tx,
// availability of the top/left/right neighbours of this tx block;
// "left" is tile-aware (no prediction across the left tile border)
2318 int have_top = row > 0 || y > 0;
2319 int have_left = col > s->tiling.tile_col_start || x > 0;
2320 int have_right = x < w - 1;
// mode substitution table indexed as [mode][have_left][have_top]
2321 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2322 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2323 { DC_127_PRED, VERT_PRED } },
2324 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2325 { HOR_PRED, HOR_PRED } },
2326 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2327 { LEFT_DC_PRED, DC_PRED } },
2328 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2329 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2330 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2331 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2332 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2333 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2334 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2335 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2336 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2337 { DC_127_PRED, VERT_LEFT_PRED } },
2338 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2339 { HOR_UP_PRED, HOR_UP_PRED } },
2340 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2341 { HOR_PRED, TM_VP8_PRED } },
// per-mode description of which edge pixels the predictor reads;
// invert_left means the left array is filled top-to-bottom instead of
// bottom-to-top
2343 static const struct {
2344 uint8_t needs_left:1;
2345 uint8_t needs_top:1;
2346 uint8_t needs_topleft:1;
2347 uint8_t needs_topright:1;
2348 uint8_t invert_left:1;
2349 } edges[N_INTRA_PRED_MODES] = {
2350 [VERT_PRED] = { .needs_top = 1 },
2351 [HOR_PRED] = { .needs_left = 1 },
2352 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2353 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2354 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2355 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2356 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2357 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2358 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2359 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2360 [LEFT_DC_PRED] = { .needs_left = 1 },
2361 [TOP_DC_PRED] = { .needs_top = 1 },
2362 [DC_128_PRED] = { 0 },
2363 [DC_127_PRED] = { 0 },
2364 [DC_129_PRED] = { 0 }
2367 av_assert2(mode >= 0 && mode < 10);
2368 mode = mode_conv[mode][have_left][have_top];
2369 if (edges[mode].needs_top) {
2370 uint8_t *top, *topleft;
// n_px_have: pixels available to the right before the frame edge
// (<< !p doubles the count for the subsampled chroma planes)
2371 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
2372 int n_px_need_tr = 0;
2374 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2377 // if top of sb64-row, use s->intra_pred_data[] instead of
2378 // dst[-stride] for intra prediction (it contains pre- instead of
2379 // post-loopfilter data)
2381 top = !(row & 7) && !y ?
2382 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2383 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2385 topleft = !(row & 7) && !y ?
2386 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2387 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2388 &dst_inner[-stride_inner];
// fast path: all required pixels (incl. topleft/topright) present
2392 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2393 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2394 n_px_need + n_px_need_tr <= n_px_have) {
2398 if (n_px_need <= n_px_have) {
2399 memcpy(*a, top, n_px_need);
// partially available: copy what exists, replicate last pixel
2401 memcpy(*a, top, n_px_have);
2402 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2403 n_px_need - n_px_have);
// no top row at all: fill with the spec-mandated constant 127
2406 memset(*a, 127, n_px_need);
2408 if (edges[mode].needs_topleft) {
2409 if (have_left && have_top) {
2410 (*a)[-1] = topleft[-1];
2412 (*a)[-1] = have_top ? 129 : 127;
2415 if (tx == TX_4X4 && edges[mode].needs_topright) {
2416 if (have_top && have_right &&
2417 n_px_need + n_px_need_tr <= n_px_have) {
2418 memcpy(&(*a)[4], &top[4], 4);
// topright unavailable: replicate the last top pixel
2420 memset(&(*a)[4], (*a)[3], 4);
2425 if (edges[mode].needs_left) {
// n_px_have: pixels available below before the bottom frame edge
2427 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
2428 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2429 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2431 if (edges[mode].invert_left) {
2432 if (n_px_need <= n_px_have) {
2433 for (i = 0; i < n_px_need; i++)
2434 l[i] = dst[i * stride - 1];
2436 for (i = 0; i < n_px_have; i++)
2437 l[i] = dst[i * stride - 1];
2438 memset(&l[n_px_have], l[n_px_have - 1], n_px_need - n_px_have);
// normal orientation: l[] is filled bottom-to-top
2441 if (n_px_need <= n_px_have) {
2442 for (i = 0; i < n_px_need; i++)
2443 l[n_px_need - 1 - i] = dst[i * stride - 1];
2445 for (i = 0; i < n_px_have; i++)
2446 l[n_px_need - 1 - i] = dst[i * stride - 1];
2447 memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
// no left column: fill with the spec-mandated constant 129
2451 memset(l, 129, 4 << tx);
/*
 * Intra reconstruction of the current block: for each transform-sized
 * sub-block of the luma plane, then of both chroma planes, build the
 * prediction edges (check_intra_mode), run the intra predictor, and add
 * the inverse-transformed residual (unless the block is skipped).
 * Prediction reads from the frame buffer (dst_r, pre-emu-edge) while
 * output is written to s->dst[] (which may be a temporary buffer, see
 * decode_b).  yoff/uv_off are byte offsets of the block in the frame.
 * NOTE(review): some interior lines are elided in this excerpt.
 */
2458 static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2460 VP9Context *s = ctx->priv_data;
2462 int row = s->row, col = s->col;
// w4/h4: block size in 4px units; step1d: tx size in 4px units
2463 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2464 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
// clip the loops to the visible part of the frame
2465 int end_x = FFMIN(2 * (s->cols - col), w4);
2466 int end_y = FFMIN(2 * (s->rows - row), h4);
// lossless uses the WHT variants stored at itxfm_add[4..]
2467 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2468 int uvstep1d = 1 << b->uvtx, p;
2469 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2470 LOCAL_ALIGNED_32(uint8_t, a_buf, [64]);
2471 LOCAL_ALIGNED_32(uint8_t, l, [32]);
// luma plane
2473 for (n = 0, y = 0; y < end_y; y += step1d) {
2474 uint8_t *ptr = dst, *ptr_r = dst_r;
2475 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2476 ptr_r += 4 * step1d, n += step) {
// sub-8x8 blocks with 4x4 tx have a per-subblock mode
2477 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2479 uint8_t *a = &a_buf[32];
2480 enum TxfmType txtp = vp9_intra_txfm_type[mode];
// eob for tx > 8x8 is stored as 16-bit (see decode_coeffs)
2481 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2483 mode = check_intra_mode(s, mode, &a, ptr_r,
2484 s->frames[CUR_FRAME].tf.f->linesize[0],
2485 ptr, s->y_stride, l,
2486 col, x, w4, row, y, b->tx, 0);
2487 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2489 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2490 s->block + 16 * n, eob);
2492 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2493 dst += 4 * step1d * s->y_stride;
// U/V planes: same scheme with the chroma tx size and single uvmode
2500 step = 1 << (b->uvtx * 2);
2501 for (p = 0; p < 2; p++) {
2502 dst = s->dst[1 + p];
2503 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2504 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2505 uint8_t *ptr = dst, *ptr_r = dst_r;
2506 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2507 ptr_r += 4 * uvstep1d, n += step) {
2508 int mode = b->uvmode;
2509 uint8_t *a = &a_buf[16];
2510 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2512 mode = check_intra_mode(s, mode, &a, ptr_r,
2513 s->frames[CUR_FRAME].tf.f->linesize[1],
2514 ptr, s->uv_stride, l,
2515 col, x, w4, row, y, b->uvtx, p + 1);
2516 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
// chroma residual always uses DCT_DCT
2518 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2519 s->uvblock[p] + 16 * n, eob);
2521 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2522 dst += 4 * uvstep1d * s->uv_stride;
/*
 * Motion compensation for one luma block in one prediction direction.
 * Waits (frame-threading) until the reference frame has decoded enough
 * rows, then runs the 8-tap (or bilinear) MC filter; when the filter
 * would read outside the reference frame, the source region is first
 * copied into s->edge_emu_buffer with edge replication.
 * (y, x) is the block position in pixels; (bw, bh) the block size;
 * (w, h) the reference frame dimensions.
 * NOTE(review): the lines splitting mv into integer/fractional parts
 * are elided in this excerpt.
 */
2527 static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2528 uint8_t *dst, ptrdiff_t dst_stride,
2529 const uint8_t *ref, ptrdiff_t ref_stride,
2530 ThreadFrame *ref_frame,
2531 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2532 int bw, int bh, int w, int h)
2534 int mx = mv->x, my = mv->y, th;
2538 ref += y * ref_stride + x;
2541 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2542 // we use +7 because the last 7 pixels of each sbrow can be changed in
2543 // the longest loopfilter of the next sbrow
// progress is tracked in sb64 rows, hence >> 6
2544 th = (y + bh + 4 * !!my + 7) >> 6;
2545 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
// subpel MC reads 3 pixels before and 4 after the block in each
// filtered dimension; emulate the edge if that leaves the frame
2546 if (x < !!mx * 3 || y < !!my * 3 ||
2547 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2548 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2549 ref - !!my * 3 * ref_stride - !!mx * 3,
2551 bw + !!mx * 7, bh + !!my * 7,
2552 x - !!mx * 3, y - !!my * 3, w, h);
// 80 is the stride of edge_emu_buffer for luma
2553 ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2556 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
/*
 * Motion compensation for both chroma planes (U and V) in one
 * prediction direction; same structure as mc_luma_dir, but operating
 * at chroma resolution (progress check uses >> 5, i.e. the chroma
 * half of an sb64 row) and handling two source planes, each with its
 * own edge emulation when needed.
 * NOTE(review): the lines splitting mv into integer/fractional parts
 * are elided in this excerpt.
 */
2559 static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2560 uint8_t *dst_u, uint8_t *dst_v,
2561 ptrdiff_t dst_stride,
2562 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2563 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2564 ThreadFrame *ref_frame,
2565 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2566 int bw, int bh, int w, int h)
2568 int mx = mv->x, my = mv->y, th;
2572 ref_u += y * src_stride_u + x;
2573 ref_v += y * src_stride_v + x;
2576 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2577 // we use +7 because the last 7 pixels of each sbrow can be changed in
2578 // the longest loopfilter of the next sbrow
2579 th = (y + bh + 4 * !!my + 7) >> 5;
2580 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
// out-of-frame reads possible: emulate edges for both planes
2581 if (x < !!mx * 3 || y < !!my * 3 ||
2582 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2583 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2584 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2586 bw + !!mx * 7, bh + !!my * 7,
2587 x - !!mx * 3, y - !!my * 3, w, h);
2588 ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2589 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2591 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2592 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2594 bw + !!mx * 7, bh + !!my * 7,
2595 x - !!mx * 3, y - !!my * 3, w, h);
2596 ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2597 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
// in-frame fast path: filter directly from the reference planes
2599 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2600 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
/*
 * Inter reconstruction of the current block: luma motion compensation
 * (split into per-subblock calls for the sub-8x8 partitions 8x4, 4x8
 * and 4x4, each possibly bi-directional), then chroma MC with a mv
 * averaged over the four sub-block mvs for sub-8x8 blocks, and finally
 * the inverse-transform residual adds for all planes (skipped blocks
 * have no residual).
 * NOTE(review): several interior lines (e.g. the second-reference
 * setup and the "if (b->comp)" conditions) are elided in this excerpt.
 */
2604 static void inter_recon(AVCodecContext *ctx)
// log2 of MC function width index per block size, for [0]=luma,[1]=chroma
2606 static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
2607 { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
2608 { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
2610 VP9Context *s = ctx->priv_data;
2612 int row = s->row, col = s->col;
2613 ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]], *tref2;
2614 AVFrame *ref1 = tref1->f, *ref2;
2615 int w1 = ref1->width, h1 = ref1->height, w2, h2;
2616 ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
// second reference (compound prediction)
2619 tref2 = &s->refs[s->refidx[b->ref[1]]];
// y inter pred
2626 if (b->bs > BS_8x8) {
2627 if (b->bs == BS_8x4) {
2628 mc_luma_dir(s, s->dsp.mc[3][b->filter][0], s->dst[0], ls_y,
2629 ref1->data[0], ref1->linesize[0], tref1,
2630 row << 3, col << 3, &b->mv[0][0], 8, 4, w1, h1);
2631 mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
2632 s->dst[0] + 4 * ls_y, ls_y,
2633 ref1->data[0], ref1->linesize[0], tref1,
2634 (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1);
// second direction for compound prediction
2637 mc_luma_dir(s, s->dsp.mc[3][b->filter][1], s->dst[0], ls_y,
2638 ref2->data[0], ref2->linesize[0], tref2,
2639 row << 3, col << 3, &b->mv[0][1], 8, 4, w2, h2);
2640 mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
2641 s->dst[0] + 4 * ls_y, ls_y,
2642 ref2->data[0], ref2->linesize[0], tref2,
2643 (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2);
2645 } else if (b->bs == BS_4x8) {
2646 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2647 ref1->data[0], ref1->linesize[0], tref1,
2648 row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1);
2649 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2650 ref1->data[0], ref1->linesize[0], tref1,
2651 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1);
2654 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2655 ref2->data[0], ref2->linesize[0], tref2,
2656 row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2);
2657 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2658 ref2->data[0], ref2->linesize[0], tref2,
2659 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2);
2662 av_assert2(b->bs == BS_4x4);
2664 // FIXME if two horizontally adjacent blocks have the same MV,
2665 // do a w8 instead of a w4 call
2666 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2667 ref1->data[0], ref1->linesize[0], tref1,
2668 row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1);
2669 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2670 ref1->data[0], ref1->linesize[0], tref1,
2671 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1);
2672 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2673 s->dst[0] + 4 * ls_y, ls_y,
2674 ref1->data[0], ref1->linesize[0], tref1,
2675 (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1);
2676 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2677 s->dst[0] + 4 * ls_y + 4, ls_y,
2678 ref1->data[0], ref1->linesize[0], tref1,
2679 (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1);
2682 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2683 ref2->data[0], ref2->linesize[0], tref2,
2684 row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2);
2685 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2686 ref2->data[0], ref2->linesize[0], tref2,
2687 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2);
2688 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2689 s->dst[0] + 4 * ls_y, ls_y,
2690 ref2->data[0], ref2->linesize[0], tref2,
2691 (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2);
2692 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2693 s->dst[0] + 4 * ls_y + 4, ls_y,
2694 ref2->data[0], ref2->linesize[0], tref2,
2695 (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2);
// 8x8 and larger: one MC call per direction for the whole block
2699 int bwl = bwlog_tab[0][b->bs];
2700 int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
2702 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], s->dst[0], ls_y,
2703 ref1->data[0], ref1->linesize[0], tref1,
2704 row << 3, col << 3, &b->mv[0][0],bw, bh, w1, h1);
2707 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], s->dst[0], ls_y,
2708 ref2->data[0], ref2->linesize[0], tref2,
2709 row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2);
// uv inter pred
2714 int bwl = bwlog_tab[1][b->bs];
2715 int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
// sub-8x8 blocks use the rounded average of the four sub-block mvs
// for chroma prediction
2724 if (b->bs > BS_8x8) {
2725 mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
2726 mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
2731 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
2732 s->dst[1], s->dst[2], ls_uv,
2733 ref1->data[1], ref1->linesize[1],
2734 ref1->data[2], ref1->linesize[2], tref1,
2735 row << 2, col << 2, &mvuv, bw, bh, w1, h1);
2738 if (b->bs > BS_8x8) {
2739 mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
2740 mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
2744 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
2745 s->dst[1], s->dst[2], ls_uv,
2746 ref2->data[1], ref2->linesize[1],
2747 ref2->data[2], ref2->linesize[2], tref2,
2748 row << 2, col << 2, &mvuv, bw, bh, w2, h2);
2753 /* residual adds, mostly copied from intra_recon() */
2755 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2756 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2757 int end_x = FFMIN(2 * (s->cols - col), w4);
2758 int end_y = FFMIN(2 * (s->rows - row), h4);
2759 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2760 int uvstep1d = 1 << b->uvtx, p;
2761 uint8_t *dst = s->dst[0];
// y itxfm add
2764 for (n = 0, y = 0; y < end_y; y += step1d) {
2766 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2767 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
// inter residual always uses DCT_DCT
2770 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2771 s->block + 16 * n, eob);
2773 dst += 4 * s->y_stride * step1d;
// uv itxfm add
2779 step = 1 << (b->uvtx * 2);
2780 for (p = 0; p < 2; p++) {
2781 dst = s->dst[p + 1];
2782 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2784 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2785 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2788 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2789 s->uvblock[p] + 16 * n, eob);
2791 dst += 4 * uvstep1d * s->uv_stride;
/*
 * Record, in lflvl->mask[is_uv][0=row edges|1=col edges][y][filter-size],
 * bitmasks of the 8x8-grid positions within the current sb64 where the
 * loopfilter must run for this block, given its transform size and
 * whether it is a skipped inter block (which only filters block
 * boundaries, not inner tx edges).  (row_and_7, col_and_7) is the block
 * position inside the sb64; (w, h) its size in 8px units; col_end /
 * row_end flag partially-visible odd edges at the frame border.
 * NOTE(review): several interior lines are elided in this excerpt; the
 * exact per-branch mask semantics should be verified against the
 * complete source.
 */
2797 static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
2798 int row_and_7, int col_and_7,
2799 int w, int h, int col_end, int row_end,
2800 enum TxfmMode tx, int skip_inter)
2802 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2803 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2804 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2805 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2807 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2808 // edges. This means that for UV, we work on two subsampled blocks at
2809 // a time, and we only use the topleft block's mode information to set
2810 // things like block strength. Thus, for any block size smaller than
2811 // 16x16, ignore the odd portion of the block.
2812 if (tx == TX_4X4 && is_uv) {
// 4x4 tx, non-skipped: inner 4px edges are filtered as well
2827 if (tx == TX_4X4 && !skip_inter) {
// t: bit of this block's column; m_col: mask of all its columns
2828 int t = 1 << col_and_7, m_col = (t << w) - t, y;
2829 int m_col_odd = (t << (w - 1)) - t;
2831 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
2833 int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2835 for (y = row_and_7; y < h + row_and_7; y++) {
2836 int col_mask_id = 2 - !(y & 7);
2838 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2839 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2840 // for odd lines, if the odd col is not being filtered,
2841 // skip odd row also:
2848 // if a/c are even row/col and b/d are odd, and d is skipped,
2849 // e.g. right edge of size-66x66.webm, then skip b also (bug)
2850 if ((col_end & 1) && (y & 1)) {
2851 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
2853 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
// luma variant of the same: 8px rows identified by mask 0x11
2857 int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2859 for (y = row_and_7; y < h + row_and_7; y++) {
2860 int col_mask_id = 2 - !(y & 3);
2862 lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
2863 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2864 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
2865 lflvl->mask[is_uv][0][y][3] |= m_col;
2866 lflvl->mask[is_uv][1][y][3] |= m_col;
// larger tx sizes (and skipped 4x4): only block-boundary edges
2870 int y, t = 1 << col_and_7, m_col = (t << w) - t;
2873 int mask_id = (tx == TX_8X8);
2874 int l2 = tx + is_uv - 1, step1d = 1 << l2;
2875 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2876 int m_row = m_col & masks[l2];
2878 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
2879 // 8wd loopfilter to prevent going off the visible edge.
2880 if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2881 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2882 int m_row_8 = m_row - m_row_16;
2884 for (y = row_and_7; y < h + row_and_7; y++) {
2885 lflvl->mask[is_uv][0][y][0] |= m_row_16;
2886 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2889 for (y = row_and_7; y < h + row_and_7; y++)
2890 lflvl->mask[is_uv][0][y][mask_id] |= m_row;
2893 if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2894 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2895 lflvl->mask[is_uv][1][y][0] |= m_col;
2896 if (y - row_and_7 == h - 1)
2897 lflvl->mask[is_uv][1][y][1] |= m_col;
2899 for (y = row_and_7; y < h + row_and_7; y += step1d)
2900 lflvl->mask[is_uv][1][y][mask_id] |= m_col;
2902 } else if (tx != TX_4X4) {
// 8x8/16x16/32x32 on the "other" plane branch
2905 mask_id = (tx == TX_8X8) || (is_uv && h == 1);
2906 lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
2907 mask_id = (tx == TX_8X8) || (is_uv && w == 1);
2908 for (y = row_and_7; y < h + row_and_7; y++)
2909 lflvl->mask[is_uv][0][y][mask_id] |= t;
// skipped 4x4 chroma: only the leading edge bits
2911 int t8 = t & 0x01, t4 = t - t8;
2913 for (y = row_and_7; y < h + row_and_7; y++) {
2914 lflvl->mask[is_uv][0][y][2] |= t4;
2915 lflvl->mask[is_uv][0][y][1] |= t8;
2917 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
// skipped 4x4 luma
2919 int t8 = t & 0x11, t4 = t - t8;
2921 for (y = row_and_7; y < h + row_and_7; y++) {
2922 lflvl->mask[is_uv][0][y][2] |= t4;
2923 lflvl->mask[is_uv][0][y][1] |= t8;
2925 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
/*
 * Decode and reconstruct one coding block at (row, col): set mv search
 * limits, decode modes/mvs/coefficients, run intra or inter recon
 * (redirecting output into temporary buffers when the block overhangs
 * the frame edge, then copying the visible part back), compute the
 * loopfilter level and edge masks for the block, and advance the
 * per-block coefficient pointers.
 * NOTE(review): several interior lines (e.g. the mode decoding call
 * and the skip-coef branch) are elided in this excerpt.
 */
2930 static void decode_b(AVCodecContext *ctx, int row, int col,
2931 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2932 enum BlockLevel bl, enum BlockPartition bp)
2934 VP9Context *s = ctx->priv_data;
2936 enum BlockSize bs = bl * 3 + bp;
2937 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
2939 AVFrame *f = s->frames[CUR_FRAME].tf.f;
// mv range limits for this block position (in 1/8-pel units)
2945 s->min_mv.x = -(128 + col * 64);
2946 s->min_mv.y = -(128 + row * 64);
2947 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
2948 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
// chroma tx is one size smaller when the block is only one luma-tx wide/high
2954 b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
// skipped block: zero the above/left non-zero-coefficient contexts
2961 #define SPLAT_ZERO_CTX(v, n) \
2963 case 1: v = 0; break; \
2964 case 2: AV_ZERO16(&v); break; \
2965 case 4: AV_ZERO32(&v); break; \
2966 case 8: AV_ZERO64(&v); break; \
2967 case 16: AV_ZERO128(&v); break; \
2969 #define SPLAT_ZERO_YUV(dir, var, off, n) \
2971 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
2972 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
2973 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
2977 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1); break;
2978 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2); break;
2979 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4); break;
2980 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8); break;
2983 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1); break;
2984 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2); break;
2985 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4); break;
2986 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8); break;
// advance per-block coefficient/eob pointers past this block
2991 s->block += w4 * h4 * 64;
2992 s->uvblock[0] += w4 * h4 * 16;
2993 s->uvblock[1] += w4 * h4 * 16;
2994 s->eob += 4 * w4 * h4;
2995 s->uveob[0] += w4 * h4;
2996 s->uveob[1] += w4 * h4;
3002 // emulated overhangs if the stride of the target buffer can't hold. This
3003 // allows to support emu-edge and so on even if we have large block
// overhang: block extends past the buffer stride or the frame bottom
3005 emu[0] = (col + w4) * 8 > f->linesize[0] ||
3006 (row + h4) > s->rows;
3007 emu[1] = (col + w4) * 4 > f->linesize[1] ||
3008 (row + h4) > s->rows;
// reconstruct into temporary buffers when overhanging
3010 s->dst[0] = s->tmp_y;
3013 s->dst[0] = f->data[0] + yoff;
3014 s->y_stride = f->linesize[0];
3017 s->dst[1] = s->tmp_uv[0];
3018 s->dst[2] = s->tmp_uv[1];
3021 s->dst[1] = f->data[1] + uvoff;
3022 s->dst[2] = f->data[2] + uvoff;
3023 s->uv_stride = f->linesize[1];
3026 intra_recon(ctx, yoff, uvoff);
// copy the visible part of the temporary luma buffer back into the
// frame, in power-of-two wide chunks via the unfiltered MC copy fns
3031 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3033 for (n = 0; o < w; n++) {
3038 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3039 s->tmp_y + o, 64, h, 0, 0);
// same for the two chroma planes
3045 int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
3047 for (n = 1; o < w; n++) {
3052 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3053 s->tmp_uv[0] + o, 32, h, 0, 0);
3054 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3055 s->tmp_uv[1] + o, 32, h, 0, 0);
3061 // pick filter level and find edges to apply filter to
3062 if (s->filter.level &&
3063 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3064 [b->mode[3] != ZEROMV]) > 0) {
3065 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3066 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3068 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3069 mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3070 mask_edges(lflvl, 1, row7, col7, x_end, y_end,
3071 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3072 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3073 b->uvtx, skip_inter);
// lazily fill the limit/mblim LUT entries for this filter level
3075 if (!s->filter.lim_lut[lvl]) {
3076 int sharp = s->filter.sharpness;
3080 limit >>= (sharp + 3) >> 2;
3081 limit = FFMIN(limit, 9 - sharp);
3083 limit = FFMAX(limit, 1);
3085 s->filter.lim_lut[lvl] = limit;
3086 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
// advance per-block coefficient/eob pointers past this block
3092 s->block += w4 * h4 * 64;
3093 s->uvblock[0] += w4 * h4 * 16;
3094 s->uvblock[1] += w4 * h4 * 16;
3095 s->eob += 4 * w4 * h4;
3096 s->uveob[0] += w4 * h4;
3097 s->uveob[1] += w4 * h4;
/*
 * Recursively decode the partition tree of one superblock (or
 * sub-block) at level bl: read the partition symbol from the range
 * coder (with context from the above/left partition contexts), then
 * decode the resulting block(s) via decode_b or recurse via decode_sb.
 * Blocks straddling the right/bottom frame edge have a restricted
 * partition choice (only split or the edge-fitting partition).
 * NOTE(review): some interior lines (e.g. the end of the
 * switch and the context updates) are elided in this excerpt.
 */
3101 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3102 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3104 VP9Context *s = ctx->priv_data;
// partition probability context from the above/left partition state
3105 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3106 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
3107 const uint8_t *p = s->keyframe ? vp9_default_kf_partition_probs[bl][c] :
3108 s->prob.p.partition[bl][c];
3109 enum BlockPartition bp;
// hbs: half block size at this level, in 8px units
3110 ptrdiff_t hbs = 4 >> bl;
3111 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3112 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3115 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3116 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3117 } else if (col + hbs < s->cols) { // FIXME why not <=?
3118 if (row + hbs < s->rows) { // FIXME why not <=?
// fully inside the frame: all four partitions possible
3119 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3121 case PARTITION_NONE:
3122 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3125 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3126 yoff += hbs * 8 * y_stride;
3127 uvoff += hbs * 4 * uv_stride;
3128 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3131 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3134 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3136 case PARTITION_SPLIT:
3137 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3138 decode_sb(ctx, row, col + hbs, lflvl,
3139 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3140 yoff += hbs * 8 * y_stride;
3141 uvoff += hbs * 4 * uv_stride;
3142 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3143 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3144 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
// bottom edge: only split or horizontal partition fit
3149 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3150 bp = PARTITION_SPLIT;
3151 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3152 decode_sb(ctx, row, col + hbs, lflvl,
3153 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3156 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// right edge: only split or vertical partition fit
3158 } else if (row + hbs < s->rows) { // FIXME why not <=?
3159 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3160 bp = PARTITION_SPLIT;
3161 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3162 yoff += hbs * 8 * y_stride;
3163 uvoff += hbs * 4 * uv_stride;
3164 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3167 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// bottom-right corner: split is the only legal choice
3170 bp = PARTITION_SPLIT;
3171 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
// update partition counts for probability adaptation
3173 s->counts.partition[bl][c][bp]++;
/*
 * Re-walk the partition tree of one superblock using the block
 * structure (b->bl / b->bp) already stored in memory from a previous
 * decode pass, instead of reading partition symbols from the
 * bitstream; each leaf is reconstructed via decode_b.
 * NOTE(review): some interior lines are elided in this excerpt;
 * presumably this is the second pass of the two-pass decode path —
 * confirm against the full source.
 */
3176 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3177 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3179 VP9Context *s = ctx->priv_data;
3181 ptrdiff_t hbs = 4 >> bl;
3182 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3183 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3186 av_assert2(b->bl == BL_8X8);
3187 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3188 } else if (s->b->bl == bl) {
// stored block is a leaf at this level; handle H/V second halves
3189 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3190 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3191 yoff += hbs * 8 * y_stride;
3192 uvoff += hbs * 4 * uv_stride;
3193 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3194 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3197 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// stored block is deeper: recurse into the (up to) four quadrants
3200 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3201 if (col + hbs < s->cols) { // FIXME why not <=?
3202 if (row + hbs < s->rows) {
3203 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
3204 uvoff + 4 * hbs, bl + 1);
3205 yoff += hbs * 8 * y_stride;
3206 uvoff += hbs * 4 * uv_stride;
3207 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3208 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3209 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3213 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3215 } else if (row + hbs < s->rows) {
3216 yoff += hbs * 8 * y_stride;
3217 uvoff += hbs * 4 * uv_stride;
3218 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
/*
 * Apply the in-loop deblocking filter to one 64x64 superblock, using
 * the per-edge bitmasks and per-position filter levels collected in
 * lflvl by mask_edges()/decode_b().  For each plane, column (vertical)
 * edges are filtered first, then row (horizontal) edges; mask index
 * [0]=16px, [1]=8px, [2]=4px and [3]=inner-4px filters, and adjacent
 * same-level edges are merged into the wider _16 / mix2 dsp calls.
 * L is the filter level, H/E/I the derived thresholds (H = L >> 4,
 * E/I from the mblim/lim LUTs filled in decode_b).
 * NOTE(review): some interior lines are elided in this excerpt.
 */
3223 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3224 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3226 VP9Context *s = ctx->priv_data;
3227 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3228 uint8_t *dst = f->data[0] + yoff, *lvl = lflvl->level;
3229 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
3232 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3233 // if you think of them as acting on a 8x8 block max, we can interleave
3234 // each v/h within the single x loop, but that only works if we work on
3235 // 8 pixel blocks, and we won't always do that (we want at least 16px
3236 // to use SSE2 optimizations, perhaps 32 for AVX2)
3238 // filter edges between columns, Y plane (e.g. block1 | block2)
3239 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
3240 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
3241 uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
3242 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3243 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3244 unsigned hm = hm1 | hm2 | hm13 | hm23;
3246 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
3248 int L = *l, H = L >> 4;
3249 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
// two vertically adjacent 16px edges -> one 16-wide call
3252 if (hmask1[0] & x) {
3253 if (hmask2[0] & x) {
3254 av_assert2(l[8] == L);
3255 s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
3257 s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
3259 } else if (hm2 & x) {
// different levels above/below: pack both into E/I (mix2 call)
3262 E |= s->filter.mblim_lut[L] << 8;
3263 I |= s->filter.lim_lut[L] << 8;
3264 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3266 [0](ptr, ls_y, E, I, H);
3268 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3269 [0](ptr, ls_y, E, I, H);
3272 } else if (hm2 & x) {
3273 int L = l[8], H = L >> 4;
3274 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3277 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3278 [0](ptr + 8 * ls_y, ls_y, E, I, H);
// inner 4px edges (mask index 3), offset by 4 pixels
3282 int L = *l, H = L >> 4;
3283 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3288 E |= s->filter.mblim_lut[L] << 8;
3289 I |= s->filter.lim_lut[L] << 8;
3290 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
3292 s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
3294 } else if (hm23 & x) {
3295 int L = l[8], H = L >> 4;
3296 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3298 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
3304 // filter edges between rows, Y plane (e.g. ------)
3306 dst = f->data[0] + yoff;
3308 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
3309 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
3310 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
3312 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
3315 int L = *l, H = L >> 4;
3316 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
// two horizontally adjacent 16px edges -> one 16-wide call
3319 if (vmask[0] & (x << 1)) {
3320 av_assert2(l[1] == L);
3321 s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
3323 s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
3325 } else if (vm & (x << 1)) {
3328 E |= s->filter.mblim_lut[L] << 8;
3329 I |= s->filter.lim_lut[L] << 8;
3330 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3331 [!!(vmask[1] & (x << 1))]
3332 [1](ptr, ls_y, E, I, H);
3334 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3335 [1](ptr, ls_y, E, I, H);
3337 } else if (vm & (x << 1)) {
3338 int L = l[1], H = L >> 4;
3339 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3341 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
3342 [1](ptr + 8, ls_y, E, I, H);
// inner 4px row edges, offset by 4 lines
3346 int L = *l, H = L >> 4;
3347 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3349 if (vm3 & (x << 1)) {
3352 E |= s->filter.mblim_lut[L] << 8;
3353 I |= s->filter.lim_lut[L] << 8;
3354 s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
3356 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
3358 } else if (vm3 & (x << 1)) {
3359 int L = l[1], H = L >> 4;
3360 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3362 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
3367 // same principle but for U/V planes
3368 for (p = 0; p < 2; p++) {
3370 dst = f->data[1 + p] + uvoff;
3371 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
3372 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
3373 uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
3374 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
3375 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
3377 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
3380 int L = *l, H = L >> 4;
3381 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3383 if (hmask1[0] & x) {
3384 if (hmask2[0] & x) {
3385 av_assert2(l[16] == L);
3386 s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
3388 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
3390 } else if (hm2 & x) {
3393 E |= s->filter.mblim_lut[L] << 8;
3394 I |= s->filter.lim_lut[L] << 8;
3395 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3397 [0](ptr, ls_uv, E, I, H);
3399 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3400 [0](ptr, ls_uv, E, I, H);
3402 } else if (hm2 & x) {
3403 int L = l[16], H = L >> 4;
3404 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3406 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3407 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
// chroma row edges
3415 dst = f->data[1 + p] + uvoff;
3416 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
3417 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
3418 unsigned vm = vmask[0] | vmask[1] | vmask[2];
3420 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
3423 int L = *l, H = L >> 4;
3424 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3427 if (vmask[0] & (x << 2)) {
3428 av_assert2(l[2] == L);
3429 s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
3431 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
3433 } else if (vm & (x << 2)) {
3436 E |= s->filter.mblim_lut[L] << 8;
3437 I |= s->filter.lim_lut[L] << 8;
3438 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3439 [!!(vmask[1] & (x << 2))]
3440 [1](ptr, ls_uv, E, I, H);
3442 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3443 [1](ptr, ls_uv, E, I, H);
3445 } else if (vm & (x << 2)) {
3446 int L = l[2], H = L >> 4;
3447 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3449 s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
3450 [1](ptr + 8, ls_uv, E, I, H);
/*
 * Compute the pixel range [*start, *end) covered by tile number idx when
 * n block units are split into 2^log2_n tiles. Tile boundaries are
 * derived in block units and converted to pixels via << 3 (8 pixels per
 * unit); both ends are clamped to the frame size n.
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first = (idx * n) >> log2_n;
    int last  = ((idx + 1) * n) >> log2_n;

    if (first > n)
        first = n;
    if (last > n)
        last = n;
    *start = first << 3;
    *end   = last << 3;
}
3468 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3469 int max_count, int update_factor)
3471 unsigned ct = ct0 + ct1, p2, p1;
3477 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3478 p2 = av_clip(p2, 1, 255);
3479 ct = FFMIN(ct, max_count);
3480 update_factor = FASTDIV(update_factor * ct, max_count);
3482 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3483 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
3486 static void adapt_probs(VP9Context *s)
3489 prob_context *p = &s->prob_ctx[s->framectxid].p;
3490 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
3493 for (i = 0; i < 4; i++)
3494 for (j = 0; j < 2; j++)
3495 for (k = 0; k < 2; k++)
3496 for (l = 0; l < 6; l++)
3497 for (m = 0; m < 6; m++) {
3498 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3499 unsigned *e = s->counts.eob[i][j][k][l][m];
3500 unsigned *c = s->counts.coef[i][j][k][l][m];
3502 if (l == 0 && m >= 3) // dc only has 3 pt
3505 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3506 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3507 adapt_prob(&pp[2], c[1], c[2], 24, uf);
3510 if (s->keyframe || s->intraonly) {
3511 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3512 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3513 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3514 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
3519 for (i = 0; i < 3; i++)
3520 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
3523 for (i = 0; i < 4; i++)
3524 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
3527 if (s->comppredmode == PRED_SWITCHABLE) {
3528 for (i = 0; i < 5; i++)
3529 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
3533 if (s->comppredmode != PRED_SINGLEREF) {
3534 for (i = 0; i < 5; i++)
3535 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3536 s->counts.comp_ref[i][1], 20, 128);
3539 if (s->comppredmode != PRED_COMPREF) {
3540 for (i = 0; i < 5; i++) {
3541 uint8_t *pp = p->single_ref[i];
3542 unsigned (*c)[2] = s->counts.single_ref[i];
3544 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3545 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3549 // block partitioning
3550 for (i = 0; i < 4; i++)
3551 for (j = 0; j < 4; j++) {
3552 uint8_t *pp = p->partition[i][j];
3553 unsigned *c = s->counts.partition[i][j];
3555 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3556 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3557 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3561 if (s->txfmmode == TX_SWITCHABLE) {
3562 for (i = 0; i < 2; i++) {
3563 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3565 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3566 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3567 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3568 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3569 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3570 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3574 // interpolation filter
3575 if (s->filtermode == FILTER_SWITCHABLE) {
3576 for (i = 0; i < 4; i++) {
3577 uint8_t *pp = p->filter[i];
3578 unsigned *c = s->counts.filter[i];
3580 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3581 adapt_prob(&pp[1], c[1], c[2], 20, 128);
3586 for (i = 0; i < 7; i++) {
3587 uint8_t *pp = p->mv_mode[i];
3588 unsigned *c = s->counts.mv_mode[i];
3590 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3591 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3592 adapt_prob(&pp[2], c[1], c[3], 20, 128);
3597 uint8_t *pp = p->mv_joint;
3598 unsigned *c = s->counts.mv_joint;
3600 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3601 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3602 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3606 for (i = 0; i < 2; i++) {
3608 unsigned *c, (*c2)[2], sum;
3610 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3611 s->counts.mv_comp[i].sign[1], 20, 128);
3613 pp = p->mv_comp[i].classes;
3614 c = s->counts.mv_comp[i].classes;
3615 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3616 adapt_prob(&pp[0], c[0], sum, 20, 128);
3618 adapt_prob(&pp[1], c[1], sum, 20, 128);
3620 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3621 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3623 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3624 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3626 adapt_prob(&pp[6], c[6], sum, 20, 128);
3627 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3628 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3629 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3631 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3632 s->counts.mv_comp[i].class0[1], 20, 128);
3633 pp = p->mv_comp[i].bits;
3634 c2 = s->counts.mv_comp[i].bits;
3635 for (j = 0; j < 10; j++)
3636 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3638 for (j = 0; j < 2; j++) {
3639 pp = p->mv_comp[i].class0_fp[j];
3640 c = s->counts.mv_comp[i].class0_fp[j];
3641 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3642 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3643 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3645 pp = p->mv_comp[i].fp;
3646 c = s->counts.mv_comp[i].fp;
3647 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3648 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3649 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3651 if (s->highprecisionmvs) {
3652 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3653 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3654 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3655 s->counts.mv_comp[i].hp[1], 20, 128);
3660 for (i = 0; i < 4; i++) {
3661 uint8_t *pp = p->y_mode[i];
3662 unsigned *c = s->counts.y_mode[i], sum, s2;
3664 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3665 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3666 sum -= c[TM_VP8_PRED];
3667 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3668 sum -= c[VERT_PRED];
3669 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3670 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3672 adapt_prob(&pp[3], s2, sum, 20, 128);
3674 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3675 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3676 sum -= c[DIAG_DOWN_LEFT_PRED];
3677 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3678 sum -= c[VERT_LEFT_PRED];
3679 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3680 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3684 for (i = 0; i < 10; i++) {
3685 uint8_t *pp = p->uv_mode[i];
3686 unsigned *c = s->counts.uv_mode[i], sum, s2;
3688 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3689 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3690 sum -= c[TM_VP8_PRED];
3691 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3692 sum -= c[VERT_PRED];
3693 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3694 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3696 adapt_prob(&pp[3], s2, sum, 20, 128);
3698 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3699 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3700 sum -= c[DIAG_DOWN_LEFT_PRED];
3701 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3702 sum -= c[VERT_LEFT_PRED];
3703 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3704 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
/*
 * Release the decoder's resolution-dependent scratch allocations: the
 * intra-prediction edge buffer, the per-block metadata array and the
 * block coefficient storage (presumably allocated by the buffer-setup
 * code, e.g. update_block_buffers() -- confirm against full source).
 * av_freep() NULLs each pointer, so repeated calls are harmless.
 */
static void free_buffers(VP9Context *s)
{
    av_freep(&s->intra_pred_data[0]);
    av_freep(&s->b_base);
    av_freep(&s->block_base);
}
3715 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3717 VP9Context *s = ctx->priv_data;
3720 for (i = 0; i < 3; i++) {
3721 if (s->frames[i].tf.f->data[0])
3722 vp9_unref_frame(ctx, &s->frames[i]);
3723 av_frame_free(&s->frames[i].tf.f);
3725 for (i = 0; i < 8; i++) {
3726 if (s->refs[i].f->data[0])
3727 ff_thread_release_buffer(ctx, &s->refs[i]);
3728 av_frame_free(&s->refs[i].f);
3729 if (s->next_refs[i].f->data[0])
3730 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3731 av_frame_free(&s->next_refs[i].f);
/*
 * Decode one VP9 packet into *frame (an AVFrame), setting *got_frame
 * when a displayable picture is produced.
 *
 * NOTE(review): this excerpt is elided -- several error-return bodies,
 * closing braces, else-arms and declarations (e.g. AVFrame *f; int
 * j,k,l,m; unsigned tile_size) are not visible here. Comments describe
 * only what the visible code shows.
 */
static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
                            int *got_frame, AVPacket *pkt)
    const uint8_t *data = pkt->data;
    int size = pkt->size;
    VP9Context *s = ctx->priv_data;
    int res, tile_row, tile_col, i, ref, row, col;
    // the previous segmentation map must be kept when segmentation is on
    // but the map is not updated by this frame
    int retain_segmap_ref = s->segmentation.enabled && !s->segmentation.update_map;
    ptrdiff_t yoff, uvoff, ls_y, ls_uv;

    if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
    } else if (res == 0) {
        // header returned 0: no coded frame data; the packet re-displays
        // already-decoded reference frame #ref
        if (!s->refs[ref].f->data[0]) {
            av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)

    // rotate internal frames: keep references to the previous CUR_FRAME
    // for segmentation-map and mv prediction where allowed, then get a
    // fresh CUR_FRAME buffer
    if (!retain_segmap_ref) {
        if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
            vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
        if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
            (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
    if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
        vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);
    if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
        (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
    if (s->frames[CUR_FRAME].tf.f->data[0])
        vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
    if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
    f = s->frames[CUR_FRAME].tf.f;
    f->key_frame = s->keyframe;
    f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    ls_y = f->linesize[0];
    ls_uv =f->linesize[1];

    // set up the reference list for the *next* frame: slots named in
    // refreshrefmask point at the new CUR_FRAME, the rest carry over
    for (i = 0; i < 8; i++) {
        if (s->next_refs[i].f->data[0])
            ff_thread_release_buffer(ctx, &s->next_refs[i]);
        if (s->refreshrefmask & (1 << i)) {
            res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
            res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);

    // export colorimetry signalled in the uncompressed header
        ctx->color_range = AVCOL_RANGE_JPEG;
        ctx->color_range = AVCOL_RANGE_MPEG;

    switch (s->colorspace) {
    case 1: ctx->colorspace = AVCOL_SPC_BT470BG; break;
    case 2: ctx->colorspace = AVCOL_SPC_BT709; break;
    case 3: ctx->colorspace = AVCOL_SPC_SMPTE170M; break;
    case 4: ctx->colorspace = AVCOL_SPC_SMPTE240M; break;

    // main tile decode loop
    // reset the above-row prediction contexts for the whole frame width
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->keyframe || s->intraonly) {
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
    memset(s->above_segpred_ctx, 0, s->cols);
    // two-pass decoding is used for frame threading, but only when this
    // frame refreshes the context and was not coded in parallel mode
    s->pass = s->frames[CUR_FRAME].uses_2pass =
        ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
    if ((res = update_block_buffers(ctx)) < 0) {
        av_log(ctx, AV_LOG_ERROR,
               "Failed to allocate block buffers\n");

    if (s->refreshctx && s->parallelmode) {
        // parallel mode: store the header-signalled probabilities into the
        // frame context right away so following frames do not have to wait
        // for this frame's statistics
        for (i = 0; i < 4; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++)
                            memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
                                   s->prob.coef[i][j][k][l][m], 3);
            if (s->txfmmode == i)
        s->prob_ctx[s->framectxid].p = s->prob.p;
        ff_thread_finish_setup(ctx);
    } else if (!s->refreshctx) {
        ff_thread_finish_setup(ctx);

        // reset the per-pass coefficient/eob output pointers
        s->block = s->block_base;
        s->uvblock[0] = s->uvblock_base[0];
        s->uvblock[1] = s->uvblock_base[1];
        s->eob = s->eob_base;
        s->uveob[0] = s->uveob_base[0];
        s->uveob[1] = s->uveob_base[1];

        // one range decoder (s->c_b[tile_col]) is set up per tile column
        for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
            set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
                            tile_row, s->tiling.log2_tile_rows, s->sb_rows);

            for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
                // the last tile has no explicit size field; all others are
                // prefixed with a 32-bit byte length
                if (tile_col == s->tiling.tile_cols - 1 &&
                    tile_row == s->tiling.tile_rows - 1) {
                    tile_size = AV_RB32(data);
                if (tile_size > size) {
                    ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
                    return AVERROR_INVALIDDATA;
                ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
                if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
                    ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
                    return AVERROR_INVALIDDATA;

            // decode one sb64 row across all tile columns
            for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
                 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
                struct VP9Filter *lflvl_ptr = s->lflvl;
                ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

                for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
                    set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
                                    tile_col, s->tiling.log2_tile_cols, s->sb_cols);

                    // left contexts are reset at every tile-column boundary
                    memset(s->left_partition_ctx, 0, 8);
                    memset(s->left_skip_ctx, 0, 8);
                    if (s->keyframe || s->intraonly) {
                        memset(s->left_mode_ctx, DC_PRED, 16);
                        memset(s->left_mode_ctx, NEARESTMV, 8);
                    memset(s->left_y_nnz_ctx, 0, 16);
                    memset(s->left_uv_nnz_ctx, 0, 16);
                    memset(s->left_segpred_ctx, 0, 8);

                    // restore this tile column's range decoder state
                    memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));

                    for (col = s->tiling.tile_col_start;
                         col < s->tiling.tile_col_end;
                         col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
                        // FIXME integrate with lf code (i.e. zero after each
                        // use, similar to invtxfm coefficients, or similar)
                        memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));

                        // pass 2 replays stored coefficients; pass 0/1 parses
                        decode_sb_mem(ctx, row, col, lflvl_ptr,
                                      yoff2, uvoff2, BL_64X64);
                        decode_sb(ctx, row, col, lflvl_ptr,
                                  yoff2, uvoff2, BL_64X64);
                    // save the range decoder state back for the next sb row
                    memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));

                // backup pre-loopfilter reconstruction data for intra
                // prediction of next row of sb64s
                if (row + 8 < s->rows) {
                    memcpy(s->intra_pred_data[0],
                           f->data[0] + yoff + 63 * ls_y,
                    memcpy(s->intra_pred_data[1],
                           f->data[1] + uvoff + 31 * ls_uv,
                    memcpy(s->intra_pred_data[2],
                           f->data[2] + uvoff + 31 * ls_uv,

                // loopfilter one row
                if (s->filter.level) {
                    lflvl_ptr = s->lflvl;
                    for (col = 0; col < s->cols;
                         col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
                        loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);

                // FIXME maybe we can make this more finegrained by running the
                // loopfilter per-block instead of after each sbrow
                // In fact that would also make intra pred left preparation easier?
                ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);

        // after a non-parallel first pass the frame context is refreshed
        // (presumably via adapt_probs(); call not visible in this excerpt)
        // before other threads are released
        if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
            ff_thread_finish_setup(ctx);
    } while (s->pass++ == 1);
    ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);

    // promote next_refs into the active reference slots for the next frame
    for (i = 0; i < 8; i++) {
        if (s->refs[i].f->data[0])
            ff_thread_release_buffer(ctx, &s->refs[i]);
        ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);

    // only visible frames are returned to the caller
    if (!s->invisible) {
        if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
3996 static void vp9_decode_flush(AVCodecContext *ctx)
3998 VP9Context *s = ctx->priv_data;
4001 for (i = 0; i < 3; i++)
4002 vp9_unref_frame(ctx, &s->frames[i]);
4003 for (i = 0; i < 8; i++)
4004 ff_thread_release_buffer(ctx, &s->refs[i]);
4007 static int init_frames(AVCodecContext *ctx)
4009 VP9Context *s = ctx->priv_data;
4012 for (i = 0; i < 3; i++) {
4013 s->frames[i].tf.f = av_frame_alloc();
4014 if (!s->frames[i].tf.f) {
4015 vp9_decode_free(ctx);
4016 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4017 return AVERROR(ENOMEM);
4020 for (i = 0; i < 8; i++) {
4021 s->refs[i].f = av_frame_alloc();
4022 s->next_refs[i].f = av_frame_alloc();
4023 if (!s->refs[i].f || !s->next_refs[i].f) {
4024 vp9_decode_free(ctx);
4025 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4026 return AVERROR(ENOMEM);
4033 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4035 VP9Context *s = ctx->priv_data;
4037 ctx->internal->allocate_progress = 1;
4038 ctx->pix_fmt = AV_PIX_FMT_YUV420P;
4039 ff_vp9dsp_init(&s->dsp);
4040 ff_videodsp_init(&s->vdsp, 8);
4041 s->filter.sharpness = -1;
4043 return init_frames(ctx);
/*
 * Frame-thread worker init: each decoding thread only needs its own
 * AVFrame containers; the remaining state is copied between threads by
 * vp9_decode_update_thread_context().
 */
static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
{
    return init_frames(avctx);
}
/*
 * Frame-threading state hand-off: copy from the previous thread's
 * context (src) everything the next frame needs -- internal frames,
 * reference slots and the header-persistent fields.
 *
 * NOTE(review): this excerpt is elided -- the declarations of i/res,
 * the size-change handling body, closing braces and the final return
 * are not visible here.
 */
static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
    VP9Context *s = dst->priv_data, *ssrc = src->priv_data;

    // detect size changes in other threads
    if (s->intra_pred_data[0] &&
        (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {

    // carry over the three internal frames (cur / segmap / mvpair refs)
    for (i = 0; i < 3; i++) {
        if (s->frames[i].tf.f->data[0])
            vp9_unref_frame(dst, &s->frames[i]);
        if (ssrc->frames[i].tf.f->data[0]) {
            if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)

    // this thread's active refs become the source thread's *next* refs,
    // i.e. the reference state after the source frame completed
    for (i = 0; i < 8; i++) {
        if (s->refs[i].f->data[0])
            ff_thread_release_buffer(dst, &s->refs[i]);
        if (ssrc->next_refs[i].f->data[0]) {
            if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)

    // header-persistent scalar state and probability/filter tables
    s->invisible = ssrc->invisible;
    s->keyframe = ssrc->keyframe;
    s->segmentation.enabled = ssrc->segmentation.enabled;
    s->segmentation.update_map = ssrc->segmentation.update_map;
    memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
    memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
    if (ssrc->segmentation.enabled) {
        memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
               sizeof(s->segmentation.feat));
4093 AVCodec ff_vp9_decoder = {
4095 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4096 .type = AVMEDIA_TYPE_VIDEO,
4097 .id = AV_CODEC_ID_VP9,
4098 .priv_data_size = sizeof(VP9Context),
4099 .init = vp9_decode_init,
4100 .close = vp9_decode_free,
4101 .decode = vp9_decode_frame,
4102 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4103 .flush = vp9_decode_flush,
4104 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4105 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),