/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "libavutil/avassert.h"

#define VP9_SYNCCODE 0x498342
typedef struct VP9Frame {
    ThreadFrame tf;
    AVBufferRef *extradata;
    uint8_t *segmentation_map;
    struct VP9mvrefPair *mv;
    int uses_2pass;
} VP9Frame;

struct VP9Filter {
    uint8_t level[8 * 8];
    uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
                              [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
};
typedef struct VP9Block {
    uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
    enum FilterMode filter;
    VP56mv mv[4 /* b_idx */][2 /* ref */];
    enum BlockSize bs;
    enum TxfmMode tx, uvtx;
    enum BlockLevel bl;
    enum BlockPartition bp;
} VP9Block;
typedef struct VP9Context {
    VP9DSPContext dsp;
    VideoDSPContext vdsp;
    GetBitContext gb;
    VP56RangeCoder c;
    VP56RangeCoder *c_b;
    unsigned c_b_size;
    VP9Block *b_base, *b;
    int row, row7, col, col7;
    ptrdiff_t y_stride, uv_stride;

    // bitstream header
    uint8_t profile;
    uint8_t keyframe, last_keyframe;
    uint8_t invisible;
    uint8_t use_last_frame_mvs;
    uint8_t errorres;
    uint8_t colorspace;
    uint8_t fullrange;
    uint8_t intraonly;
    uint8_t resetctx;
    uint8_t refreshrefmask;
    uint8_t highprecisionmvs;
    enum FilterMode filtermode;
    uint8_t allowcompinter;
    uint8_t fixcompref;
    uint8_t refreshctx;
    uint8_t parallelmode;
    uint8_t framectxid;
    uint8_t signbias[3];
    uint8_t refidx[3];
    uint8_t varcompref[2];
    ThreadFrame refs[8], next_refs[8];
#define CUR_FRAME 0
#define REF_FRAME_MVPAIR 1
#define REF_FRAME_SEGMAP 2
    VP9Frame frames[3];

    struct {
        uint8_t level;
        int8_t sharpness;
        uint8_t lim_lut[64];
        uint8_t mblim_lut[64];
    } filter;
    struct {
        uint8_t enabled;
        int8_t mode[2];
        int8_t ref[4];
    } lf_delta;
    uint8_t yac_qi;
    int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
    uint8_t lossless;
#define MAX_SEGMENT 8
    struct {
        uint8_t enabled;
        uint8_t temporal;
        uint8_t absolute_vals;
        uint8_t update_map;
        struct {
            uint8_t q_enabled;
            uint8_t lf_enabled;
            uint8_t ref_enabled;
            uint8_t skip_enabled;
            uint8_t ref_val;
            int16_t q_val;
            int8_t lf_val;
            int16_t qmul[2][2];
            uint8_t lflvl[4][2];
        } feat[MAX_SEGMENT];
    } segmentation;
    struct {
        unsigned log2_tile_cols, log2_tile_rows;
        unsigned tile_cols, tile_rows;
        unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    } tiling;
    unsigned sb_cols, sb_rows, rows, cols;

    struct {
        ProbContext p;
        uint8_t coef[4][2][2][6][6][3];
    } prob_ctx[4];
    struct {
        ProbContext p;
        uint8_t coef[4][2][2][6][6][11];
    } prob;
    struct {
        unsigned y_mode[4][10];
        unsigned uv_mode[10][10];
        unsigned filter[4][3];
        unsigned mv_mode[7][4];
        unsigned intra[4][2];
        unsigned comp[5][2];
        unsigned single_ref[5][2][2];
        unsigned comp_ref[5][2];
        unsigned tx32p[2][4];
        unsigned tx16p[2][3];
        unsigned tx8p[2][2];
        unsigned skip[3][2];
        unsigned mv_joint[4];
        struct {
            unsigned sign[2];
            unsigned classes[11];
            unsigned class0[2];
            unsigned bits[10][2];
            unsigned class0_fp[2][4];
            unsigned fp[4];
            unsigned class0_hp[2];
            unsigned hp[2];
        } mv_comp[2];
        unsigned partition[4][4][4];
        unsigned coef[4][2][2][6][6][3];
        unsigned eob[4][2][2][6][6][2];
    } counts;
    enum TxfmMode txfmmode;
    enum CompPredMode comppredmode;

    // contextual (left/above) cache
    DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
    DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
    DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
    DECLARE_ALIGNED(8, uint8_t, left_uv_nnz_ctx)[2][8];
    DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
    DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
    uint8_t *above_partition_ctx;
    uint8_t *above_mode_ctx;
    // FIXME maybe merge some of the below in a flags field?
    uint8_t *above_y_nnz_ctx;
    uint8_t *above_uv_nnz_ctx[2];
    uint8_t *above_skip_ctx; // 1bit
    uint8_t *above_txfm_ctx; // 2bit
    uint8_t *above_segpred_ctx; // 1bit
    uint8_t *above_intra_ctx; // 1bit
    uint8_t *above_comp_ctx; // 1bit
    uint8_t *above_ref_ctx; // 2bit
    uint8_t *above_filter_ctx;
    VP56mv (*above_mv_ctx)[2];

    // whole-frame cache
    uint8_t *intra_pred_data[3];
    struct VP9Filter *lflvl;
    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[135 * 144];

    // block reconstruction intermediates
    int block_alloc_using_2pass;
    int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
    uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
    struct { int x, y; } min_mv, max_mv;
    DECLARE_ALIGNED(32, uint8_t, tmp_y)[64 * 64];
    DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32 * 32];
    uint16_t mvscale[3][2];
    uint8_t mvstep[3][2];
} VP9Context;
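/* Block width/height per BS_* size. As can be inferred from how the two rows
 * are indexed below, row [0] is in units of 4 pixels (used for the per-4x4
 * intra mode context memsets) and row [1] is in units of 8 pixels (used for
 * the w4/h4 computations and the other per-8x8 context arrays). */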
static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
    {
        { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
        { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
    }, {
        { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
        { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
    }
};
static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
{
    VP9Context *s = ctx->priv_data;
    int ret, sz;

    if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
        return ret;
    sz = 64 * s->sb_cols * s->sb_rows;
    if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
        ff_thread_release_buffer(ctx, &f->tf);
        return AVERROR(ENOMEM);
    }
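    /* Layout of the extradata buffer as consumed by the two assignments
     * below: sz covers the frame at 8x8 granularity (64 blocks per 64x64
     * superblock), so the buffer packs one segmentation-map byte per 8x8
     * block followed by one VP9mvrefPair per 8x8 block:
     *
     *   [0, sz)                                segmentation_map
     *   [sz, sz * (1 + sizeof(VP9mvrefPair)))  mv
     */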
    f->segmentation_map = f->extradata->data;
    f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);

    return 0;
}

static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
{
    ff_thread_release_buffer(ctx, &f->tf);
    av_buffer_unref(&f->extradata);
}
static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
{
    int res;

    if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
        return res;
    } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
        vp9_unref_frame(ctx, dst);
        return AVERROR(ENOMEM);
    }

    dst->segmentation_map = src->segmentation_map;
    dst->mv = src->mv;
    dst->uses_2pass = src->uses_2pass;

    return 0;
}
static int update_size(AVCodecContext *ctx, int w, int h)
{
    VP9Context *s = ctx->priv_data;
    uint8_t *p;

    av_assert0(w > 0 && h > 0);

    if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height)
        return 0;

    ctx->width  = w;
    ctx->height = h;
    s->sb_cols  = (w + 63) >> 6;
    s->sb_rows  = (h + 63) >> 6;
    s->cols     = (w + 7) >> 3;
    s->rows     = (h + 7) >> 3;
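    /* e.g. a 1920x1080 frame gives sb_cols = 30, sb_rows = 17 (64x64
     * superblocks, rounded up) and cols = 240, rows = 135 (8x8 blocks,
     * rounded up). */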
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0],  uint8_t *,             64);
    assign(s->intra_pred_data[1],  uint8_t *,             32);
    assign(s->intra_pred_data[2],  uint8_t *,             32);
    assign(s->above_y_nnz_ctx,     uint8_t *,             16);
    assign(s->above_mode_ctx,      uint8_t *,             16);
    assign(s->above_mv_ctx,        VP56mv(*)[2],          16);
    assign(s->above_partition_ctx, uint8_t *,              8);
    assign(s->above_skip_ctx,      uint8_t *,              8);
    assign(s->above_txfm_ctx,      uint8_t *,              8);
    assign(s->above_uv_nnz_ctx[0], uint8_t *,              8);
    assign(s->above_uv_nnz_ctx[1], uint8_t *,              8);
    assign(s->above_segpred_ctx,   uint8_t *,              8);
    assign(s->above_intra_ctx,     uint8_t *,              8);
    assign(s->above_comp_ctx,      uint8_t *,              8);
    assign(s->above_ref_ctx,       uint8_t *,              8);
    assign(s->above_filter_ctx,    uint8_t *,              8);
    assign(s->lflvl,               struct VP9Filter *,     1);
#undef assign

    // these will be re-allocated a little later
    av_freep(&s->b_base);
    av_freep(&s->block_base);

    return 0;
}
static int update_block_buffers(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;

    if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->frames[CUR_FRAME].uses_2pass)
        return 0;

    av_free(s->b_base);
    av_free(s->block_base);
    if (s->frames[CUR_FRAME].uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

        s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
        s->block_base = av_mallocz((64 * 64 + 128) * sbs * 3);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
        s->uvblock_base[1] = s->uvblock_base[0] + sbs * 32 * 32;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * 32 * 32);
        s->uveob_base[0] = s->eob_base + 256 * sbs;
        s->uveob_base[1] = s->uveob_base[0] + 64 * sbs;
    } else {
        s->b_base = av_malloc(sizeof(VP9Block));
        s->block_base = av_mallocz((64 * 64 + 128) * 3);
        if (!s->b_base || !s->block_base)
            return AVERROR(ENOMEM);
        s->uvblock_base[0] = s->block_base + 64 * 64;
        s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
        s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
        s->uveob_base[0] = s->eob_base + 256;
        s->uveob_base[1] = s->uveob_base[0] + 64;
    }
    s->block_alloc_using_2pass = s->frames[CUR_FRAME].uses_2pass;

    return 0;
}
// for some reason the sign bit is at the end, not the start, of a bit sequence
static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
{
    int v = get_bits(gb, n);
    return get_bits1(gb) ? -v : v;
}
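/* e.g. with n = 4, the bit pattern 0101 followed by a set sign bit decodes
 * as -5; the same magnitude with the sign bit clear decodes as +5. */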
static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
}
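/* maps a coded distance v back to a value near m, alternating below/above:
 * for m = 10, v = 0, 1, 2, 3, 4 gives 10, 9, 11, 8, 12, ...; once v > 2 * m
 * the remaining values cannot straddle m and are passed through, so v = 21
 * gives 21. */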
// differential forward probability updates
static int update_prob(VP56RangeCoder *c, int p)
{
    static const int inv_map_table[254] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253,
    };
    int d;
    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A,255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0,254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */
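    /* Worked example (just tracing the code below): a cleared first flag
     * followed by a 4-bit value of 0 gives d = 0, so inv_map_table[d] = 7;
     * for a current probability p = 30 the result is
     * 1 + inv_recenter_nonneg(7, 29) = 1 + 25 = 26 as the new probability. */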
    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65)
            d = (d << 1) - 65 + vp8_rac_get(c);
        d += 64;
    }

    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
static int decode_frame_header(AVCodecContext *ctx,
                               const uint8_t *data, int size, int *ref)
{
    VP9Context *s = ctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
    int last_invisible;
    const uint8_t *data2;

    /* general header */
    if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
        return res;
    }
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    }
    s->profile = get_bits1(&s->gb);
    if (get_bits1(&s->gb)) { // reserved bit
        av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
        return AVERROR_INVALIDDATA;
    }
    if (get_bits1(&s->gb)) {
        *ref = get_bits(&s->gb, 3);
        return 0;
    }
    s->last_keyframe  = s->keyframe;
    s->keyframe       = !get_bits1(&s->gb);
    last_invisible    = s->invisible;
    s->invisible      = !get_bits1(&s->gb);
    s->errorres       = get_bits1(&s->gb);
    s->use_last_frame_mvs = !s->errorres && !last_invisible;
    if (s->keyframe) {
        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        }
        s->colorspace = get_bits(&s->gb, 3);
        if (s->colorspace == 7) { // RGB = profile 1
            av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
            return AVERROR_INVALIDDATA;
        }
        s->fullrange = get_bits1(&s->gb);
        // for profile 1, here follows the subsampling bits
        s->refreshrefmask = 0xff;
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
    } else {
        s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
        s->resetctx  = s->errorres ? 0 : get_bits(&s->gb, 2);
        if (s->intraonly) {
            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            }
            s->refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
        } else {
            s->refreshrefmask = get_bits(&s->gb, 8);
            s->refidx[0]      = get_bits(&s->gb, 3);
            s->signbias[0]    = get_bits1(&s->gb);
            s->refidx[1]      = get_bits(&s->gb, 3);
            s->signbias[1]    = get_bits1(&s->gb);
            s->refidx[2]      = get_bits(&s->gb, 3);
            s->signbias[2]    = get_bits1(&s->gb);
            if (!s->refs[s->refidx[0]].f->data[0] ||
                !s->refs[s->refidx[1]].f->data[0] ||
                !s->refs[s->refidx[2]].f->data[0]) {
                av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            }
            if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[0]].f->width;
                h = s->refs[s->refidx[0]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[1]].f->width;
                h = s->refs[s->refidx[1]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[2]].f->width;
                h = s->refs[s->refidx[2]].f->height;
            } else {
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            }
            // Note that in this code, "CUR_FRAME" is actually before we
            // have formally allocated a frame, and thus actually represents
            // the _last_ frame
            s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width  == w &&
                                     s->frames[CUR_FRAME].tf.f->height == h;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->highprecisionmvs = get_bits1(&s->gb);
            s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
                            get_bits(&s->gb, 2);
            s->allowcompinter = s->signbias[0] != s->signbias[1] ||
                                s->signbias[0] != s->signbias[2];
            if (s->allowcompinter) {
                if (s->signbias[0] == s->signbias[1]) {
                    s->fixcompref    = 2;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 1;
                } else if (s->signbias[0] == s->signbias[2]) {
                    s->fixcompref    = 1;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 2;
                } else {
                    s->fixcompref    = 0;
                    s->varcompref[0] = 1;
                    s->varcompref[1] = 2;
                }
            }
            for (i = 0; i < 3; i++) {
                AVFrame *ref = s->refs[s->refidx[i]].f;
                int refw = ref->width, refh = ref->height;

                if (refw == w && refh == h) {
                    s->mvscale[i][0] = s->mvscale[i][1] = 0;
                } else {
                    if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
                        av_log(ctx, AV_LOG_ERROR,
                               "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
                               refw, refh, w, h);
                        return AVERROR_INVALIDDATA;
                    }
                    s->mvscale[i][0] = (refw << 14) / w;
                    s->mvscale[i][1] = (refh << 14) / h;
                    s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
                    s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
                }
            }
        }
    }
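    /* The scale factors computed above are 14-bit fixed point: a reference
     * twice the width of the current frame gives mvscale[i][0] = 2 << 14 =
     * 32768 and mvstep[i][0] = 32, i.e. each output pixel advances two
     * reference pixels. */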
    s->refreshctx   = s->errorres ? 0 : get_bits1(&s->gb);
    s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
    s->framectxid   = c = get_bits(&s->gb, 2);

    /* loopfilter header data */
    s->filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
    // the old cache values since they are still valid
    if (s->filter.sharpness != sharp)
        memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
    s->filter.sharpness = sharp;
    if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
        if (get_bits1(&s->gb)) {
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
        }
    }
    /* quantization header data */
    s->yac_qi      = get_bits(&s->gb, 8);
    s->ydc_qdelta  = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->lossless    = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
                     s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
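    /* In VP9, a zero base Q index with all-zero deltas selects lossless
     * coding, which replaces the DCT with a 4x4 Walsh-Hadamard transform. */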
    /* segmentation header info */
    if ((s->segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->prob.seg[i] = get_bits1(&s->gb) ?
                                 get_bits(&s->gb, 8) : 255;
            if ((s->segmentation.temporal = get_bits1(&s->gb))) {
                for (i = 0; i < 3; i++)
                    s->prob.segpred[i] = get_bits1(&s->gb) ?
                                         get_bits(&s->gb, 8) : 255;
            }
        }
        if ((!s->segmentation.update_map || s->segmentation.temporal) &&
            (w != s->frames[CUR_FRAME].tf.f->width ||
             h != s->frames[CUR_FRAME].tf.f->height)) {
            av_log(ctx, AV_LOG_ERROR,
                   "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
                   s->segmentation.temporal, s->segmentation.update_map);
            return AVERROR_INVALIDDATA;
        }

        if (get_bits1(&s->gb)) {
            s->segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
                if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
                if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
            }
        }
    } else {
        s->segmentation.feat[0].q_enabled    = 0;
        s->segmentation.feat[0].lf_enabled   = 0;
        s->segmentation.feat[0].skip_enabled = 0;
        s->segmentation.feat[0].ref_enabled  = 0;
    }
    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->segmentation.feat[i].q_enabled) {
            if (s->segmentation.absolute_vals)
                qyac = s->segmentation.feat[i].q_val;
            else
                qyac = s->yac_qi + s->segmentation.feat[i].q_val;
        } else {
            qyac = s->yac_qi;
        }
        qydc  = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
        qyac  = av_clip_uintp2(qyac, 8);

        s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
        s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
        s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
        s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];

        sh = s->filter.level >= 32;
        if (s->segmentation.feat[i].lf_enabled) {
            if (s->segmentation.absolute_vals)
                lflvl = s->segmentation.feat[i].lf_val;
            else
                lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
        } else {
            lflvl = s->filter.level;
        }
        if (s->lf_delta.enabled) {
            s->segmentation.feat[i].lflvl[0][0] =
            s->segmentation.feat[i].lflvl[0][1] =
                av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
            for (j = 1; j < 4; j++) {
                s->segmentation.feat[i].lflvl[j][0] =
                    av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                             s->lf_delta.mode[0]) * (1 << sh)), 6);
                s->segmentation.feat[i].lflvl[j][1] =
                    av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                             s->lf_delta.mode[1]) * (1 << sh)), 6);
            }
        } else {
            memset(s->segmentation.feat[i].lflvl, lflvl,
                   sizeof(s->segmentation.feat[i].lflvl));
        }
    }
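    /* e.g. with filter.level = 40 (so sh = 1) and lf_delta.ref[0] = 2, the
     * segment's base lflvl[0] becomes av_clip_uintp2(40 + (2 << 1), 6) = 44;
     * deltas are scaled up once the base level reaches 32, and the result is
     * always clamped to the 6-bit range [0, 63]. */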
    if ((res = update_size(ctx, w, h)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
        return res;
    }
    for (s->tiling.log2_tile_cols = 0;
         (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
         s->tiling.log2_tile_cols++) ;
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->tiling.log2_tile_cols++;
        else
            break;
    }
    s->tiling.log2_tile_rows = decode012(&s->gb);
    s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
    if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
        s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
        s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
                                 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
        if (!s->c_b) {
            av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
            return AVERROR(ENOMEM);
        }
    }
    if (s->keyframe || s->errorres || s->intraonly) {
        s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
                           s->prob_ctx[3].p = vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
    }
    // next 16 bits is size of the rest of the header (arith-coded)
    size2 = get_bits(&s->gb, 16);
    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    }
    ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;
    }
    if (s->keyframe || s->intraonly) {
        memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
    } else {
        memset(&s->counts, 0, sizeof(s->counts));
    }
    // FIXME is it faster to not copy here, but do it down in the fw updates
    // as explicit copies if the fw update is missing (and skip the copy upon
    // a missed update)?
    s->prob.p = s->prob_ctx[c].p;
    // txfm updates
    if (s->lossless) {
        s->txfmmode = TX_4X4;
    } else {
        s->txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->txfmmode == 3)
            s->txfmmode += vp8_rac_get(&s->c);

        if (s->txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);
        }
    }
    // coef updates
    for (i = 0; i < 4; i++) {
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                                break;
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252)) {
                                    p[n] = update_prob(&s->c, r[n]);
                                } else {
                                    p[n] = r[n];
                                }
                            }
                            p[3] = 0;
                        }
        } else {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m > 3 && l == 0) // dc only has 3 pt
                                break;
                            memcpy(p, r, 3);
                            p[3] = 0;
                        }
        }
        if (s->txfmmode == i)
            break;
    }
    // mode updates
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->keyframe && !s->intraonly) {
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
        if (s->allowcompinter) {
            s->comppredmode = vp8_rac_get(&s->c);
            if (s->comppredmode)
                s->comppredmode += vp8_rac_get(&s->c);
            if (s->comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.comp[i] =
                            update_prob(&s->c, s->prob.p.comp[i]);
        } else {
            s->comppredmode = PRED_SINGLEREF;
        }

        if (s->comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);
            }
        }

        if (s->comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);
        }
        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
        // mv fields don't use the update_prob subexp model for some reason
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        if (s->highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
            }
        }
    }
    return (data2 - data) + size2;
}
static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
                                      VP9Context *s)
{
    dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
    dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
}
static void find_ref_mvs(VP9Context *s,
                         VP56mv *pmv, int ref, int z, int idx, int sb)
{
    static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
        [BS_64x64] = { {  3, -1 }, { -1,  3 }, {  4, -1 }, { -1,  4 },
                       { -1, -1 }, {  0, -1 }, { -1,  0 }, {  6, -1 } },
        [BS_64x32] = { {  0, -1 }, { -1,  0 }, {  4, -1 }, { -1,  2 },
                       { -1, -1 }, {  0, -3 }, { -3,  0 }, {  2, -1 } },
        [BS_32x64] = { { -1,  0 }, {  0, -1 }, { -1,  4 }, {  2, -1 },
                       { -1, -1 }, { -3,  0 }, {  0, -3 }, { -1,  2 } },
        [BS_32x32] = { {  1, -1 }, { -1,  1 }, {  2, -1 }, { -1,  2 },
                       { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 } },
        [BS_32x16] = { {  0, -1 }, { -1,  0 }, {  2, -1 }, { -1, -1 },
                       { -1,  1 }, {  0, -3 }, { -3,  0 }, { -3, -3 } },
        [BS_16x32] = { { -1,  0 }, {  0, -1 }, { -1,  2 }, { -1, -1 },
                       {  1, -1 }, { -3,  0 }, {  0, -3 }, { -3, -3 } },
        [BS_16x16] = { {  0, -1 }, { -1,  0 }, {  1, -1 }, { -1,  1 },
                       { -1, -1 }, {  0, -3 }, { -3,  0 }, { -3, -3 } },
        [BS_16x8]  = { {  0, -1 }, { -1,  0 }, {  1, -1 }, { -1, -1 },
                       {  0, -2 }, { -2,  0 }, { -2, -1 }, { -1, -2 } },
        [BS_8x16]  = { { -1,  0 }, {  0, -1 }, { -1,  1 }, { -1, -1 },
                       { -2,  0 }, {  0, -2 }, { -1, -2 }, { -2, -1 } },
        [BS_8x8]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
        [BS_8x4]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
        [BS_4x8]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
        [BS_4x4]   = { {  0, -1 }, { -1,  0 }, { -1, -1 }, {  0, -2 },
                       { -2,  0 }, { -1, -2 }, { -2, -1 }, { -2, -2 } },
    };
    VP9Block *b = s->b;
    int row = s->row, col = s->col, row7 = s->row7;
    const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
#define INVALID_MV 0x80008000U
    uint32_t mem = INVALID_MV;
    int i;
#define RETURN_DIRECT_MV(mv) \
    do { \
        uint32_t m = AV_RN32A(&mv); \
        if (!idx) { \
            AV_WN32A(pmv, m); \
            return; \
        } else if (mem == INVALID_MV) { \
            mem = m; \
        } else if (m != mem) { \
            AV_WN32A(pmv, m); \
            return; \
        } \
    } while (0)

    if (sb >= 0) {
        if (sb == 2 || sb == 1) {
            RETURN_DIRECT_MV(b->mv[0][z]);
        } else if (sb == 3) {
            RETURN_DIRECT_MV(b->mv[2][z]);
            RETURN_DIRECT_MV(b->mv[1][z]);
            RETURN_DIRECT_MV(b->mv[0][z]);
        }
#define RETURN_MV(mv) \
    do { \
        if (sb > 0) { \
            VP56mv tmp; \
            uint32_t m; \
            clamp_mv(&tmp, &mv, s); \
            m = AV_RN32A(&tmp); \
            if (!idx) { \
                AV_WN32A(pmv, m); \
                return; \
            } else if (mem == INVALID_MV) { \
                mem = m; \
            } else if (m != mem) { \
                AV_WN32A(pmv, m); \
                return; \
            } \
        } else { \
            uint32_t m = AV_RN32A(&mv); \
            if (!idx) { \
                clamp_mv(pmv, &mv, s); \
                return; \
            } else if (mem == INVALID_MV) { \
                mem = m; \
            } else if (m != mem) { \
                clamp_mv(pmv, &mv, s); \
                return; \
            } \
        } \
    } while (0)

        if (row > 0) {
            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
            if (mv->ref[0] == ref) {
                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
            }
        }
        if (col > s->tiling.tile_col_start) {
            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
            if (mv->ref[0] == ref) {
                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
            }
        }
        i = 2;
    } else {
        i = 0;
    }
    // previously coded MVs in this neighbourhood, using same reference frame
    for (; i < 8; i++) {
        int c = p[i][0] + col, r = p[i][1] + row;

        if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];

            if (mv->ref[0] == ref) {
                RETURN_MV(mv->mv[0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(mv->mv[1]);
            }
        }
    }

    // MV at this position in previous frame, using same reference frame
    if (s->use_last_frame_mvs) {
        struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];

        if (!s->frames[REF_FRAME_MVPAIR].uses_2pass)
            ff_thread_await_progress(&s->frames[REF_FRAME_MVPAIR].tf, row >> 3, 0);
        if (mv->ref[0] == ref) {
            RETURN_MV(mv->mv[0]);
        } else if (mv->ref[1] == ref) {
            RETURN_MV(mv->mv[1]);
        }
    }
#define RETURN_SCALE_MV(mv, scale) \
    do { \
        if (scale) { \
            VP56mv mv_temp = { -mv.x, -mv.y }; \
            RETURN_MV(mv_temp); \
        } else { \
            RETURN_MV(mv); \
        } \
    } while (0)
    // previously coded MVs in this neighbourhood, using different reference frame
    for (i = 0; i < 8; i++) {
        int c = p[i][0] + col, r = p[i][1] + row;

        if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];

            if (mv->ref[0] != ref && mv->ref[0] >= 0) {
                RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
            }
            if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
                // BUG - libvpx has this condition regardless of whether
                // we used the first ref MV and pre-scaling
                AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
                RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
            }
        }
    }

    // MV at this position in previous frame, using different reference frame
    if (s->use_last_frame_mvs) {
        struct VP9mvrefPair *mv = &s->frames[REF_FRAME_MVPAIR].mv[row * s->sb_cols * 8 + col];

        // no need to await_progress, because we already did that above
        if (mv->ref[0] != ref && mv->ref[0] >= 0) {
            RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
        }
        if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
            // BUG - libvpx has this condition regardless of whether
            // we used the first ref MV and pre-scaling
            AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
            RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
        }
    }

    AV_ZERO32(pmv);
    clamp_mv(pmv, pmv, s);
#undef RETURN_MV
#undef RETURN_SCALE_MV
}
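/* A rough summary of the layout decoded below: one MV component is a sign
 * flag plus a magnitude class; class 0 carries a 1-bit integer offset, a
 * 2-bit fraction and an optional high-precision bit, while higher classes
 * carry class-many integer bits followed by the same fraction/high-precision
 * bits. The return value is sign * (n + 1) in eighth-pel units. */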
static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
{
    int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
    int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
                                s->prob.p.mv_comp[idx].classes);

    s->counts.mv_comp[idx].sign[sign]++;
    s->counts.mv_comp[idx].classes[c]++;
    if (c) {
        int m;

        for (n = 0, m = 0; m < c; m++) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
            n |= bit << m;
            s->counts.mv_comp[idx].bits[m][bit]++;
        }
        n <<= 3;
        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
        n |= bit << 1;
        s->counts.mv_comp[idx].fp[bit]++;
        if (hp) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
            s->counts.mv_comp[idx].hp[bit]++;
            n |= bit;
        } else {
            n |= 1;
            // bug in libvpx - we count for bw entropy purposes even if the
            // bit wasn't coded
            s->counts.mv_comp[idx].hp[1]++;
        }
    } else {
        n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
        s->counts.mv_comp[idx].class0[n]++;
        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
                               s->prob.p.mv_comp[idx].class0_fp[n]);
        s->counts.mv_comp[idx].class0_fp[n][bit]++;
        n = (n << 3) | (bit << 1);
        if (hp) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
            s->counts.mv_comp[idx].class0_hp[bit]++;
            n |= bit;
        } else {
            n |= 1;
            // bug in libvpx - we count for bw entropy purposes even if the
            // bit wasn't coded
            s->counts.mv_comp[idx].class0_hp[1]++;
        }
    }

    return sign ? -(n + 1) : (n + 1);
}
static void fill_mv(VP9Context *s,
                    VP56mv *mv, int mode, int sb)
{
    VP9Block *b = s->b;

    if (mode == ZEROMV) {
        AV_ZERO64(mv);
    } else {
        int hp;

        // FIXME cache this value and reuse for other subblocks
        find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
                     mode == NEWMV ? -1 : sb);
        // FIXME maybe move this code into find_ref_mvs()
        if ((mode == NEWMV || sb == -1) &&
            !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
            if (mv[0].y & 1) {
                if (mv[0].y < 0)
                    mv[0].y++;
                else
                    mv[0].y--;
            }
            if (mv[0].x & 1) {
                if (mv[0].x < 0)
                    mv[0].x++;
                else
                    mv[0].x--;
            }
        }
        if (mode == NEWMV) {
            enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
                                              s->prob.p.mv_joint);

            s->counts.mv_joint[j]++;
            if (j >= MV_JOINT_V)
                mv[0].y += read_mv_component(s, 0, hp);
            if (j & 1)
                mv[0].x += read_mv_component(s, 1, hp);
        }

        if (b->comp) {
            // FIXME cache this value and reuse for other subblocks
            find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
                         mode == NEWMV ? -1 : sb);
            if ((mode == NEWMV || sb == -1) &&
                !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
                if (mv[1].y & 1) {
                    if (mv[1].y < 0)
                        mv[1].y++;
                    else
                        mv[1].y--;
                }
                if (mv[1].x & 1) {
                    if (mv[1].x < 0)
                        mv[1].x++;
                    else
                        mv[1].x--;
                }
            }
            if (mode == NEWMV) {
                enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
                                                  s->prob.p.mv_joint);

                s->counts.mv_joint[j]++;
                if (j >= MV_JOINT_V)
                    mv[1].y += read_mv_component(s, 0, hp);
                if (j & 1)
                    mv[1].x += read_mv_component(s, 1, hp);
            }
        }
    }
}
static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
                                       ptrdiff_t stride, int v)
{
    switch (w) {
    case 1:
        do {
            *ptr = v;
            ptr += stride;
        } while (--h);
        break;
    case 2: {
        int v16 = v * 0x0101;
        do {
            AV_WN16A(ptr, v16);
            ptr += stride;
        } while (--h);
        break;
    }
    case 4: {
        uint32_t v32 = v * 0x01010101;
        do {
            AV_WN32A(ptr, v32);
            ptr += stride;
        } while (--h);
        break;
    }
    case 8: {
#if HAVE_FAST_64BIT
        uint64_t v64 = v * 0x0101010101010101ULL;
        do {
            AV_WN64A(ptr, v64);
            ptr += stride;
        } while (--h);
#else
        uint32_t v32 = v * 0x01010101;
        do {
            AV_WN32A(ptr,     v32);
            AV_WN32A(ptr + 4, v32);
            ptr += stride;
        } while (--h);
#endif
        break;
    }
    }
}
static void decode_mode(AVCodecContext *ctx)
{
    static const uint8_t left_ctx[N_BS_SIZES] = {
        0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
    };
    static const uint8_t above_ctx[N_BS_SIZES] = {
        0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
    };
    static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
        TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
        TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
    };
    VP9Context *s = ctx->priv_data;
    VP9Block *b = s->b;
    int row = s->row, col = s->col, row7 = s->row7;
    enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
    int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
    int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
    int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
    int vref, filter_id;
    if (!s->segmentation.enabled) {
        b->seg_id = 0;
    } else if (s->keyframe || s->intraonly) {
        b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
    } else if (!s->segmentation.update_map ||
               (s->segmentation.temporal &&
                vp56_rac_get_prob_branchy(&s->c,
                    s->prob.segpred[s->above_segpred_ctx[col] +
                                    s->left_segpred_ctx[row7]]))) {
        if (!s->errorres) {
            int pred = 8, x;
            uint8_t *refsegmap = s->frames[REF_FRAME_SEGMAP].segmentation_map;

            if (!s->frames[REF_FRAME_SEGMAP].uses_2pass)
                ff_thread_await_progress(&s->frames[REF_FRAME_SEGMAP].tf, row >> 3, 0);
            for (y = 0; y < h4; y++) {
                int idx_base = (y + row) * 8 * s->sb_cols + col;
                for (x = 0; x < w4; x++)
                    pred = FFMIN(pred, refsegmap[idx_base + x]);
            }
            av_assert1(pred < 8);
            b->seg_id = pred;
        } else {
            b->seg_id = 0;
        }

        memset(&s->above_segpred_ctx[col], 1, w4);
        memset(&s->left_segpred_ctx[row7], 1, h4);
    } else {
        b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
                                     s->prob.seg);

        memset(&s->above_segpred_ctx[col], 0, w4);
        memset(&s->left_segpred_ctx[row7], 0, h4);
    }
    if (s->segmentation.enabled &&
        (s->segmentation.update_map || s->keyframe || s->intraonly)) {
        setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
                  w4, h4, 8 * s->sb_cols, b->seg_id);
    }

    b->skip = s->segmentation.enabled &&
              s->segmentation.feat[b->seg_id].skip_enabled;
    if (!b->skip) {
        int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
        b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
        s->counts.skip[c][b->skip]++;
    }
    if (s->keyframe || s->intraonly) {
        b->intra = 1;
    } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
        b->intra = !s->segmentation.feat[b->seg_id].ref_val;
    } else {
        int c, bit;

        if (have_a && have_l) {
            c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
            c += (c == 2);
        } else {
            c = have_a ? 2 * s->above_intra_ctx[col] :
                have_l ? 2 * s->left_intra_ctx[row7] : 0;
        }
        bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
        s->counts.intra[c][bit]++;
        b->intra = !bit;
    }
    if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
        int c;

        if (have_a && have_l) {
            c = (s->above_skip_ctx[col] ? max_tx :
                 s->above_txfm_ctx[col]) +
                (s->left_skip_ctx[row7] ? max_tx :
                 s->left_txfm_ctx[row7]) > max_tx;
        } else if (have_a) {
            c = s->above_skip_ctx[col] ? 1 :
                (s->above_txfm_ctx[col] * 2 > max_tx);
        } else if (have_l) {
            c = s->left_skip_ctx[row7] ? 1 :
                (s->left_txfm_ctx[row7] * 2 > max_tx);
        } else {
            c = 1;
        }
        switch (max_tx) {
        case TX_32X32:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
            if (b->tx) {
                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
                if (b->tx == 2)
                    b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
            }
            s->counts.tx32p[c][b->tx]++;
            break;
        case TX_16X16:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
            if (b->tx)
                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
            s->counts.tx16p[c][b->tx]++;
            break;
        case TX_8X8:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
            s->counts.tx8p[c][b->tx]++;
            break;
        case TX_4X4:
            b->tx = TX_4X4;
            break;
        }
    } else {
        b->tx = FFMIN(max_tx, s->txfmmode);
    }
    if (s->keyframe || s->intraonly) {
        uint8_t *a = &s->above_mode_ctx[col * 2];
        uint8_t *l = &s->left_mode_ctx[(row7) << 1];

        b->comp = 0;
        if (b->bs > BS_8x8) {
            // FIXME the memory storage intermediates here aren't really
            // necessary, they're just there to make the code slightly
            // simpler for now
            b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                 vp9_default_kf_ymode_probs[a[0]][l[0]]);
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
                l[0] = a[1] = b->mode[1];
            } else {
                l[0] = a[1] = b->mode[1] = b->mode[0];
            }
            if (b->bs != BS_4x8) {
                b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                     vp9_default_kf_ymode_probs[a[0]][l[1]]);
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                  vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
                    l[1] = a[1] = b->mode[3];
                } else {
                    l[1] = a[1] = b->mode[3] = b->mode[2];
                }
            } else {
                b->mode[2] = b->mode[0];
                l[1] = a[1] = b->mode[3] = b->mode[1];
            }
        } else {
            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          vp9_default_kf_ymode_probs[*a][*l]);
            b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
            // FIXME this can probably be optimized
            memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
            memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
        }
        b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                     vp9_default_kf_uvmode_probs[b->mode[3]]);
    } else if (b->intra) {
        b->comp = 0;
        if (b->bs > BS_8x8) {
            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          s->prob.p.y_mode[0]);
            s->counts.y_mode[0][b->mode[0]]++;
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                s->counts.y_mode[0][b->mode[1]]++;
            } else {
                b->mode[1] = b->mode[0];
            }
            if (b->bs != BS_4x8) {
                b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                s->counts.y_mode[0][b->mode[2]]++;
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                  s->prob.p.y_mode[0]);
                    s->counts.y_mode[0][b->mode[3]]++;
                } else {
                    b->mode[3] = b->mode[2];
                }
            } else {
                b->mode[2] = b->mode[0];
                b->mode[3] = b->mode[1];
            }
        } else {
            static const uint8_t size_group[10] = {
                3, 3, 3, 3, 2, 2, 2, 1, 1, 1
            };
            int sz = size_group[b->bs];

            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          s->prob.p.y_mode[sz]);
            b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
            s->counts.y_mode[sz][b->mode[3]]++;
        }
        b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                     s->prob.p.uv_mode[b->mode[3]]);
        s->counts.uv_mode[b->mode[3]][b->uvmode]++;
    } else {
        static const uint8_t inter_mode_ctx_lut[14][14] = {
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
        };
        if (s->segmentation.feat[b->seg_id].ref_enabled) {
            av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
            b->comp = 0;
            b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
        } else {
            // read comp_pred flag
            if (s->comppredmode != PRED_SWITCHABLE) {
                b->comp = s->comppredmode == PRED_COMPREF;
            } else {
                int c;

                // FIXME add intra as ref=0xff (or -1) to make these easier?
                if (have_a) {
                    if (have_l) {
                        if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
                            c = 4;
                        } else if (s->above_comp_ctx[col]) {
                            c = 2 + (s->left_intra_ctx[row7] ||
                                     s->left_ref_ctx[row7] == s->fixcompref);
                        } else if (s->left_comp_ctx[row7]) {
                            c = 2 + (s->above_intra_ctx[col] ||
                                     s->above_ref_ctx[col] == s->fixcompref);
                        } else {
                            c = (!s->above_intra_ctx[col] &&
                                 s->above_ref_ctx[col] == s->fixcompref) ^
                                (!s->left_intra_ctx[row7] &&
                                 s->left_ref_ctx[row & 7] == s->fixcompref);
                        }
                    } else {
                        c = s->above_comp_ctx[col] ? 3 :
                            (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
                    }
                } else if (have_l) {
                    c = s->left_comp_ctx[row7] ? 3 :
                        (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
                } else {
                    c = 1;
                }
                b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
                s->counts.comp[c][b->comp]++;
            }
            // read actual references
            // FIXME probably cache a few variables here to prevent repetitive
            // memory accesses below
            if (b->comp) /* two references */ {
                int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;

                b->ref[fix_idx] = s->fixcompref;
                // FIXME can this codeblob be replaced by some sort of LUT?
                if (have_a) {
                    if (have_l) {
                        if (s->above_intra_ctx[col]) {
                            if (s->left_intra_ctx[row7]) {
                                c = 2;
                            } else {
                                c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
                            }
                        } else if (s->left_intra_ctx[row7]) {
                            c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
                        } else {
                            int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];

                            if (refl == refa && refa == s->varcompref[1]) {
                                c = 0;
                            } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
                                if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
                                    (refl == s->fixcompref && refa == s->varcompref[0])) {
                                    c = 4;
                                } else {
                                    c = (refa == refl) ? 3 : 1;
                                }
                            } else if (!s->left_comp_ctx[row7]) {
                                if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
                                    c = 1;
                                } else {
                                    c = (refl == s->varcompref[1] &&
                                         refa != s->varcompref[1]) ? 2 : 4;
                                }
                            } else if (!s->above_comp_ctx[col]) {
                                if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
                                    c = 1;
                                } else {
                                    c = (refa == s->varcompref[1] &&
                                         refl != s->varcompref[1]) ? 2 : 4;
                                }
                            } else {
                                c = (refl == refa) ? 4 : 2;
                            }
                        }
                    } else {
                        if (s->above_intra_ctx[col]) {
                            c = 2;
                        } else if (s->above_comp_ctx[col]) {
                            c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
                        } else {
                            c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
                        }
                    }
                } else if (have_l) {
                    if (s->left_intra_ctx[row7]) {
                        c = 2;
                    } else if (s->left_comp_ctx[row7]) {
                        c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
                    } else {
                        c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
                    }
                } else {
                    c = 2;
                }
                bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
                b->ref[var_idx] = s->varcompref[bit];
                s->counts.comp_ref[c][bit]++;
            } else /* single reference */ {
                int bit, c;

                if (have_a && !s->above_intra_ctx[col]) {
                    if (have_l && !s->left_intra_ctx[row7]) {
                        if (s->left_comp_ctx[row7]) {
                            if (s->above_comp_ctx[col]) {
                                c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
                                         !s->above_ref_ctx[col]);
                            } else {
                                c = (3 * !s->above_ref_ctx[col]) +
                                    (!s->fixcompref || !s->left_ref_ctx[row7]);
                            }
                        } else if (s->above_comp_ctx[col]) {
                            c = (3 * !s->left_ref_ctx[row7]) +
                                (!s->fixcompref || !s->above_ref_ctx[col]);
                        } else {
                            c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
                        }
                    } else if (s->above_intra_ctx[col]) {
                        c = 2;
                    } else if (s->above_comp_ctx[col]) {
                        c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
                    } else {
                        c = 4 * (!s->above_ref_ctx[col]);
                    }
                } else if (have_l && !s->left_intra_ctx[row7]) {
                    if (s->left_intra_ctx[row7]) {
                        c = 2;
                    } else if (s->left_comp_ctx[row7]) {
                        c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
                    } else {
                        c = 4 * (!s->left_ref_ctx[row7]);
                    }
                } else {
                    c = 2;
                }
                bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
                s->counts.single_ref[c][0][bit]++;
                if (!bit) {
                    b->ref[0] = 0;
                } else {
                    // FIXME can this codeblob be replaced by some sort of LUT?
                    if (have_a) {
                        if (have_l) {
                            if (s->left_intra_ctx[row7]) {
                                if (s->above_intra_ctx[col]) {
                                    c = 2;
                                } else if (s->above_comp_ctx[col]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->above_ref_ctx[col] == 1);
                                } else if (!s->above_ref_ctx[col]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->above_ref_ctx[col] == 1);
                                }
                            } else if (s->above_intra_ctx[col]) {
                                if (s->left_intra_ctx[row7]) {
                                    c = 2;
                                } else if (s->left_comp_ctx[row7]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                } else if (!s->left_ref_ctx[row7]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->left_ref_ctx[row7] == 1);
                                }
                            } else if (s->above_comp_ctx[col]) {
                                if (s->left_comp_ctx[row7]) {
                                    if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
                                        c = 3 * (s->fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                    } else {
                                        c = 2;
                                    }
                                } else if (!s->left_ref_ctx[row7]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->above_ref_ctx[col] == 1);
                                } else {
                                    c = 3 * (s->left_ref_ctx[row7] == 1) +
                                        (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
                                }
                            } else if (s->left_comp_ctx[row7]) {
                                if (!s->above_ref_ctx[col]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                } else {
                                    c = 3 * (s->above_ref_ctx[col] == 1) +
                                        (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
                                }
                            } else if (!s->above_ref_ctx[col]) {
                                if (!s->left_ref_ctx[row7]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->left_ref_ctx[row7] == 1);
                                }
                            } else if (!s->left_ref_ctx[row7]) {
                                c = 4 * (s->above_ref_ctx[col] == 1);
                            } else {
                                c = 2 * (s->left_ref_ctx[row7] == 1) +
                                    2 * (s->above_ref_ctx[col] == 1);
                            }
                        } else {
                            if (s->above_intra_ctx[col] ||
                                (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
                                c = 2;
                            } else if (s->above_comp_ctx[col]) {
                                c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
                            } else {
                                c = 4 * (s->above_ref_ctx[col] == 1);
                            }
                        }
                    } else if (have_l) {
                        if (s->left_intra_ctx[row7] ||
                            (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
                            c = 2;
                        } else if (s->left_comp_ctx[row7]) {
                            c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
                        } else {
                            c = 4 * (s->left_ref_ctx[row7] == 1);
                        }
                    } else {
                        c = 2;
                    }
                    bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
                    s->counts.single_ref[c][1][bit]++;
                    b->ref[0] = 1 + bit;
                }
            }
        }
        if (b->bs <= BS_8x8) {
            if (s->segmentation.feat[b->seg_id].skip_enabled) {
                b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
            } else {
                static const uint8_t off[10] = {
                    3, 0, 0, 1, 0, 0, 0, 0, 0, 0
                };

                // FIXME this needs to use the LUT tables from find_ref_mvs
                // because not all are -1,0/0,-1
                int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
                                          [s->left_mode_ctx[row7 + off[b->bs]]];

                b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
                s->counts.mv_mode[c][b->mode[0] - 10]++;
            }
        }
        if (s->filtermode == FILTER_SWITCHABLE) {
            int c;

            if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
                if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
                    c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
                        s->left_filter_ctx[row7] : 3;
                } else {
                    c = s->above_filter_ctx[col];
                }
            } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
                c = s->left_filter_ctx[row7];
            } else {
                c = 3;
            }

            filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
                                         s->prob.p.filter[c]);
            s->counts.filter[c][filter_id]++;
            b->filter = vp9_filter_lut[filter_id];
        } else {
            b->filter = s->filtermode;
        }
        if (b->bs > BS_8x8) {
            int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];

            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                          s->prob.p.mv_mode[c]);
            s->counts.mv_mode[c][b->mode[0] - 10]++;
            fill_mv(s, b->mv[0], b->mode[0], 0);

            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                s->counts.mv_mode[c][b->mode[1] - 10]++;
                fill_mv(s, b->mv[1], b->mode[1], 1);
            } else {
                b->mode[1] = b->mode[0];
                AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
                AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
            }

            if (b->bs != BS_4x8) {
                b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                s->counts.mv_mode[c][b->mode[2] - 10]++;
                fill_mv(s, b->mv[2], b->mode[2], 2);

                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                                  s->prob.p.mv_mode[c]);
                    s->counts.mv_mode[c][b->mode[3] - 10]++;
                    fill_mv(s, b->mv[3], b->mode[3], 3);
                } else {
                    b->mode[3] = b->mode[2];
                    AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
                    AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
                }
            } else {
                b->mode[2] = b->mode[0];
                AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
                AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
                b->mode[3] = b->mode[1];
                AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
                AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
            }
        } else {
            fill_mv(s, b->mv[0], b->mode[0], -1);
            AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
            AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
            AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
        }
        vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
    }

#if HAVE_FAST_64BIT
#define SPLAT_CTX(var, val, n) \
    switch (n) { \
    case 1:  var = val;                                    break; \
    case 2:  AV_WN16A(&var, val *             0x0101);     break; \
    case 4:  AV_WN32A(&var, val *         0x01010101);     break; \
    case 8:  AV_WN64A(&var, val * 0x0101010101010101ULL);  break; \
    default: { \
        uint64_t v64 = val * 0x0101010101010101ULL; \
        AV_WN64A(              &var,     v64); \
        AV_WN64A(&((uint8_t *) &var)[8], v64); \
        break; \
    } \
    }
#else
#define SPLAT_CTX(var, val, n) \
    switch (n) { \
    case 1:  var = val;                         break; \
    case 2:  AV_WN16A(&var, val *     0x0101);  break; \
    case 4:  AV_WN32A(&var, val * 0x01010101);  break; \
    case 8: { \
        uint32_t v32 = val * 0x01010101; \
        AV_WN32A(              &var,     v32); \
        AV_WN32A(&((uint8_t *) &var)[4], v32); \
        break; \
    } \
    default: { \
        uint32_t v32 = val * 0x01010101; \
        AV_WN32A(              &var,      v32); \
        AV_WN32A(&((uint8_t *) &var)[4],  v32); \
        AV_WN32A(&((uint8_t *) &var)[8],  v32); \
        AV_WN32A(&((uint8_t *) &var)[12], v32); \
        break; \
    } \
    }
#endif
    switch (bwh_tab[1][b->bs][0]) {
#define SET_CTXS(dir, off, n) \
    do { \
        SPLAT_CTX(s->dir##_skip_ctx[off],      b->skip,          n); \
        SPLAT_CTX(s->dir##_txfm_ctx[off],      b->tx,            n); \
        SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
        if (!s->keyframe && !s->intraonly) { \
            SPLAT_CTX(s->dir##_intra_ctx[off], b->intra,   n); \
            SPLAT_CTX(s->dir##_comp_ctx[off],  b->comp,    n); \
            SPLAT_CTX(s->dir##_mode_ctx[off],  b->mode[3], n); \
            if (!b->intra) { \
                SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
                if (s->filtermode == FILTER_SWITCHABLE) { \
                    SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
                } \
            } \
        } \
    } while (0)
    case 1: SET_CTXS(above, col, 1); break;
    case 2: SET_CTXS(above, col, 2); break;
    case 4: SET_CTXS(above, col, 4); break;
    case 8: SET_CTXS(above, col, 8); break;
    }
    switch (bwh_tab[1][b->bs][1]) {
    case 1: SET_CTXS(left, row7, 1); break;
    case 2: SET_CTXS(left, row7, 2); break;
    case 4: SET_CTXS(left, row7, 4); break;
    case 8: SET_CTXS(left, row7, 8); break;
    }
#undef SPLAT_CTX
#undef SET_CTXS
    if (!s->keyframe && !s->intraonly) {
        if (b->bs > BS_8x8) {
            int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);

            AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
            AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
            AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
            AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
        } else {
            int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);

            for (n = 0; n < w4 * 2; n++) {
                AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
                AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
            }
            for (n = 0; n < h4 * 2; n++) {
                AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
                AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
            }
        }
    }
    for (y = 0; y < h4; y++) {
        int x, o = (row + y) * s->sb_cols * 8 + col;
        struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];

        if (b->intra) {
            for (x = 0; x < w4; x++) {
                mv[x].ref[0] =
                mv[x].ref[1] = -1;
            }
        } else if (b->comp) {
            for (x = 0; x < w4; x++) {
                mv[x].ref[0] = b->ref[0];
                mv[x].ref[1] = b->ref[1];
                AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
                AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
            }
        } else {
            for (x = 0; x < w4; x++) {
                mv[x].ref[0] = b->ref[0];
                mv[x].ref[1] = -1;
                AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
            }
        }
    }
}
// FIXME merge cnt/eob arguments?
static av_always_inline int
decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
                        int is_tx32x32, unsigned (*cnt)[6][3],
                        unsigned (*eob)[6][2], uint8_t (*p)[6][11],
                        int nnz, const int16_t *scan, const int16_t (*nb)[2],
                        const int16_t *band_counts, const int16_t *qmul)
{
    int i = 0, band = 0, band_left = band_counts[band];
    uint8_t *tp = p[0][nnz];
    uint8_t cache[1024];

    do {
        int val, rc;

        val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
        eob[band][nnz][val]++;
        if (!val)
            break;

    skip_eob:
        if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
            cnt[band][nnz][0]++;
            if (!--band_left)
                band_left = band_counts[++band];
            cache[scan[i]] = 0;
            nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
            tp  = p[band][nnz];
            if (++i == n_coeffs)
                break; //invalid input; blocks should end with EOB
            goto skip_eob;
        }
2076 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2077 cnt[band][nnz][1]++;
2081 // fill in tp[3]..tp[10] (model fill) - done only once per frame for each pos
2083 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2085 cnt[band][nnz][2]++;
2086 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2087 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2088 cache[rc] = val = 2;
2090 val = 3 + vp56_rac_get_prob(c, tp[5]);
2093 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2095 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2096 val = 5 + vp56_rac_get_prob(c, 159);
2098 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2099 val += vp56_rac_get_prob(c, 145);
2103 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2104 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2105 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2106 val += (vp56_rac_get_prob(c, 148) << 1);
2107 val += vp56_rac_get_prob(c, 140);
2109 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2110 val += (vp56_rac_get_prob(c, 155) << 2);
2111 val += (vp56_rac_get_prob(c, 140) << 1);
2112 val += vp56_rac_get_prob(c, 135);
2114 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2115 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2116 val += (vp56_rac_get_prob(c, 157) << 3);
2117 val += (vp56_rac_get_prob(c, 141) << 2);
2118 val += (vp56_rac_get_prob(c, 134) << 1);
2119 val += vp56_rac_get_prob(c, 130);
2121 val = 67 + (vp56_rac_get_prob(c, 254) << 13);
2122 val += (vp56_rac_get_prob(c, 254) << 12);
2123 val += (vp56_rac_get_prob(c, 254) << 11);
2124 val += (vp56_rac_get_prob(c, 252) << 10);
2125 val += (vp56_rac_get_prob(c, 249) << 9);
2126 val += (vp56_rac_get_prob(c, 243) << 8);
2127 val += (vp56_rac_get_prob(c, 230) << 7);
2128 val += (vp56_rac_get_prob(c, 196) << 6);
2129 val += (vp56_rac_get_prob(c, 177) << 5);
2130 val += (vp56_rac_get_prob(c, 153) << 4);
2131 val += (vp56_rac_get_prob(c, 140) << 3);
2132 val += (vp56_rac_get_prob(c, 133) << 2);
2133 val += (vp56_rac_get_prob(c, 130) << 1);
2134 val += vp56_rac_get_prob(c, 129);
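// (for reference, the token categories decoded above, each extra bit carrying
// its own fixed probability: cat1 covers 5-6 (1 bit), cat2 7-10 (2 bits),
// cat3 11-18 (3 bits), cat4 19-34 (4 bits), cat5 35-66 (5 bits) and cat6
// 67-16450 with its 14 extra bits)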
2139 band_left = band_counts[++band];
2141 coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
2143 coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
2144 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2146 } while (++i < n_coeffs);
2151 static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2152 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2153 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2154 const int16_t (*nb)[2], const int16_t *band_counts,
2155 const int16_t *qmul)
2157 return decode_coeffs_b_generic(c, coef, n_coeffs, 0, cnt, eob, p,
2158 nnz, scan, nb, band_counts, qmul);
2161 static int decode_coeffs_b32(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2162 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2163 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2164 const int16_t (*nb)[2], const int16_t *band_counts,
2165 const int16_t *qmul)
2167 return decode_coeffs_b_generic(c, coef, n_coeffs, 1, cnt, eob, p,
2168 nnz, scan, nb, band_counts, qmul);
2171 static void decode_coeffs(AVCodecContext *ctx)
2173 VP9Context *s = ctx->priv_data;
2175 int row = s->row, col = s->col;
2176 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2177 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2178 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2179 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2180 int end_x = FFMIN(2 * (s->cols - col), w4);
2181 int end_y = FFMIN(2 * (s->rows - row), h4);
2182 int n, pl, x, y, res;
2183 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
2184 int tx = 4 * s->lossless + b->tx;
2185 const int16_t * const *yscans = vp9_scans[tx];
2186 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2187 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2188 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2189 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2190 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
2191 static const int16_t band_counts[4][8] = {
2192 { 1, 2, 3, 4, 3, 16 - 13 },
2193 { 1, 2, 3, 4, 11, 64 - 21 },
2194 { 1, 2, 3, 4, 11, 256 - 21 },
2195 { 1, 2, 3, 4, 11, 1024 - 21 },
2197 const int16_t *y_band_counts = band_counts[b->tx];
2198 const int16_t *uv_band_counts = band_counts[b->uvtx];
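// Each band_counts[] row lists how many scan positions fall into each of the
// six probability bands for one transform size; the final entry is written as
// a difference so each row visibly sums to the coefficient count of the
// transform (16, 64, 256 or 1024).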
2200 #define MERGE(la, end, step, rd) \
2201 for (n = 0; n < end; n += step) \
2202 la[n] = !!rd(&la[n])
2203 #define MERGE_CTX(step, rd) \
2205 MERGE(l, end_y, step, rd); \
2206 MERGE(a, end_x, step, rd); \
2209 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2210 for (n = 0, y = 0; y < end_y; y += step) { \
2211 for (x = 0; x < end_x; x += step, n += step * step) { \
2212 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2213 res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
2214 c, e, p, a[x] + l[y], yscans[txtp], \
2215 ynbs[txtp], y_band_counts, qmul[0]); \
2216 a[x] = l[y] = !!res; \
2218 AV_WN16A(&s->eob[n], res); \
2225 #define SPLAT(la, end, step, cond) \
2227 for (n = 1; n < end; n += step) \
2228 la[n] = la[n - 1]; \
2229 } else if (step == 4) { \
2231 for (n = 0; n < end; n += step) \
2232 AV_WN32A(&la[n], la[n] * 0x01010101); \
2234 for (n = 0; n < end; n += step) \
2235 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2237 } else /* step == 8 */ { \
2239 if (HAVE_FAST_64BIT) { \
2240 for (n = 0; n < end; n += step) \
2241 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2243 for (n = 0; n < end; n += step) { \
2244 uint32_t v32 = la[n] * 0x01010101; \
2245 AV_WN32A(&la[n], v32); \
2246 AV_WN32A(&la[n + 4], v32); \
2250 for (n = 0; n < end; n += step) \
2251 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2254 #define SPLAT_CTX(step) \
2256 SPLAT(a, end_x, step, end_x == w4); \
2257 SPLAT(l, end_y, step, end_y == h4); \
2263 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2266 MERGE_CTX(2, AV_RN16A);
2267 DECODE_Y_COEF_LOOP(2, 0,);
2271 MERGE_CTX(4, AV_RN32A);
2272 DECODE_Y_COEF_LOOP(4, 0,);
2276 MERGE_CTX(8, AV_RN64A);
2277 DECODE_Y_COEF_LOOP(8, 0, 32);
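// The nnz context arrays a[]/l[] are kept at 4x4 granularity. For larger
// transforms, MERGE_CTX first collapses the covered 4x4 entries into a single
// nonzero flag before decoding, and SPLAT_CTX broadcasts the decoded flag
// back over them afterwards, so neighbouring blocks of any transform size
// read a consistent context.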
2282 #define DECODE_UV_COEF_LOOP(step) \
2283 for (n = 0, y = 0; y < end_y; y += step) { \
2284 for (x = 0; x < end_x; x += step, n += step * step) { \
2285 res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n, \
2286 16 * step * step, c, e, p, a[x] + l[y], \
2287 uvscan, uvnb, uv_band_counts, qmul[1]); \
2288 a[x] = l[y] = !!res; \
2290 AV_WN16A(&s->uveob[pl][n], res); \
2292 s->uveob[pl][n] = res; \
2297 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2298 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2299 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2304 for (pl = 0; pl < 2; pl++) {
2305 a = &s->above_uv_nnz_ctx[pl][col];
2306 l = &s->left_uv_nnz_ctx[pl][row & 7];
2309 DECODE_UV_COEF_LOOP(1);
2312 MERGE_CTX(2, AV_RN16A);
2313 DECODE_UV_COEF_LOOP(2);
2317 MERGE_CTX(4, AV_RN32A);
2318 DECODE_UV_COEF_LOOP(4);
2322 MERGE_CTX(8, AV_RN64A);
2323 // a 64x64 (max) uv block can only ever contain one tx32x32 block,
2324 // so there is no need to loop
2325 res = decode_coeffs_b32(&s->c, s->uvblock[pl],
2326 1024, c, e, p, a[0] + l[0],
2327 uvscan, uvnb, uv_band_counts, qmul[1]);
2328 a[0] = l[0] = !!res;
2329 AV_WN16A(&s->uveob[pl][0], res);
2336 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2337 uint8_t *dst_edge, ptrdiff_t stride_edge,
2338 uint8_t *dst_inner, ptrdiff_t stride_inner,
2339 uint8_t *l, int col, int x, int w,
2340 int row, int y, enum TxfmMode tx,
2343 int have_top = row > 0 || y > 0;
2344 int have_left = col > s->tiling.tile_col_start || x > 0;
2345 int have_right = x < w - 1;
2346 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2347 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2348 { DC_127_PRED, VERT_PRED } },
2349 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2350 { HOR_PRED, HOR_PRED } },
2351 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2352 { LEFT_DC_PRED, DC_PRED } },
2353 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2354 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2355 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2356 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2357 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2358 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2359 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2360 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2361 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2362 { DC_127_PRED, VERT_LEFT_PRED } },
2363 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2364 { HOR_UP_PRED, HOR_UP_PRED } },
2365 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2366 { HOR_PRED, TM_VP8_PRED } },
2368 static const struct {
2369 uint8_t needs_left:1;
2370 uint8_t needs_top:1;
2371 uint8_t needs_topleft:1;
2372 uint8_t needs_topright:1;
2373 uint8_t invert_left:1;
2374 } edges[N_INTRA_PRED_MODES] = {
2375 [VERT_PRED] = { .needs_top = 1 },
2376 [HOR_PRED] = { .needs_left = 1 },
2377 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2378 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2379 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2380 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2381 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2382 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2383 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2384 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2385 [LEFT_DC_PRED] = { .needs_left = 1 },
2386 [TOP_DC_PRED] = { .needs_top = 1 },
2387 [DC_128_PRED] = { 0 },
2388 [DC_127_PRED] = { 0 },
2389 [DC_129_PRED] = { 0 }
2392 av_assert2(mode >= 0 && mode < 10);
2393 mode = mode_conv[mode][have_left][have_top];
2394 if (edges[mode].needs_top) {
2395 uint8_t *top, *topleft;
2396 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
2397 int n_px_need_tr = 0;
2399 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2402 // at the top of a sb64 row, use s->intra_pred_data[] instead of
2403 // dst[-stride] for intra prediction (it contains pre- rather than
2404 // post-loopfilter data)
2406 top = !(row & 7) && !y ?
2407 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2408 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2410 topleft = !(row & 7) && !y ?
2411 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2412 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2413 &dst_inner[-stride_inner];
2417 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2418 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2419 n_px_need + n_px_need_tr <= n_px_have) {
2423 if (n_px_need <= n_px_have) {
2424 memcpy(*a, top, n_px_need);
2426 memcpy(*a, top, n_px_have);
2427 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2428 n_px_need - n_px_have);
2431 memset(*a, 127, n_px_need);
2433 if (edges[mode].needs_topleft) {
2434 if (have_left && have_top) {
2435 (*a)[-1] = topleft[-1];
2437 (*a)[-1] = have_top ? 129 : 127;
2440 if (tx == TX_4X4 && edges[mode].needs_topright) {
2441 if (have_top && have_right &&
2442 n_px_need + n_px_need_tr <= n_px_have) {
2443 memcpy(&(*a)[4], &top[4], 4);
2445 memset(&(*a)[4], (*a)[3], 4);
2450 if (edges[mode].needs_left) {
2452 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
2453 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2454 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2456 if (edges[mode].invert_left) {
2457 if (n_px_need <= n_px_have) {
2458 for (i = 0; i < n_px_need; i++)
2459 l[i] = dst[i * stride - 1];
2461 for (i = 0; i < n_px_have; i++)
2462 l[i] = dst[i * stride - 1];
2463 memset(&l[n_px_have], l[n_px_have - 1], n_px_need - n_px_have);
2466 if (n_px_need <= n_px_have) {
2467 for (i = 0; i < n_px_need; i++)
2468 l[n_px_need - 1 - i] = dst[i * stride - 1];
2470 for (i = 0; i < n_px_have; i++)
2471 l[n_px_need - 1 - i] = dst[i * stride - 1];
2472 memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
2476 memset(l, 129, 4 << tx);
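// (127, 128 and 129 are the fixed fill values vp9 specifies for a missing
// top edge, missing both edges and a missing left edge respectively; the
// DC_127/DC_128/DC_129 pseudo-modes above encode the same convention)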
2483 static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2485 VP9Context *s = ctx->priv_data;
2487 int row = s->row, col = s->col;
2488 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2489 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2490 int end_x = FFMIN(2 * (s->cols - col), w4);
2491 int end_y = FFMIN(2 * (s->rows - row), h4);
2492 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2493 int uvstep1d = 1 << b->uvtx, p;
2494 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2495 LOCAL_ALIGNED_32(uint8_t, a_buf, [64]);
2496 LOCAL_ALIGNED_32(uint8_t, l, [32]);
2498 for (n = 0, y = 0; y < end_y; y += step1d) {
2499 uint8_t *ptr = dst, *ptr_r = dst_r;
2500 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2501 ptr_r += 4 * step1d, n += step) {
2502 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2504 uint8_t *a = &a_buf[32];
2505 enum TxfmType txtp = vp9_intra_txfm_type[mode];
2506 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2508 mode = check_intra_mode(s, mode, &a, ptr_r,
2509 s->frames[CUR_FRAME].tf.f->linesize[0],
2510 ptr, s->y_stride, l,
2511 col, x, w4, row, y, b->tx, 0);
2512 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2514 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2515 s->block + 16 * n, eob);
2517 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2518 dst += 4 * step1d * s->y_stride;
2525 step = 1 << (b->uvtx * 2);
2526 for (p = 0; p < 2; p++) {
2527 dst = s->dst[1 + p];
2528 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2529 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2530 uint8_t *ptr = dst, *ptr_r = dst_r;
2531 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2532 ptr_r += 4 * uvstep1d, n += step) {
2533 int mode = b->uvmode;
2534 uint8_t *a = &a_buf[32];
2535 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2537 mode = check_intra_mode(s, mode, &a, ptr_r,
2538 s->frames[CUR_FRAME].tf.f->linesize[1],
2539 ptr, s->uv_stride, l,
2540 col, x, w4, row, y, b->uvtx, p + 1);
2541 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2543 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2544 s->uvblock[p] + 16 * n, eob);
2546 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2547 dst += 4 * uvstep1d * s->uv_stride;
2552 static av_always_inline void mc_luma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2553 uint8_t *dst, ptrdiff_t dst_stride,
2554 const uint8_t *ref, ptrdiff_t ref_stride,
2555 ThreadFrame *ref_frame,
2556 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2557 int bw, int bh, int w, int h,
2558 const uint16_t *scale, const uint8_t *step)
2560 #define scale_mv(n, dim) (((int64_t)(n) * scale[dim]) >> 14)
2561 // BUG: libvpx seems to scale the two MV components separately. This
2562 // introduces rounding errors, but we have to reproduce them to be
2563 // exactly compatible with the output from libvpx...
2564 int mx = scale_mv(mv->x * 2, 0) + scale_mv(x * 16, 0);
2565 int my = scale_mv(mv->y * 2, 1) + scale_mv(y * 16, 1);
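// scale[] is presumably the 14-bit fixed-point ratio of reference to current
// frame size as set up by the header parser, so with a reference frame at
// half the current width, scale[0] == 8192 and scale_mv(n, 0) == n >> 1.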
2566 int refbw_m1, refbh_m1;
2571 ref += y * ref_stride + x;
2574 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2575 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2576 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2577 // we use +7 because the last 7 pixels of each sbrow can be changed in
2578 // the longest loopfilter of the next sbrow
2579 th = (y + refbh_m1 + 4 + 7) >> 6;
2580 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2581 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2582 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2583 ref - 3 * ref_stride - 3,
2585 refbw_m1 + 8, refbh_m1 + 8,
2586 x - 3, y - 3, w, h);
2587 ref = s->edge_emu_buffer + 3 * 144 + 3;
2590 smc(dst, dst_stride, ref, ref_stride, bh, mx, my, step[0], step[1]);
2593 static av_always_inline void mc_chroma_scaled(VP9Context *s, vp9_scaled_mc_func smc,
2594 uint8_t *dst_u, uint8_t *dst_v,
2595 ptrdiff_t dst_stride,
2596 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2597 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2598 ThreadFrame *ref_frame,
2599 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2600 int bw, int bh, int w, int h,
2601 const uint16_t *scale, const uint8_t *step)
2603 // BUG https://code.google.com/p/webm/issues/detail?id=820
2604 int mx = scale_mv(mv->x, 0) + (scale_mv(x * 16, 0) & ~15) + (scale_mv(x * 32, 0) & 15);
2605 int my = scale_mv(mv->y, 1) + (scale_mv(y * 16, 1) & ~15) + (scale_mv(y * 32, 1) & 15);
2607 int refbw_m1, refbh_m1;
2612 ref_u += y * src_stride_u + x;
2613 ref_v += y * src_stride_v + x;
2616 refbw_m1 = ((bw - 1) * step[0] + mx) >> 4;
2617 refbh_m1 = ((bh - 1) * step[1] + my) >> 4;
2618 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2619 // we use +7 because the last 7 pixels of each sbrow can be changed in
2620 // the longest loopfilter of the next sbrow
2621 th = (y + refbh_m1 + 4 + 7) >> 5;
2622 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2623 if (x < 3 || y < 3 || x + 4 >= w - refbw_m1 || y + 4 >= h - refbh_m1) {
2624 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2625 ref_u - 3 * src_stride_u - 3,
2627 refbw_m1 + 8, refbh_m1 + 8,
2628 x - 3, y - 3, w, h);
2629 ref_u = s->edge_emu_buffer + 3 * 144 + 3;
2630 smc(dst_u, dst_stride, ref_u, 144, bh, mx, my, step[0], step[1]);
2632 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2633 ref_v - 3 * src_stride_v - 3,
2635 refbw_m1 + 8, refbh_m1 + 8,
2636 x - 3, y - 3, w, h);
2637 ref_v = s->edge_emu_buffer + 3 * 144 + 3;
2638 smc(dst_v, dst_stride, ref_v, 144, bh, mx, my, step[0], step[1]);
2640 smc(dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my, step[0], step[1]);
2641 smc(dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my, step[0], step[1]);
2645 #define FN(x) x##_scaled
2646 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2647 mc_luma_scaled(s, s->dsp.s##mc, dst, dst_ls, src, src_ls, tref, row, col, \
2648 mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2649 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2650 row, col, mv, bw, bh, w, h, i) \
2651 mc_chroma_scaled(s, s->dsp.s##mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2652 row, col, mv, bw, bh, w, h, s->mvscale[b->ref[i]], s->mvstep[b->ref[i]])
2653 #include "vp9_mc_template.c"
2655 #undef mc_chroma_dir
2658 static av_always_inline void mc_luma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2659 uint8_t *dst, ptrdiff_t dst_stride,
2660 const uint8_t *ref, ptrdiff_t ref_stride,
2661 ThreadFrame *ref_frame,
2662 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2663 int bw, int bh, int w, int h)
2665 int mx = mv->x, my = mv->y, th;
2669 ref += y * ref_stride + x;
2672 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2673 // we use +7 because the last 7 pixels of each sbrow can be changed in
2674 // the longest loopfilter of the next sbrow
2675 th = (y + bh + 4 * !!my + 7) >> 6;
2676 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2677 if (x < !!mx * 3 || y < !!my * 3 ||
2678 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2679 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2680 ref - !!my * 3 * ref_stride - !!mx * 3,
2682 bw + !!mx * 7, bh + !!my * 7,
2683 x - !!mx * 3, y - !!my * 3, w, h);
2684 ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2687 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
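// note the << 1: luma MVs are in 1/8-pel units while the subpel filters are
// indexed in 1/16-pel steps. The chroma variant below passes the (by then
// fractional-only) mx/my through unshifted, since the same MV already
// amounts to 1/16-pel on the half-resolution chroma planes.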
2690 static av_always_inline void mc_chroma_unscaled(VP9Context *s, vp9_mc_func (*mc)[2],
2691 uint8_t *dst_u, uint8_t *dst_v,
2692 ptrdiff_t dst_stride,
2693 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2694 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2695 ThreadFrame *ref_frame,
2696 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2697 int bw, int bh, int w, int h)
2699 int mx = mv->x, my = mv->y, th;
2703 ref_u += y * src_stride_u + x;
2704 ref_v += y * src_stride_v + x;
2707 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2708 // we use +7 because the last 7 pixels of each sbrow can be changed in
2709 // the longest loopfilter of the next sbrow
2710 th = (y + bh + 4 * !!my + 7) >> 5;
2711 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2712 if (x < !!mx * 3 || y < !!my * 3 ||
2713 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2714 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2715 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2717 bw + !!mx * 7, bh + !!my * 7,
2718 x - !!mx * 3, y - !!my * 3, w, h);
2719 ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2720 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2722 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2723 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2725 bw + !!mx * 7, bh + !!my * 7,
2726 x - !!mx * 3, y - !!my * 3, w, h);
2727 ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2728 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
2730 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2731 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
2736 #define mc_luma_dir(s, mc, dst, dst_ls, src, src_ls, tref, row, col, mv, bw, bh, w, h, i) \
2737 mc_luma_unscaled(s, s->dsp.mc, dst, dst_ls, src, src_ls, tref, row, col, \
2739 #define mc_chroma_dir(s, mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2740 row, col, mv, bw, bh, w, h, i) \
2741 mc_chroma_unscaled(s, s->dsp.mc, dstu, dstv, dst_ls, srcu, srcu_ls, srcv, srcv_ls, tref, \
2742 row, col, mv, bw, bh, w, h)
2743 #include "vp9_mc_template.c"
2744 #undef mc_luma_dir
2745 #undef mc_chroma_dir
2748 static void inter_recon(AVCodecContext *ctx)
2750 VP9Context *s = ctx->priv_data;
2752 int row = s->row, col = s->col;
2754 if (s->mvscale[b->ref[0]][0] || (b->comp && s->mvscale[b->ref[1]][0])) {
2755 inter_pred_scaled(ctx);
2760 /* mostly copied from intra_recon() */
2762 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2763 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2764 int end_x = FFMIN(2 * (s->cols - col), w4);
2765 int end_y = FFMIN(2 * (s->rows - row), h4);
2766 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2767 int uvstep1d = 1 << b->uvtx, p;
2768 uint8_t *dst = s->dst[0];
2771 for (n = 0, y = 0; y < end_y; y += step1d) {
2773 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2774 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2777 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2778 s->block + 16 * n, eob);
2780 dst += 4 * s->y_stride * step1d;
2786 step = 1 << (b->uvtx * 2);
2787 for (p = 0; p < 2; p++) {
2788 dst = s->dst[p + 1];
2789 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2791 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2792 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2795 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2796 s->uvblock[p] + 16 * n, eob);
2798 dst += 4 * uvstep1d * s->uv_stride;
2804 static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
2805 int row_and_7, int col_and_7,
2806 int w, int h, int col_end, int row_end,
2807 enum TxfmMode tx, int skip_inter)
2809 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2810 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2811 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2812 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2814 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2815 // edges. This means that for UV, we work on two subsampled blocks at
2816 // a time, and we only use the topleft block's mode information to set
2817 // things like block strength. Thus, for any block size smaller than
2818 // 16x16, ignore the odd portion of the block.
2819 if (tx == TX_4X4 && is_uv) {
2834 if (tx == TX_4X4 && !skip_inter) {
2835 int t = 1 << col_and_7, m_col = (t << w) - t, y;
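// each set bit in t/m_col selects one filter-mask column, e.g. col_and_7 == 2
// and w == 2 give t == 0x4 and m_col == (0x4 << 2) - 0x4 == 0xc, i.e. bits 2
// and 3 for the two columns covered by the block.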
2836 int m_col_odd = (t << (w - 1)) - t;
2838 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
2840 int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2842 for (y = row_and_7; y < h + row_and_7; y++) {
2843 int col_mask_id = 2 - !(y & 7);
2845 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2846 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2847 // for odd lines, if the odd col is not being filtered,
2848 // skip odd row also:
2849 // .---. <-- a
2850 // |   |
2851 // |___| <-- b
2852 // ^   ^
2853 // c   d
2854 //
2855 // if a/c are even row/col and b/d are odd, and d is skipped,
2856 // e.g. right edge of size-66x66.webm, then skip b also (bug)
2857 if ((col_end & 1) && (y & 1)) {
2858 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
2860 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
2864 int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2866 for (y = row_and_7; y < h + row_and_7; y++) {
2867 int col_mask_id = 2 - !(y & 3);
2869 lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
2870 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2871 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
2872 lflvl->mask[is_uv][0][y][3] |= m_col;
2873 lflvl->mask[is_uv][1][y][3] |= m_col;
2877 int y, t = 1 << col_and_7, m_col = (t << w) - t;
2880 int mask_id = (tx == TX_8X8);
2881 int l2 = tx + is_uv - 1, step1d = 1 << l2;
2882 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2883 int m_row = m_col & masks[l2];
2885 // at odd UV col/row tx16/tx32 loopfilter edges, force the 8-wide
2886 // loopfilter to prevent it from running off the visible edge.
2887 if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2888 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2889 int m_row_8 = m_row - m_row_16;
2891 for (y = row_and_7; y < h + row_and_7; y++) {
2892 lflvl->mask[is_uv][0][y][0] |= m_row_16;
2893 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2896 for (y = row_and_7; y < h + row_and_7; y++)
2897 lflvl->mask[is_uv][0][y][mask_id] |= m_row;
2900 if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2901 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2902 lflvl->mask[is_uv][1][y][0] |= m_col;
2903 if (y - row_and_7 == h - 1)
2904 lflvl->mask[is_uv][1][y][1] |= m_col;
2906 for (y = row_and_7; y < h + row_and_7; y += step1d)
2907 lflvl->mask[is_uv][1][y][mask_id] |= m_col;
2909 } else if (tx != TX_4X4) {
2912 mask_id = (tx == TX_8X8) || (is_uv && h == 1);
2913 lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
2914 mask_id = (tx == TX_8X8) || (is_uv && w == 1);
2915 for (y = row_and_7; y < h + row_and_7; y++)
2916 lflvl->mask[is_uv][0][y][mask_id] |= t;
2918 int t8 = t & 0x01, t4 = t - t8;
2920 for (y = row_and_7; y < h + row_and_7; y++) {
2921 lflvl->mask[is_uv][0][y][2] |= t4;
2922 lflvl->mask[is_uv][0][y][1] |= t8;
2924 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
2926 int t8 = t & 0x11, t4 = t - t8;
2928 for (y = row_and_7; y < h + row_and_7; y++) {
2929 lflvl->mask[is_uv][0][y][2] |= t4;
2930 lflvl->mask[is_uv][0][y][1] |= t8;
2932 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
2937 static void decode_b(AVCodecContext *ctx, int row, int col,
2938 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2939 enum BlockLevel bl, enum BlockPartition bp)
2941 VP9Context *s = ctx->priv_data;
2943 enum BlockSize bs = bl * 3 + bp;
2944 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
2946 AVFrame *f = s->frames[CUR_FRAME].tf.f;
2952 s->min_mv.x = -(128 + col * 64);
2953 s->min_mv.y = -(128 + row * 64);
2954 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
2955 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
2961 b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
2968 #define SPLAT_ZERO_CTX(v, n) \
2970 case 1: v = 0; break; \
2971 case 2: AV_ZERO16(&v); break; \
2972 case 4: AV_ZERO32(&v); break; \
2973 case 8: AV_ZERO64(&v); break; \
2974 case 16: AV_ZERO128(&v); break; \
2976 #define SPLAT_ZERO_YUV(dir, var, off, n) \
2978 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
2979 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
2980 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
2984 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1); break;
2985 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2); break;
2986 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4); break;
2987 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8); break;
2990 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1); break;
2991 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2); break;
2992 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4); break;
2993 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8); break;
2998 s->block += w4 * h4 * 64;
2999 s->uvblock[0] += w4 * h4 * 16;
3000 s->uvblock[1] += w4 * h4 * 16;
3001 s->eob += 4 * w4 * h4;
3002 s->uveob[0] += w4 * h4;
3003 s->uveob[1] += w4 * h4;
3009 // use emulated overhangs if the stride of the target buffer can't hold
3010 // them; this lets us support emu-edge and the like even with large blocks
3012 emu[0] = (col + w4) * 8 > f->linesize[0] ||
3013 (row + h4) > s->rows;
3014 emu[1] = (col + w4) * 4 > f->linesize[1] ||
3015 (row + h4) > s->rows;
3017 s->dst[0] = s->tmp_y;
3020 s->dst[0] = f->data[0] + yoff;
3021 s->y_stride = f->linesize[0];
3024 s->dst[1] = s->tmp_uv[0];
3025 s->dst[2] = s->tmp_uv[1];
3028 s->dst[1] = f->data[1] + uvoff;
3029 s->dst[2] = f->data[2] + uvoff;
3030 s->uv_stride = f->linesize[1];
3033 intra_recon(ctx, yoff, uvoff);
3038 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3040 for (n = 0; o < w; n++) {
3045 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3046 s->tmp_y + o, 64, h, 0, 0);
3052 int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
3054 for (n = 1; o < w; n++) {
3059 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3060 s->tmp_uv[0] + o, 32, h, 0, 0);
3061 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3062 s->tmp_uv[1] + o, 32, h, 0, 0);
3068 // pick filter level and find edges to apply filter to
3069 if (s->filter.level &&
3070 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3071 [b->mode[3] != ZEROMV]) > 0) {
3072 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3073 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3075 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3076 mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3077 mask_edges(lflvl, 1, row7, col7, x_end, y_end,
3078 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3079 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3080 b->uvtx, skip_inter);
3082 if (!s->filter.lim_lut[lvl]) {
3083 int sharp = s->filter.sharpness;
3087 limit >>= (sharp + 3) >> 2;
3088 limit = FFMIN(limit, 9 - sharp);
3090 limit = FFMAX(limit, 1);
3092 s->filter.lim_lut[lvl] = limit;
3093 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
3099 s->block += w4 * h4 * 64;
3100 s->uvblock[0] += w4 * h4 * 16;
3101 s->uvblock[1] += w4 * h4 * 16;
3102 s->eob += 4 * w4 * h4;
3103 s->uveob[0] += w4 * h4;
3104 s->uveob[1] += w4 * h4;
3108 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3109 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3111 VP9Context *s = ctx->priv_data;
3112 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3113 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
3114 const uint8_t *p = s->keyframe ? vp9_default_kf_partition_probs[bl][c] :
3115 s->prob.p.partition[bl][c];
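// c packs the relevant above bit (bit 0) and left bit (bit 1) for this block
// level into a 0-3 context that selects the partition probability set; e.g.
// bl == BL_32X32 tests bit (3 - bl) == 2 of each neighbouring context byte.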
3116 enum BlockPartition bp;
3117 ptrdiff_t hbs = 4 >> bl;
3118 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3119 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3122 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3123 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3124 } else if (col + hbs < s->cols) { // FIXME why not <=?
3125 if (row + hbs < s->rows) { // FIXME why not <=?
3126 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3128 case PARTITION_NONE:
3129 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3132 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3133 yoff += hbs * 8 * y_stride;
3134 uvoff += hbs * 4 * uv_stride;
3135 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3138 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3141 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3143 case PARTITION_SPLIT:
3144 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3145 decode_sb(ctx, row, col + hbs, lflvl,
3146 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3147 yoff += hbs * 8 * y_stride;
3148 uvoff += hbs * 4 * uv_stride;
3149 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3150 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3151 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3156 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3157 bp = PARTITION_SPLIT;
3158 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3159 decode_sb(ctx, row, col + hbs, lflvl,
3160 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3163 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3165 } else if (row + hbs < s->rows) { // FIXME why not <=?
3166 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3167 bp = PARTITION_SPLIT;
3168 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3169 yoff += hbs * 8 * y_stride;
3170 uvoff += hbs * 4 * uv_stride;
3171 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3174 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3177 bp = PARTITION_SPLIT;
3178 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3180 s->counts.partition[bl][c][bp]++;
3183 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3184 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3186 VP9Context *s = ctx->priv_data;
3188 ptrdiff_t hbs = 4 >> bl;
3189 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3190 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3193 av_assert2(b->bl == BL_8X8);
3194 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3195 } else if (s->b->bl == bl) {
3196 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3197 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3198 yoff += hbs * 8 * y_stride;
3199 uvoff += hbs * 4 * uv_stride;
3200 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3201 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3204 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
3207 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3208 if (col + hbs < s->cols) { // FIXME why not <=?
3209 if (row + hbs < s->rows) {
3210 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
3211 uvoff + 4 * hbs, bl + 1);
3212 yoff += hbs * 8 * y_stride;
3213 uvoff += hbs * 4 * uv_stride;
3214 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3215 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3216 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3220 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3222 } else if (row + hbs < s->rows) {
3223 yoff += hbs * 8 * y_stride;
3224 uvoff += hbs * 4 * uv_stride;
3225 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3230 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3231 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3233 VP9Context *s = ctx->priv_data;
3234 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3235 uint8_t *dst = f->data[0] + yoff, *lvl = lflvl->level;
3236 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
3239 // FIXME to what extent can we interleave the v/h loopfilter calls? E.g.
3240 // if you think of them as acting on a 8x8 block max, we can interleave
3241 // each v/h within the single x loop, but that only works if we work on
3242 // 8 pixel blocks, and we won't always do that (we want at least 16px
3243 // to use SSE2 optimizations, perhaps 32 for AVX2)
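// hm1/hm2 below OR together the per-size masks of two adjacent 8px rows so
// the x loop can stop early once hm & ~(x - 1) runs out of set bits, and so
// two vertically adjacent 8px edges can be merged into one loop_filter_16
// call.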
3245 // filter edges between columns, Y plane (e.g. block1 | block2)
3246 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
3247 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
3248 uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
3249 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3250 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3251 unsigned hm = hm1 | hm2 | hm13 | hm23;
3253 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
3255 int L = *l, H = L >> 4;
3256 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3259 if (hmask1[0] & x) {
3260 if (hmask2[0] & x) {
3261 av_assert2(l[8] == L);
3262 s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
3264 s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
3266 } else if (hm2 & x) {
3269 E |= s->filter.mblim_lut[L] << 8;
3270 I |= s->filter.lim_lut[L] << 8;
3271 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3273 [0](ptr, ls_y, E, I, H);
3275 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3276 [0](ptr, ls_y, E, I, H);
3279 } else if (hm2 & x) {
3280 int L = l[8], H = L >> 4;
3281 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3284 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3285 [0](ptr + 8 * ls_y, ls_y, E, I, H);
3289 int L = *l, H = L >> 4;
3290 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3295 E |= s->filter.mblim_lut[L] << 8;
3296 I |= s->filter.lim_lut[L] << 8;
3297 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
3299 s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
3301 } else if (hm23 & x) {
3302 int L = l[8], H = L >> 4;
3303 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3305 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
3311 // filter edges between rows, Y plane (e.g. ------)
3313 dst = f->data[0] + yoff;
3315 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
3316 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
3317 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
3319 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
3322 int L = *l, H = L >> 4;
3323 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3326 if (vmask[0] & (x << 1)) {
3327 av_assert2(l[1] == L);
3328 s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
3330 s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
3332 } else if (vm & (x << 1)) {
3335 E |= s->filter.mblim_lut[L] << 8;
3336 I |= s->filter.lim_lut[L] << 8;
3337 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3338 [!!(vmask[1] & (x << 1))]
3339 [1](ptr, ls_y, E, I, H);
3341 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3342 [1](ptr, ls_y, E, I, H);
3344 } else if (vm & (x << 1)) {
3345 int L = l[1], H = L >> 4;
3346 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3348 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
3349 [1](ptr + 8, ls_y, E, I, H);
3353 int L = *l, H = L >> 4;
3354 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3356 if (vm3 & (x << 1)) {
3359 E |= s->filter.mblim_lut[L] << 8;
3360 I |= s->filter.lim_lut[L] << 8;
3361 s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
3363 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
3365 } else if (vm3 & (x << 1)) {
3366 int L = l[1], H = L >> 4;
3367 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3369 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
3374 // same principle but for U/V planes
3375 for (p = 0; p < 2; p++) {
3377 dst = f->data[1 + p] + uvoff;
3378 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
3379 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
3380 uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
3381 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
3382 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
3384 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
3387 int L = *l, H = L >> 4;
3388 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3390 if (hmask1[0] & x) {
3391 if (hmask2[0] & x) {
3392 av_assert2(l[16] == L);
3393 s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
3395 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
3397 } else if (hm2 & x) {
3400 E |= s->filter.mblim_lut[L] << 8;
3401 I |= s->filter.lim_lut[L] << 8;
3402 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3404 [0](ptr, ls_uv, E, I, H);
3406 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3407 [0](ptr, ls_uv, E, I, H);
3409 } else if (hm2 & x) {
3410 int L = l[16], H = L >> 4;
3411 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3413 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3414 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
3422 dst = f->data[1 + p] + uvoff;
3423 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
3424 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
3425 unsigned vm = vmask[0] | vmask[1] | vmask[2];
3427 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
3430 int L = *l, H = L >> 4;
3431 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3434 if (vmask[0] & (x << 2)) {
3435 av_assert2(l[2] == L);
3436 s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
3438 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
3440 } else if (vm & (x << 2)) {
3443 E |= s->filter.mblim_lut[L] << 8;
3444 I |= s->filter.lim_lut[L] << 8;
3445 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3446 [!!(vmask[1] & (x << 2))]
3447 [1](ptr, ls_uv, E, I, H);
3449 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3450 [1](ptr, ls_uv, E, I, H);
3452 } else if (vm & (x << 2)) {
3453 int L = l[2], H = L >> 4;
3454 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3456 s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
3457 [1](ptr + 8, ls_uv, E, I, H);
3467 static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
3469 int sb_start = ( idx * n) >> log2_n;
3470 int sb_end = ((idx + 1) * n) >> log2_n;
3471 *start = FFMIN(sb_start, n) << 3;
3472 *end = FFMIN(sb_end, n) << 3;
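// e.g. log2_n == 1 (2 tiles) with n == 9 sb64 columns splits into sb 0-3 and
// sb 4-8; the << 3 converts superblock64 units into the 8px block units that
// row/col use throughout the decoder.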
3475 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3476 int max_count, int update_factor)
3478 unsigned ct = ct0 + ct1, p2, p1;
3484 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3485 p2 = av_clip(p2, 1, 255);
3486 ct = FFMIN(ct, max_count);
3487 update_factor = FASTDIV(update_factor * ct, max_count);
3489 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3490 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
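// worked example: p1 == 128, ct0 == 30, ct1 == 10, max_count == 20 and
// update_factor == 128 give p2 == (30 * 256 + 20) / 40 == 192; ct is capped
// at 20, the factor stays 128, and *p becomes 128 + ((64 * 128 + 128) >> 8)
// == 160, i.e. halfway towards the new estimate.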
3493 static void adapt_probs(VP9Context *s)
3496 prob_context *p = &s->prob_ctx[s->framectxid].p;
3497 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
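// (the 112 vs. 128 asymmetry appears to mirror libvpx, which adapts
// coefficient probabilities aggressively, with the larger factor, only on
// the first inter frame after a keyframe or intra-only frame)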
3500 for (i = 0; i < 4; i++)
3501 for (j = 0; j < 2; j++)
3502 for (k = 0; k < 2; k++)
3503 for (l = 0; l < 6; l++)
3504 for (m = 0; m < 6; m++) {
3505 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3506 unsigned *e = s->counts.eob[i][j][k][l][m];
3507 unsigned *c = s->counts.coef[i][j][k][l][m];
3509 if (l == 0 && m >= 3) // the dc band only uses 3 of the 6 coef contexts
3512 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3513 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3514 adapt_prob(&pp[2], c[1], c[2], 24, uf);
3517 if (s->keyframe || s->intraonly) {
3518 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3519 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3520 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3521 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
3526 for (i = 0; i < 3; i++)
3527 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
3530 for (i = 0; i < 4; i++)
3531 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
3534 if (s->comppredmode == PRED_SWITCHABLE) {
3535 for (i = 0; i < 5; i++)
3536 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
3540 if (s->comppredmode != PRED_SINGLEREF) {
3541 for (i = 0; i < 5; i++)
3542 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3543 s->counts.comp_ref[i][1], 20, 128);
3546 if (s->comppredmode != PRED_COMPREF) {
3547 for (i = 0; i < 5; i++) {
3548 uint8_t *pp = p->single_ref[i];
3549 unsigned (*c)[2] = s->counts.single_ref[i];
3551 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3552 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3556 // block partitioning
3557 for (i = 0; i < 4; i++)
3558 for (j = 0; j < 4; j++) {
3559 uint8_t *pp = p->partition[i][j];
3560 unsigned *c = s->counts.partition[i][j];
3562 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3563 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3564 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3568 if (s->txfmmode == TX_SWITCHABLE) {
3569 for (i = 0; i < 2; i++) {
3570 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3572 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3573 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3574 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3575 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3576 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3577 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3581 // interpolation filter
3582 if (s->filtermode == FILTER_SWITCHABLE) {
3583 for (i = 0; i < 4; i++) {
3584 uint8_t *pp = p->filter[i];
3585 unsigned *c = s->counts.filter[i];
3587 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3588 adapt_prob(&pp[1], c[1], c[2], 20, 128);
3593 for (i = 0; i < 7; i++) {
3594 uint8_t *pp = p->mv_mode[i];
3595 unsigned *c = s->counts.mv_mode[i];
3597 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3598 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3599 adapt_prob(&pp[2], c[1], c[3], 20, 128);
3604 uint8_t *pp = p->mv_joint;
3605 unsigned *c = s->counts.mv_joint;
3607 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3608 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3609 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3613 for (i = 0; i < 2; i++) {
3615 unsigned *c, (*c2)[2], sum;
3617 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3618 s->counts.mv_comp[i].sign[1], 20, 128);
3620 pp = p->mv_comp[i].classes;
3621 c = s->counts.mv_comp[i].classes;
3622 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3623 adapt_prob(&pp[0], c[0], sum, 20, 128);
3625 adapt_prob(&pp[1], c[1], sum, 20, 128);
3627 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3628 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3630 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3631 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3633 adapt_prob(&pp[6], c[6], sum, 20, 128);
3634 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3635 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3636 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3638 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3639 s->counts.mv_comp[i].class0[1], 20, 128);
3640 pp = p->mv_comp[i].bits;
3641 c2 = s->counts.mv_comp[i].bits;
3642 for (j = 0; j < 10; j++)
3643 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3645 for (j = 0; j < 2; j++) {
3646 pp = p->mv_comp[i].class0_fp[j];
3647 c = s->counts.mv_comp[i].class0_fp[j];
3648 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3649 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3650 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3652 pp = p->mv_comp[i].fp;
3653 c = s->counts.mv_comp[i].fp;
3654 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3655 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3656 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3658 if (s->highprecisionmvs) {
3659 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3660 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3661 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3662 s->counts.mv_comp[i].hp[1], 20, 128);
3667 for (i = 0; i < 4; i++) {
3668 uint8_t *pp = p->y_mode[i];
3669 unsigned *c = s->counts.y_mode[i], sum, s2;
3671 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3672 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3673 sum -= c[TM_VP8_PRED];
3674 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3675 sum -= c[VERT_PRED];
3676 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3677 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3679 adapt_prob(&pp[3], s2, sum, 20, 128);
3681 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3682 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3683 sum -= c[DIAG_DOWN_LEFT_PRED];
3684 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3685 sum -= c[VERT_LEFT_PRED];
3686 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3687 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3691 for (i = 0; i < 10; i++) {
3692 uint8_t *pp = p->uv_mode[i];
3693 unsigned *c = s->counts.uv_mode[i], sum, s2;
3695 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3696 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3697 sum -= c[TM_VP8_PRED];
3698 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3699 sum -= c[VERT_PRED];
3700 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3701 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3703 adapt_prob(&pp[3], s2, sum, 20, 128);
3705 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3706 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3707 sum -= c[DIAG_DOWN_LEFT_PRED];
3708 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3709 sum -= c[VERT_LEFT_PRED];
3710 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3711 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3715 static void free_buffers(VP9Context *s)
3717 av_freep(&s->intra_pred_data[0]);
3718 av_freep(&s->b_base);
3719 av_freep(&s->block_base);
3722 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3724 VP9Context *s = ctx->priv_data;
3727 for (i = 0; i < 3; i++) {
3728 if (s->frames[i].tf.f->data[0])
3729 vp9_unref_frame(ctx, &s->frames[i]);
3730 av_frame_free(&s->frames[i].tf.f);
3732 for (i = 0; i < 8; i++) {
3733 if (s->refs[i].f->data[0])
3734 ff_thread_release_buffer(ctx, &s->refs[i]);
3735 av_frame_free(&s->refs[i].f);
3736 if (s->next_refs[i].f->data[0])
3737 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3738 av_frame_free(&s->next_refs[i].f);
3748 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3749 int *got_frame, AVPacket *pkt)
3751 const uint8_t *data = pkt->data;
3752 int size = pkt->size;
3753 VP9Context *s = ctx->priv_data;
3754 int res, tile_row, tile_col, i, ref, row, col;
3755 int retain_segmap_ref = s->segmentation.enabled && !s->segmentation.update_map;
3756 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
3759 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3761 } else if (res == 0) {
3762 if (!s->refs[ref].f->data[0]) {
3763 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3764 return AVERROR_INVALIDDATA;
3766 if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
3768 ((AVFrame *)frame)->pkt_pts = pkt->pts;
3769 ((AVFrame *)frame)->pkt_dts = pkt->dts;
3776 if (!retain_segmap_ref) {
3777 if (s->frames[REF_FRAME_SEGMAP].tf.f->data[0])
3778 vp9_unref_frame(ctx, &s->frames[REF_FRAME_SEGMAP]);
3779 if (!s->keyframe && !s->intraonly && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
3780 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_SEGMAP], &s->frames[CUR_FRAME])) < 0)
3783 if (s->frames[REF_FRAME_MVPAIR].tf.f->data[0])
3784 vp9_unref_frame(ctx, &s->frames[REF_FRAME_MVPAIR]);
3785 if (!s->intraonly && !s->keyframe && !s->errorres && s->frames[CUR_FRAME].tf.f->data[0] &&
3786 (res = vp9_ref_frame(ctx, &s->frames[REF_FRAME_MVPAIR], &s->frames[CUR_FRAME])) < 0)
3788 if (s->frames[CUR_FRAME].tf.f->data[0])
3789 vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
3790 if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
3792 f = s->frames[CUR_FRAME].tf.f;
3793 f->key_frame = s->keyframe;
3794 f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3795 ls_y = f->linesize[0];
3796 ls_uv = f->linesize[1];
3799 for (i = 0; i < 8; i++) {
3800 if (s->next_refs[i].f->data[0])
3801 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3802 if (s->refreshrefmask & (1 << i)) {
3803 res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
3805 res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
3812 ctx->color_range = AVCOL_RANGE_JPEG;
3814 ctx->color_range = AVCOL_RANGE_MPEG;
3816 switch (s->colorspace) {
3817 case 1: ctx->colorspace = AVCOL_SPC_BT470BG; break;
3818 case 2: ctx->colorspace = AVCOL_SPC_BT709; break;
3819 case 3: ctx->colorspace = AVCOL_SPC_SMPTE170M; break;
3820 case 4: ctx->colorspace = AVCOL_SPC_SMPTE240M; break;
3823 // main tile decode loop
3824 memset(s->above_partition_ctx, 0, s->cols);
3825 memset(s->above_skip_ctx, 0, s->cols);
3826 if (s->keyframe || s->intraonly) {
3827 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
3829 memset(s->above_mode_ctx, NEARESTMV, s->cols);
3831 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
3832 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
3833 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
3834 memset(s->above_segpred_ctx, 0, s->cols);
3835 s->pass = s->frames[CUR_FRAME].uses_2pass =
3836 ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
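// A frame-threaded decode that adapts its probabilities (refreshctx) without
// parallelmode needs two passes: pass 1 only parses symbols so adapt_probs()
// and ff_thread_finish_setup() can release the next frame thread early, and
// pass 2 (via decode_sb_mem) performs the actual reconstruction.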
3837 if ((res = update_block_buffers(ctx)) < 0) {
3838 av_log(ctx, AV_LOG_ERROR,
3839 "Failed to allocate block buffers\n");
3842 if (s->refreshctx && s->parallelmode) {
3845 for (i = 0; i < 4; i++) {
3846 for (j = 0; j < 2; j++)
3847 for (k = 0; k < 2; k++)
3848 for (l = 0; l < 6; l++)
3849 for (m = 0; m < 6; m++)
3850 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
3851 s->prob.coef[i][j][k][l][m], 3);
3852 if (s->txfmmode == i)
3855 s->prob_ctx[s->framectxid].p = s->prob.p;
3856 ff_thread_finish_setup(ctx);
3857 } else if (!s->refreshctx) {
3858 ff_thread_finish_setup(ctx);
3864 s->block = s->block_base;
3865 s->uvblock[0] = s->uvblock_base[0];
3866 s->uvblock[1] = s->uvblock_base[1];
3867 s->eob = s->eob_base;
3868 s->uveob[0] = s->uveob_base[0];
3869 s->uveob[1] = s->uveob_base[1];
3871 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
3872 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
3873 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
3875 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3878 if (tile_col == s->tiling.tile_cols - 1 &&
3879 tile_row == s->tiling.tile_rows - 1) {
3882 tile_size = AV_RB32(data);
3886 if (tile_size > size) {
3887 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3888 return AVERROR_INVALIDDATA;
3890 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
3891 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
3892 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3893 return AVERROR_INVALIDDATA;
3900 for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
3901 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
3902 struct VP9Filter *lflvl_ptr = s->lflvl;
3903 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
3905 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3906 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
3907 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
3910 memset(s->left_partition_ctx, 0, 8);
3911 memset(s->left_skip_ctx, 0, 8);
3912 if (s->keyframe || s->intraonly) {
3913 memset(s->left_mode_ctx, DC_PRED, 16);
3915 memset(s->left_mode_ctx, NEARESTMV, 8);
3917 memset(s->left_y_nnz_ctx, 0, 16);
3918 memset(s->left_uv_nnz_ctx, 0, 16);
3919 memset(s->left_segpred_ctx, 0, 8);
3921 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
3924 for (col = s->tiling.tile_col_start;
3925 col < s->tiling.tile_col_end;
3926 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3927 // FIXME integrate with lf code (i.e. zero after each
3928 // use, as is done for the invtxfm coefficients)
3930 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
3934 decode_sb_mem(ctx, row, col, lflvl_ptr,
3935 yoff2, uvoff2, BL_64X64);
3937 decode_sb(ctx, row, col, lflvl_ptr,
3938 yoff2, uvoff2, BL_64X64);
3942 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
3950 // back up pre-loopfilter reconstruction data for intra
3951 // prediction of the next row of sb64s
3952 if (row + 8 < s->rows) {
3953 memcpy(s->intra_pred_data[0],
3954 f->data[0] + yoff + 63 * ls_y,
3956 memcpy(s->intra_pred_data[1],
3957 f->data[1] + uvoff + 31 * ls_uv,
3959 memcpy(s->intra_pred_data[2],
3960 f->data[2] + uvoff + 31 * ls_uv,
3964 // loopfilter one row
3965 if (s->filter.level) {
3968 lflvl_ptr = s->lflvl;
3969 for (col = 0; col < s->cols;
3970 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3971 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
3975 // FIXME maybe we can make this more fine-grained by running the
3976 // loopfilter per-block instead of after each sbrow
3977 // In fact that would also make intra pred left preparation easier?
3978 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
3982 if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
3984 ff_thread_finish_setup(ctx);
3986 } while (s->pass++ == 1);
3987 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3990 for (i = 0; i < 8; i++) {
3991 if (s->refs[i].f->data[0])
3992 ff_thread_release_buffer(ctx, &s->refs[i]);
3993 ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
3996 if (!s->invisible) {
3997 if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
4005 static void vp9_decode_flush(AVCodecContext *ctx)
4007 VP9Context *s = ctx->priv_data;
4010 for (i = 0; i < 3; i++)
4011 vp9_unref_frame(ctx, &s->frames[i]);
4012 for (i = 0; i < 8; i++)
4013 ff_thread_release_buffer(ctx, &s->refs[i]);
4016 static int init_frames(AVCodecContext *ctx)
4018 VP9Context *s = ctx->priv_data;
4021 for (i = 0; i < 3; i++) {
4022 s->frames[i].tf.f = av_frame_alloc();
4023 if (!s->frames[i].tf.f) {
4024 vp9_decode_free(ctx);
4025 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4026 return AVERROR(ENOMEM);
4029 for (i = 0; i < 8; i++) {
4030 s->refs[i].f = av_frame_alloc();
4031 s->next_refs[i].f = av_frame_alloc();
4032 if (!s->refs[i].f || !s->next_refs[i].f) {
4033 vp9_decode_free(ctx);
4034 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4035 return AVERROR(ENOMEM);
4042 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4044 VP9Context *s = ctx->priv_data;
4046 ctx->internal->allocate_progress = 1;
4047 ctx->pix_fmt = AV_PIX_FMT_YUV420P;
4048 ff_vp9dsp_init(&s->dsp);
4049 ff_videodsp_init(&s->vdsp, 8);
4050 s->filter.sharpness = -1;
4052 return init_frames(ctx);
4055 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4057 return init_frames(avctx);
4060 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4063 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4065 // detect size changes in other threads
4066 if (s->intra_pred_data[0] &&
4067 (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
4071 for (i = 0; i < 3; i++) {
4072 if (s->frames[i].tf.f->data[0])
4073 vp9_unref_frame(dst, &s->frames[i]);
4074 if (ssrc->frames[i].tf.f->data[0]) {
4075 if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
4079 for (i = 0; i < 8; i++) {
4080 if (s->refs[i].f->data[0])
4081 ff_thread_release_buffer(dst, &s->refs[i]);
4082 if (ssrc->next_refs[i].f->data[0]) {
4083 if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
4088 s->invisible = ssrc->invisible;
4089 s->keyframe = ssrc->keyframe;
4090 s->segmentation.enabled = ssrc->segmentation.enabled;
4091 s->segmentation.update_map = ssrc->segmentation.update_map;
4092 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4093 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
4094 if (ssrc->segmentation.enabled) {
4095 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4096 sizeof(s->segmentation.feat));
4102 AVCodec ff_vp9_decoder = {
4104 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4105 .type = AVMEDIA_TYPE_VIDEO,
4106 .id = AV_CODEC_ID_VP9,
4107 .priv_data_size = sizeof(VP9Context),
4108 .init = vp9_decode_init,
4109 .close = vp9_decode_free,
4110 .decode = vp9_decode_frame,
4111 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4112 .flush = vp9_decode_flush,
4113 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4114 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),