2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
35 #define VP9_SYNCCODE 0x498342
72 typedef struct VP9Frame {
74 AVBufferRef *extradata;
75 uint8_t *segmentation_map;
76 struct VP9mvrefPair *mv;
81 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
82 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
85 typedef struct VP9Block {
86 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
87 enum FilterMode filter;
88 VP56mv mv[4 /* b_idx */][2 /* ref */];
90 enum TxfmMode tx, uvtx;
92 enum BlockPartition bp;
95 typedef struct VP9Context {
102 VP9Block *b_base, *b;
103 int pass, uses_2pass, last_uses_2pass;
104 int row, row7, col, col7;
106 ptrdiff_t y_stride, uv_stride;
110 uint8_t keyframe, last_keyframe;
112 uint8_t use_last_frame_mvs;
118 uint8_t refreshrefmask;
119 uint8_t highprecisionmvs;
120 enum FilterMode filtermode;
121 uint8_t allowcompinter;
124 uint8_t parallelmode;
128 uint8_t varcompref[2];
129 ThreadFrame refs[8], next_refs[8];
138 uint8_t mblim_lut[64];
146 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
148 #define MAX_SEGMENT 8
152 uint8_t absolute_vals;
158 uint8_t skip_enabled;
167 unsigned log2_tile_cols, log2_tile_rows;
168 unsigned tile_cols, tile_rows;
169 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
171 unsigned sb_cols, sb_rows, rows, cols;
174 uint8_t coef[4][2][2][6][6][3];
178 uint8_t coef[4][2][2][6][6][11];
183 unsigned y_mode[4][10];
184 unsigned uv_mode[10][10];
185 unsigned filter[4][3];
186 unsigned mv_mode[7][4];
187 unsigned intra[4][2];
189 unsigned single_ref[5][2][2];
190 unsigned comp_ref[5][2];
191 unsigned tx32p[2][4];
192 unsigned tx16p[2][3];
195 unsigned mv_joint[4];
198 unsigned classes[11];
200 unsigned bits[10][2];
201 unsigned class0_fp[2][4];
203 unsigned class0_hp[2];
206 unsigned partition[4][4][4];
207 unsigned coef[4][2][2][6][6][3];
208 unsigned eob[4][2][2][6][6][2];
210 enum TxfmMode txfmmode;
211 enum CompPredMode comppredmode;
213 // contextual (left/above) cache
214 DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
215 DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
216 DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
217 DECLARE_ALIGNED(8, uint8_t, left_uv_nnz_ctx)[2][8];
218 DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
219 DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
220 DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
221 DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
222 DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
223 DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
224 DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
225 DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
226 uint8_t *above_partition_ctx;
227 uint8_t *above_mode_ctx;
228 // FIXME maybe merge some of the below in a flags field?
229 uint8_t *above_y_nnz_ctx;
230 uint8_t *above_uv_nnz_ctx[2];
231 uint8_t *above_skip_ctx; // 1bit
232 uint8_t *above_txfm_ctx; // 2bit
233 uint8_t *above_segpred_ctx; // 1bit
234 uint8_t *above_intra_ctx; // 1bit
235 uint8_t *above_comp_ctx; // 1bit
236 uint8_t *above_ref_ctx; // 2bit
237 uint8_t *above_filter_ctx;
238 VP56mv (*above_mv_ctx)[2];
241 uint8_t *intra_pred_data[3];
242 struct VP9Filter *lflvl;
243 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71*80];
245 // block reconstruction intermediates
246 int block_alloc_using_2pass;
247 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
248 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
249 struct { int x, y; } min_mv, max_mv;
250 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
251 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
254 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
256 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
257 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
259 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
260 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
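/* Annotation (derived from how the table is used below): bwh_tab[0][bs] holds a
 * block's width/height in 4x4-pixel units and bwh_tab[1][bs] in 8x8-pixel units
 * (clamped to a minimum of 1), e.g. BS_64x64 -> { 16, 16 } and { 8, 8 }. */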
264 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
266 VP9Context *s = ctx->priv_data;
269 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
271 sz = 64 * s->sb_cols * s->sb_rows;
272 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
273 ff_thread_release_buffer(ctx, &f->tf);
274 return AVERROR(ENOMEM);
277 f->segmentation_map = f->extradata->data;
278 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
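// One refcounted buffer backs both per-block arrays: the first sz bytes
// (one byte per 8x8 block) hold the segmentation map, followed by sz
// VP9mvrefPair entries for the motion-vector/reference cache.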
280 // retain segmentation map if it doesn't update
281 if (s->segmentation.enabled && !s->segmentation.update_map &&
282 !s->intraonly && !s->keyframe && !s->errorres) {
283 memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, sz);
289 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
291 ff_thread_release_buffer(ctx, &f->tf);
292 av_buffer_unref(&f->extradata);
295 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
299 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
301 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
302 vp9_unref_frame(ctx, dst);
303 return AVERROR(ENOMEM);
306 dst->segmentation_map = src->segmentation_map;
312 static int update_size(AVCodecContext *ctx, int w, int h)
314 VP9Context *s = ctx->priv_data;
317 av_assert0(w > 0 && h > 0);
319 if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height)
324 s->sb_cols = (w + 63) >> 6;
325 s->sb_rows = (h + 63) >> 6;
326 s->cols = (w + 7) >> 3;
327 s->rows = (h + 7) >> 3;
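// sb_cols/sb_rows count 64x64 superblocks, cols/rows count 8x8 blocks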
329 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
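// assign() carves typed per-superblock-column slices out of the single
// allocation below; the 240 bytes per column are the sum of the uint8_t
// assigns that follow (64 + 32 + 32 + 16 + 16 + 10 * 8), with lflvl and
// above_mv_ctx accounted for separately in the malloc size.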
330 av_freep(&s->intra_pred_data[0]);
331 p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
333 return AVERROR(ENOMEM);
334 assign(s->intra_pred_data[0], uint8_t *, 64);
335 assign(s->intra_pred_data[1], uint8_t *, 32);
336 assign(s->intra_pred_data[2], uint8_t *, 32);
337 assign(s->above_y_nnz_ctx, uint8_t *, 16);
338 assign(s->above_mode_ctx, uint8_t *, 16);
339 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
340 assign(s->above_partition_ctx, uint8_t *, 8);
341 assign(s->above_skip_ctx, uint8_t *, 8);
342 assign(s->above_txfm_ctx, uint8_t *, 8);
343 assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
344 assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
345 assign(s->above_segpred_ctx, uint8_t *, 8);
346 assign(s->above_intra_ctx, uint8_t *, 8);
347 assign(s->above_comp_ctx, uint8_t *, 8);
348 assign(s->above_ref_ctx, uint8_t *, 8);
349 assign(s->above_filter_ctx, uint8_t *, 8);
350 assign(s->lflvl, struct VP9Filter *, 1);
353 // these will be re-allocated a little later
354 av_freep(&s->b_base);
355 av_freep(&s->block_base);
360 static int update_block_buffers(AVCodecContext *ctx)
362 VP9Context *s = ctx->priv_data;
364 if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->uses_2pass)
368 av_free(s->block_base);
370 int sbs = s->sb_cols * s->sb_rows;
372 s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
373 s->block_base = av_mallocz((64 * 64 + 128) * sbs * 3);
374 if (!s->b_base || !s->block_base)
375 return AVERROR(ENOMEM);
376 s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
377 s->uvblock_base[1] = s->uvblock_base[0] + sbs * 32 * 32;
378 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * 32 * 32);
379 s->uveob_base[0] = s->eob_base + 256 * sbs;
380 s->uveob_base[1] = s->uveob_base[0] + 64 * sbs;
382 s->b_base = av_malloc(sizeof(VP9Block));
383 s->block_base = av_mallocz((64 * 64 + 128) * 3);
384 if (!s->b_base || !s->block_base)
385 return AVERROR(ENOMEM);
386 s->uvblock_base[0] = s->block_base + 64 * 64;
387 s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
388 s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
389 s->uveob_base[0] = s->eob_base + 256;
390 s->uveob_base[1] = s->uveob_base[0] + 64;
392 s->block_alloc_using_2pass = s->uses_2pass;
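// Layout of the shared buffer sized above: all luma coefficients
// (64 * 64 int16 per superblock), then the two chroma planes (32 * 32 int16
// each), then the eob arrays (256 + 64 + 64 bytes per superblock);
// (64 * 64 + 128) * 3 bytes covers exactly that (12288 coefficient bytes +
// 384 eob bytes). Single-pass decoding only keeps one superblock's worth.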
397 // for some reason the sign bit is at the end, not the start, of a bit sequence
398 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
400 int v = get_bits(gb, n);
401 return get_bits1(gb) ? -v : v;
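// e.g. for n == 4, the bit pattern 0101 followed by sign bit 1 decodes to -5;
// with sign bit 0 it decodes to +5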
404 static av_always_inline int inv_recenter_nonneg(int v, int m)
406 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
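// inv_recenter_nonneg() interleaves a non-negative delta v around m while
// staying non-negative: for m = 5, v = 0,1,2,3,4 -> 5,4,6,3,7; once v
// exceeds 2 * m the value is taken literally.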
409 // differential forward probability updates
410 static int update_prob(VP56RangeCoder *c, int p)
412 static const int inv_map_table[254] = {
413 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
414 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
415 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
416 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
417 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
418 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
419 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
420 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
421 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
422 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
423 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
424 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
425 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
426 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
427 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
428 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
429 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
430 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
/* This code performs a differential probability update. For a current
 * probability A in the range [1, 255], the difference to any new
 * probability lies in [1-A, 255-A]. Part of that (absolute) range exists
 * on both the positive and the negative side, while the rest exists on
 * one side only. The shared part is coded differentially, i.e. as twice
 * the magnitude with the lowest bit carrying the sign, and the one-sided
 * part is then coded on top of that. The resulting absolute difference
 * again lies in [0, 254], and since a bigger value in this range means
 * we are further away from the original value A, it is increasingly
 * unlikely, so it can be coded as a VLC. The first 20 entries of
 * inv_map_table[] provide the 'cheap, rough' updates, while the rest of
 * the range gives the 'fine, exact' updates, which adds one extra
 * dimension to this differential update model. */
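/* Illustrative decode trace (a sketch, not normative): the branches below
 * yield d in [0,16) for one coder decision plus 4 bits, [16,32) for two
 * decisions plus 4 bits, [32,64) for three decisions plus 5 bits, and larger
 * deltas via the 7-bit tail (with one optional extra bit). inv_map_table[d]
 * then maps the VLC index back to the re-ordered update value, and
 * inv_recenter_nonneg() folds it around the current probability p to give
 * the new probability. */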
450 if (!vp8_rac_get(c)) {
451 d = vp8_rac_get_uint(c, 4) + 0;
452 } else if (!vp8_rac_get(c)) {
453 d = vp8_rac_get_uint(c, 4) + 16;
454 } else if (!vp8_rac_get(c)) {
455 d = vp8_rac_get_uint(c, 5) + 32;
457 d = vp8_rac_get_uint(c, 7);
459 d = (d << 1) - 65 + vp8_rac_get(c);
463 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
464 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
467 static int decode_frame_header(AVCodecContext *ctx,
468 const uint8_t *data, int size, int *ref)
470 VP9Context *s = ctx->priv_data;
471 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
473 const uint8_t *data2;
476 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
477 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
480 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
481 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
482 return AVERROR_INVALIDDATA;
484 s->profile = get_bits1(&s->gb);
485 if (get_bits1(&s->gb)) { // reserved bit
486 av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
487 return AVERROR_INVALIDDATA;
489 if (get_bits1(&s->gb)) {
490 *ref = get_bits(&s->gb, 3);
493 s->last_uses_2pass = s->uses_2pass;
494 s->last_keyframe = s->keyframe;
495 s->keyframe = !get_bits1(&s->gb);
496 last_invisible = s->invisible;
497 s->invisible = !get_bits1(&s->gb);
498 s->errorres = get_bits1(&s->gb);
499 s->use_last_frame_mvs = !s->errorres && !last_invisible;
501 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
502 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
503 return AVERROR_INVALIDDATA;
505 s->colorspace = get_bits(&s->gb, 3);
506 if (s->colorspace == 7) { // RGB = profile 1
507 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
508 return AVERROR_INVALIDDATA;
510 s->fullrange = get_bits1(&s->gb);
// for profile 1, the subsampling bits would follow here
512 s->refreshrefmask = 0xff;
513 w = get_bits(&s->gb, 16) + 1;
514 h = get_bits(&s->gb, 16) + 1;
515 if (get_bits1(&s->gb)) // display size
516 skip_bits(&s->gb, 32);
518 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
519 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
521 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
522 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
523 return AVERROR_INVALIDDATA;
525 s->refreshrefmask = get_bits(&s->gb, 8);
526 w = get_bits(&s->gb, 16) + 1;
527 h = get_bits(&s->gb, 16) + 1;
528 if (get_bits1(&s->gb)) // display size
529 skip_bits(&s->gb, 32);
531 s->refreshrefmask = get_bits(&s->gb, 8);
532 s->refidx[0] = get_bits(&s->gb, 3);
533 s->signbias[0] = get_bits1(&s->gb);
534 s->refidx[1] = get_bits(&s->gb, 3);
535 s->signbias[1] = get_bits1(&s->gb);
536 s->refidx[2] = get_bits(&s->gb, 3);
537 s->signbias[2] = get_bits1(&s->gb);
538 if (!s->refs[s->refidx[0]].f->data[0] ||
539 !s->refs[s->refidx[1]].f->data[0] ||
540 !s->refs[s->refidx[2]].f->data[0]) {
541 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
542 return AVERROR_INVALIDDATA;
544 if (get_bits1(&s->gb)) {
545 w = s->refs[s->refidx[0]].f->width;
546 h = s->refs[s->refidx[0]].f->height;
547 } else if (get_bits1(&s->gb)) {
548 w = s->refs[s->refidx[1]].f->width;
549 h = s->refs[s->refidx[1]].f->height;
550 } else if (get_bits1(&s->gb)) {
551 w = s->refs[s->refidx[2]].f->width;
552 h = s->refs[s->refidx[2]].f->height;
554 w = get_bits(&s->gb, 16) + 1;
555 h = get_bits(&s->gb, 16) + 1;
// Note that in this code, "CUR_FRAME" is actually before we
// have formally allocated a frame, and thus actually represents
// the previous (last decoded) frame
560 s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
561 s->frames[CUR_FRAME].tf.f->height == h;
562 if (get_bits1(&s->gb)) // display size
563 skip_bits(&s->gb, 32);
564 s->highprecisionmvs = get_bits1(&s->gb);
565 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
567 s->allowcompinter = s->signbias[0] != s->signbias[1] ||
568 s->signbias[0] != s->signbias[2];
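// Compound prediction is only possible when the three active references do
// not all share the same sign bias; the two references on the common side
// become the variable compound refs (varcompref[]) below, while the
// remaining one acts as the fixed compound ref (s->fixcompref).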
569 if (s->allowcompinter) {
570 if (s->signbias[0] == s->signbias[1]) {
572 s->varcompref[0] = 0;
573 s->varcompref[1] = 1;
574 } else if (s->signbias[0] == s->signbias[2]) {
576 s->varcompref[0] = 0;
577 s->varcompref[1] = 2;
580 s->varcompref[0] = 1;
581 s->varcompref[1] = 2;
586 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
587 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
588 s->framectxid = c = get_bits(&s->gb, 2);
590 /* loopfilter header data */
591 s->filter.level = get_bits(&s->gb, 6);
592 sharp = get_bits(&s->gb, 3);
593 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
594 // the old cache values since they are still valid
595 if (s->filter.sharpness != sharp)
596 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
597 s->filter.sharpness = sharp;
598 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
599 if (get_bits1(&s->gb)) {
600 for (i = 0; i < 4; i++)
601 if (get_bits1(&s->gb))
602 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
603 for (i = 0; i < 2; i++)
604 if (get_bits1(&s->gb))
605 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
608 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
611 /* quantization header data */
612 s->yac_qi = get_bits(&s->gb, 8);
613 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
614 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
615 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
616 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
617 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
619 /* segmentation header info */
620 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
621 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
622 for (i = 0; i < 7; i++)
623 s->prob.seg[i] = get_bits1(&s->gb) ?
624 get_bits(&s->gb, 8) : 255;
625 if ((s->segmentation.temporal = get_bits1(&s->gb))) {
626 for (i = 0; i < 3; i++)
627 s->prob.segpred[i] = get_bits1(&s->gb) ?
628 get_bits(&s->gb, 8) : 255;
631 if ((!s->segmentation.update_map || s->segmentation.temporal) &&
632 (w != s->frames[CUR_FRAME].tf.f->width ||
633 h != s->frames[CUR_FRAME].tf.f->height)) {
634 av_log(ctx, AV_LOG_ERROR,
635 "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
636 s->segmentation.temporal, s->segmentation.update_map);
637 return AVERROR_INVALIDDATA;
640 if (get_bits1(&s->gb)) {
641 s->segmentation.absolute_vals = get_bits1(&s->gb);
642 for (i = 0; i < 8; i++) {
643 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
644 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
645 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
646 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
647 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
648 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
649 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
653 s->segmentation.feat[0].q_enabled = 0;
654 s->segmentation.feat[0].lf_enabled = 0;
655 s->segmentation.feat[0].skip_enabled = 0;
656 s->segmentation.feat[0].ref_enabled = 0;
659 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
660 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
661 int qyac, qydc, quvac, quvdc, lflvl, sh;
663 if (s->segmentation.feat[i].q_enabled) {
664 if (s->segmentation.absolute_vals)
665 qyac = s->segmentation.feat[i].q_val;
667 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
671 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
672 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
673 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
674 qyac = av_clip_uintp2(qyac, 8);
676 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
677 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
678 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
679 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];
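// qmul[0][0]/[0][1] are the luma DC/AC dequant factors and qmul[1][0]/[1][1]
// the chroma DC/AC ones, looked up after applying the per-plane quantizer
// deltas above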
681 sh = s->filter.level >= 32;
682 if (s->segmentation.feat[i].lf_enabled) {
683 if (s->segmentation.absolute_vals)
684 lflvl = s->segmentation.feat[i].lf_val;
686 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
688 lflvl = s->filter.level;
690 s->segmentation.feat[i].lflvl[0][0] =
691 s->segmentation.feat[i].lflvl[0][1] =
692 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
693 for (j = 1; j < 4; j++) {
694 s->segmentation.feat[i].lflvl[j][0] =
695 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
696 s->lf_delta.mode[0]) << sh), 6);
697 s->segmentation.feat[i].lflvl[j][1] =
698 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
699 s->lf_delta.mode[1]) << sh), 6);
704 if ((res = update_size(ctx, w, h)) < 0) {
705 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
708 for (s->tiling.log2_tile_cols = 0;
709 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
710 s->tiling.log2_tile_cols++) ;
711 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
712 max = FFMAX(0, max - 1);
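// log2_tile_cols is bounded below (so a tile spans at most 64 superblocks)
// and above by 'max' (so a tile spans at least 4); the increment bits read
// next pick a value inside that range.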
713 while (max > s->tiling.log2_tile_cols) {
714 if (get_bits1(&s->gb))
715 s->tiling.log2_tile_cols++;
719 s->tiling.log2_tile_rows = decode012(&s->gb);
720 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
721 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
722 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
723 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
724 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
726 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
727 return AVERROR(ENOMEM);
731 if (s->keyframe || s->errorres || s->intraonly) {
732 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
733 s->prob_ctx[3].p = vp9_default_probs;
734 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
735 sizeof(vp9_default_coef_probs));
736 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
737 sizeof(vp9_default_coef_probs));
738 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
739 sizeof(vp9_default_coef_probs));
740 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
741 sizeof(vp9_default_coef_probs));
// the next 16 bits give the size of the rest of the header (arith-coded)
745 size2 = get_bits(&s->gb, 16);
746 data2 = align_get_bits(&s->gb);
747 if (size2 > size - (data2 - data)) {
748 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
749 return AVERROR_INVALIDDATA;
751 ff_vp56_init_range_decoder(&s->c, data2, size2);
752 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
753 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
754 return AVERROR_INVALIDDATA;
757 if (s->keyframe || s->intraonly) {
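// s->counts.coef and s->counts.eob are adjacent members of s->counts
// (see the struct above), so the combined memset below clears both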
758 memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
760 memset(&s->counts, 0, sizeof(s->counts));
// FIXME is it faster to not copy here, but do it down in the fw updates
// as explicit copies if the fw update is missing (and skip the copy upon
// keyframes)?
765 s->prob.p = s->prob_ctx[c].p;
769 s->txfmmode = TX_4X4;
771 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
772 if (s->txfmmode == 3)
773 s->txfmmode += vp8_rac_get(&s->c);
775 if (s->txfmmode == TX_SWITCHABLE) {
776 for (i = 0; i < 2; i++)
777 if (vp56_rac_get_prob_branchy(&s->c, 252))
778 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
779 for (i = 0; i < 2; i++)
780 for (j = 0; j < 2; j++)
781 if (vp56_rac_get_prob_branchy(&s->c, 252))
782 s->prob.p.tx16p[i][j] =
783 update_prob(&s->c, s->prob.p.tx16p[i][j]);
784 for (i = 0; i < 2; i++)
785 for (j = 0; j < 3; j++)
786 if (vp56_rac_get_prob_branchy(&s->c, 252))
787 s->prob.p.tx32p[i][j] =
788 update_prob(&s->c, s->prob.p.tx32p[i][j]);
793 for (i = 0; i < 4; i++) {
794 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
795 if (vp8_rac_get(&s->c)) {
796 for (j = 0; j < 2; j++)
797 for (k = 0; k < 2; k++)
798 for (l = 0; l < 6; l++)
799 for (m = 0; m < 6; m++) {
800 uint8_t *p = s->prob.coef[i][j][k][l][m];
801 uint8_t *r = ref[j][k][l][m];
802 if (m >= 3 && l == 0) // dc only has 3 pt
804 for (n = 0; n < 3; n++) {
805 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
806 p[n] = update_prob(&s->c, r[n]);
814 for (j = 0; j < 2; j++)
815 for (k = 0; k < 2; k++)
816 for (l = 0; l < 6; l++)
817 for (m = 0; m < 6; m++) {
818 uint8_t *p = s->prob.coef[i][j][k][l][m];
819 uint8_t *r = ref[j][k][l][m];
820 if (m > 3 && l == 0) // dc only has 3 pt
826 if (s->txfmmode == i)
831 for (i = 0; i < 3; i++)
832 if (vp56_rac_get_prob_branchy(&s->c, 252))
833 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
834 if (!s->keyframe && !s->intraonly) {
835 for (i = 0; i < 7; i++)
836 for (j = 0; j < 3; j++)
837 if (vp56_rac_get_prob_branchy(&s->c, 252))
838 s->prob.p.mv_mode[i][j] =
839 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
841 if (s->filtermode == FILTER_SWITCHABLE)
842 for (i = 0; i < 4; i++)
843 for (j = 0; j < 2; j++)
844 if (vp56_rac_get_prob_branchy(&s->c, 252))
845 s->prob.p.filter[i][j] =
846 update_prob(&s->c, s->prob.p.filter[i][j]);
848 for (i = 0; i < 4; i++)
849 if (vp56_rac_get_prob_branchy(&s->c, 252))
850 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
852 if (s->allowcompinter) {
853 s->comppredmode = vp8_rac_get(&s->c);
855 s->comppredmode += vp8_rac_get(&s->c);
856 if (s->comppredmode == PRED_SWITCHABLE)
857 for (i = 0; i < 5; i++)
858 if (vp56_rac_get_prob_branchy(&s->c, 252))
860 update_prob(&s->c, s->prob.p.comp[i]);
862 s->comppredmode = PRED_SINGLEREF;
865 if (s->comppredmode != PRED_COMPREF) {
866 for (i = 0; i < 5; i++) {
867 if (vp56_rac_get_prob_branchy(&s->c, 252))
868 s->prob.p.single_ref[i][0] =
869 update_prob(&s->c, s->prob.p.single_ref[i][0]);
870 if (vp56_rac_get_prob_branchy(&s->c, 252))
871 s->prob.p.single_ref[i][1] =
872 update_prob(&s->c, s->prob.p.single_ref[i][1]);
876 if (s->comppredmode != PRED_SINGLEREF) {
877 for (i = 0; i < 5; i++)
878 if (vp56_rac_get_prob_branchy(&s->c, 252))
879 s->prob.p.comp_ref[i] =
880 update_prob(&s->c, s->prob.p.comp_ref[i]);
883 for (i = 0; i < 4; i++)
884 for (j = 0; j < 9; j++)
885 if (vp56_rac_get_prob_branchy(&s->c, 252))
886 s->prob.p.y_mode[i][j] =
887 update_prob(&s->c, s->prob.p.y_mode[i][j]);
889 for (i = 0; i < 4; i++)
890 for (j = 0; j < 4; j++)
891 for (k = 0; k < 3; k++)
892 if (vp56_rac_get_prob_branchy(&s->c, 252))
893 s->prob.p.partition[3 - i][j][k] =
894 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
896 // mv fields don't use the update_prob subexp model for some reason
897 for (i = 0; i < 3; i++)
898 if (vp56_rac_get_prob_branchy(&s->c, 252))
899 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
901 for (i = 0; i < 2; i++) {
902 if (vp56_rac_get_prob_branchy(&s->c, 252))
903 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
905 for (j = 0; j < 10; j++)
906 if (vp56_rac_get_prob_branchy(&s->c, 252))
907 s->prob.p.mv_comp[i].classes[j] =
908 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
910 if (vp56_rac_get_prob_branchy(&s->c, 252))
911 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
913 for (j = 0; j < 10; j++)
914 if (vp56_rac_get_prob_branchy(&s->c, 252))
915 s->prob.p.mv_comp[i].bits[j] =
916 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
919 for (i = 0; i < 2; i++) {
920 for (j = 0; j < 2; j++)
921 for (k = 0; k < 3; k++)
922 if (vp56_rac_get_prob_branchy(&s->c, 252))
923 s->prob.p.mv_comp[i].class0_fp[j][k] =
924 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
926 for (j = 0; j < 3; j++)
927 if (vp56_rac_get_prob_branchy(&s->c, 252))
928 s->prob.p.mv_comp[i].fp[j] =
929 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
932 if (s->highprecisionmvs) {
933 for (i = 0; i < 2; i++) {
934 if (vp56_rac_get_prob_branchy(&s->c, 252))
935 s->prob.p.mv_comp[i].class0_hp =
936 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
938 if (vp56_rac_get_prob_branchy(&s->c, 252))
939 s->prob.p.mv_comp[i].hp =
940 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
945 return (data2 - data) + size2;
948 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
951 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
952 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
955 static void find_ref_mvs(VP9Context *s,
956 VP56mv *pmv, int ref, int z, int idx, int sb)
958 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
959 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
960 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
961 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
962 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
963 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
964 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
965 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
966 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
967 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
968 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
969 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
970 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
971 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
972 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
973 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
974 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
975 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
976 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
977 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
978 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
979 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
980 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
981 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
982 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
983 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
984 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
987 int row = s->row, col = s->col, row7 = s->row7;
988 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
989 #define INVALID_MV 0x80008000U
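// 0x80008000 packs -32768 into both int16 MV components, a value that in
// practice no real (clamped) candidate can take, so it doubles as the
// "nothing stored yet" marker for the two-candidate search below.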
990 uint32_t mem = INVALID_MV;
993 #define RETURN_DIRECT_MV(mv) \
995 uint32_t m = AV_RN32A(&mv); \
999 } else if (mem == INVALID_MV) { \
1001 } else if (m != mem) { \
1008 if (sb == 2 || sb == 1) {
1009 RETURN_DIRECT_MV(b->mv[0][z]);
1010 } else if (sb == 3) {
1011 RETURN_DIRECT_MV(b->mv[2][z]);
1012 RETURN_DIRECT_MV(b->mv[1][z]);
1013 RETURN_DIRECT_MV(b->mv[0][z]);
1016 #define RETURN_MV(mv) \
1021 clamp_mv(&tmp, &mv, s); \
1022 m = AV_RN32A(&tmp); \
1026 } else if (mem == INVALID_MV) { \
1028 } else if (m != mem) { \
1033 uint32_t m = AV_RN32A(&mv); \
1035 clamp_mv(pmv, &mv, s); \
1037 } else if (mem == INVALID_MV) { \
1039 } else if (m != mem) { \
1040 clamp_mv(pmv, &mv, s); \
1047 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1048 if (mv->ref[0] == ref) {
1049 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1050 } else if (mv->ref[1] == ref) {
1051 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
1054 if (col > s->tiling.tile_col_start) {
1055 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1056 if (mv->ref[0] == ref) {
1057 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1058 } else if (mv->ref[1] == ref) {
1059 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1067 // previously coded MVs in this neighbourhood, using same reference frame
1068 for (; i < 8; i++) {
1069 int c = p[i][0] + col, r = p[i][1] + row;
1071 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1072 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1074 if (mv->ref[0] == ref) {
1075 RETURN_MV(mv->mv[0]);
1076 } else if (mv->ref[1] == ref) {
1077 RETURN_MV(mv->mv[1]);
1082 // MV at this position in previous frame, using same reference frame
1083 if (s->use_last_frame_mvs) {
1084 struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
1086 if (!s->last_uses_2pass)
1087 ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
1088 if (mv->ref[0] == ref) {
1089 RETURN_MV(mv->mv[0]);
1090 } else if (mv->ref[1] == ref) {
1091 RETURN_MV(mv->mv[1]);
1095 #define RETURN_SCALE_MV(mv, scale) \
1098 VP56mv mv_temp = { -mv.x, -mv.y }; \
1099 RETURN_MV(mv_temp); \
1105 // previously coded MVs in this neighbourhood, using different reference frame
1106 for (i = 0; i < 8; i++) {
1107 int c = p[i][0] + col, r = p[i][1] + row;
1109 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1110 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1112 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1113 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1115 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1116 // BUG - libvpx has this condition regardless of whether
1117 // we used the first ref MV and pre-scaling
1118 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1119 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1124 // MV at this position in previous frame, using different reference frame
1125 if (s->use_last_frame_mvs) {
1126 struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
1128 // no need to await_progress, because we already did that above
1129 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1130 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1132 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1133 // BUG - libvpx has this condition regardless of whether
1134 // we used the first ref MV and pre-scaling
1135 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1136 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1143 #undef RETURN_SCALE_MV
1146 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1148 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1149 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1150 s->prob.p.mv_comp[idx].classes);
1152 s->counts.mv_comp[idx].sign[sign]++;
1153 s->counts.mv_comp[idx].classes[c]++;
1157 for (n = 0, m = 0; m < c; m++) {
1158 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1160 s->counts.mv_comp[idx].bits[m][bit]++;
1163 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1165 s->counts.mv_comp[idx].fp[bit]++;
1167 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1168 s->counts.mv_comp[idx].hp[bit]++;
// bug in libvpx - we count for bw entropy purposes even if the
// bit is not coded
1174 s->counts.mv_comp[idx].hp[1]++;
1178 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1179 s->counts.mv_comp[idx].class0[n]++;
1180 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1181 s->prob.p.mv_comp[idx].class0_fp[n]);
1182 s->counts.mv_comp[idx].class0_fp[n][bit]++;
1183 n = (n << 3) | (bit << 1);
1185 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1186 s->counts.mv_comp[idx].class0_hp[bit]++;
// bug in libvpx - we count for bw entropy purposes even if the
// bit is not coded
1192 s->counts.mv_comp[idx].class0_hp[1]++;
1196 return sign ? -(n + 1) : (n + 1);
1199 static void fill_mv(VP9Context *s,
1200 VP56mv *mv, int mode, int sb)
1204 if (mode == ZEROMV) {
1209 // FIXME cache this value and reuse for other subblocks
1210 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1211 mode == NEWMV ? -1 : sb);
1212 // FIXME maybe move this code into find_ref_mvs()
1213 if ((mode == NEWMV || sb == -1) &&
1214 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1228 if (mode == NEWMV) {
1229 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1230 s->prob.p.mv_joint);
1232 s->counts.mv_joint[j]++;
1233 if (j >= MV_JOINT_V)
1234 mv[0].y += read_mv_component(s, 0, hp);
1236 mv[0].x += read_mv_component(s, 1, hp);
1240 // FIXME cache this value and reuse for other subblocks
1241 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1242 mode == NEWMV ? -1 : sb);
1243 if ((mode == NEWMV || sb == -1) &&
1244 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1258 if (mode == NEWMV) {
1259 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1260 s->prob.p.mv_joint);
1262 s->counts.mv_joint[j]++;
1263 if (j >= MV_JOINT_V)
1264 mv[1].y += read_mv_component(s, 0, hp);
1266 mv[1].x += read_mv_component(s, 1, hp);
1272 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1273 ptrdiff_t stride, int v)
1283 int v16 = v * 0x0101;
1291 uint32_t v32 = v * 0x01010101;
1300 uint64_t v64 = v * 0x0101010101010101ULL;
1306 uint32_t v32 = v * 0x01010101;
1309 AV_WN32A(ptr + 4, v32);
1318 static void decode_mode(AVCodecContext *ctx)
1320 static const uint8_t left_ctx[N_BS_SIZES] = {
1321 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1323 static const uint8_t above_ctx[N_BS_SIZES] = {
1324 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
1326 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1327 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1328 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1330 VP9Context *s = ctx->priv_data;
1332 int row = s->row, col = s->col, row7 = s->row7;
1333 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
1334 int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
1335 int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
1336 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1337 int vref, filter_id;
1339 if (!s->segmentation.enabled) {
1341 } else if (s->keyframe || s->intraonly) {
1342 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
1343 } else if (!s->segmentation.update_map ||
1344 (s->segmentation.temporal &&
1345 vp56_rac_get_prob_branchy(&s->c,
1346 s->prob.segpred[s->above_segpred_ctx[col] +
1347 s->left_segpred_ctx[row7]]))) {
1350 uint8_t *refsegmap = s->frames[LAST_FRAME].segmentation_map;
1352 if (!s->last_uses_2pass)
1353 ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
1354 for (y = 0; y < h4; y++)
1355 for (x = 0; x < w4; x++)
1356 pred = FFMIN(pred, refsegmap[(y + row) * 8 * s->sb_cols + x + col]);
1357 av_assert1(pred < 8);
1363 memset(&s->above_segpred_ctx[col], 1, w4);
1364 memset(&s->left_segpred_ctx[row7], 1, h4);
1366 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1369 memset(&s->above_segpred_ctx[col], 0, w4);
1370 memset(&s->left_segpred_ctx[row7], 0, h4);
1372 if (s->segmentation.enabled &&
1373 (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1374 setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1375 w4, h4, 8 * s->sb_cols, b->seg_id);
1378 b->skip = s->segmentation.enabled &&
1379 s->segmentation.feat[b->seg_id].skip_enabled;
1381 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1382 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1383 s->counts.skip[c][b->skip]++;
1386 if (s->keyframe || s->intraonly) {
1388 } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1389 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1393 if (have_a && have_l) {
1394 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1397 c = have_a ? 2 * s->above_intra_ctx[col] :
1398 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1400 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1401 s->counts.intra[c][bit]++;
1405 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1409 c = (s->above_skip_ctx[col] ? max_tx :
1410 s->above_txfm_ctx[col]) +
1411 (s->left_skip_ctx[row7] ? max_tx :
1412 s->left_txfm_ctx[row7]) > max_tx;
1414 c = s->above_skip_ctx[col] ? 1 :
1415 (s->above_txfm_ctx[col] * 2 > max_tx);
1417 } else if (have_l) {
1418 c = s->left_skip_ctx[row7] ? 1 :
1419 (s->left_txfm_ctx[row7] * 2 > max_tx);
1425 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1427 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1429 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1431 s->counts.tx32p[c][b->tx]++;
1434 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1436 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1437 s->counts.tx16p[c][b->tx]++;
1440 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1441 s->counts.tx8p[c][b->tx]++;
1448 b->tx = FFMIN(max_tx, s->txfmmode);
1451 if (s->keyframe || s->intraonly) {
1452 uint8_t *a = &s->above_mode_ctx[col * 2];
1453 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1456 if (b->bs > BS_8x8) {
1457 // FIXME the memory storage intermediates here aren't really
// necessary, they're just there to make the code slightly simpler
1460 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1461 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1462 if (b->bs != BS_8x4) {
1463 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1464 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1465 l[0] = a[1] = b->mode[1];
1467 l[0] = a[1] = b->mode[1] = b->mode[0];
1469 if (b->bs != BS_4x8) {
1470 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1471 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1472 if (b->bs != BS_8x4) {
1473 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1474 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1475 l[1] = a[1] = b->mode[3];
1477 l[1] = a[1] = b->mode[3] = b->mode[2];
1480 b->mode[2] = b->mode[0];
1481 l[1] = a[1] = b->mode[3] = b->mode[1];
1484 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1485 vp9_default_kf_ymode_probs[*a][*l]);
1486 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1487 // FIXME this can probably be optimized
1488 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1489 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1491 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1492 vp9_default_kf_uvmode_probs[b->mode[3]]);
1493 } else if (b->intra) {
1495 if (b->bs > BS_8x8) {
1496 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1497 s->prob.p.y_mode[0]);
1498 s->counts.y_mode[0][b->mode[0]]++;
1499 if (b->bs != BS_8x4) {
1500 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1501 s->prob.p.y_mode[0]);
1502 s->counts.y_mode[0][b->mode[1]]++;
1504 b->mode[1] = b->mode[0];
1506 if (b->bs != BS_4x8) {
1507 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1508 s->prob.p.y_mode[0]);
1509 s->counts.y_mode[0][b->mode[2]]++;
1510 if (b->bs != BS_8x4) {
1511 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1512 s->prob.p.y_mode[0]);
1513 s->counts.y_mode[0][b->mode[3]]++;
1515 b->mode[3] = b->mode[2];
1518 b->mode[2] = b->mode[0];
1519 b->mode[3] = b->mode[1];
1522 static const uint8_t size_group[10] = {
1523 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1525 int sz = size_group[b->bs];
1527 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1528 s->prob.p.y_mode[sz]);
1529 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1530 s->counts.y_mode[sz][b->mode[3]]++;
1532 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1533 s->prob.p.uv_mode[b->mode[3]]);
1534 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
1536 static const uint8_t inter_mode_ctx_lut[14][14] = {
1537 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1538 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1539 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1540 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1541 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1542 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1543 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1544 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1545 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1546 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1547 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1548 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1549 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1550 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
1553 if (s->segmentation.feat[b->seg_id].ref_enabled) {
1554 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1556 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1558 // read comp_pred flag
1559 if (s->comppredmode != PRED_SWITCHABLE) {
1560 b->comp = s->comppredmode == PRED_COMPREF;
1564 // FIXME add intra as ref=0xff (or -1) to make these easier?
1567 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1569 } else if (s->above_comp_ctx[col]) {
1570 c = 2 + (s->left_intra_ctx[row7] ||
1571 s->left_ref_ctx[row7] == s->fixcompref);
1572 } else if (s->left_comp_ctx[row7]) {
1573 c = 2 + (s->above_intra_ctx[col] ||
1574 s->above_ref_ctx[col] == s->fixcompref);
1576 c = (!s->above_intra_ctx[col] &&
1577 s->above_ref_ctx[col] == s->fixcompref) ^
1578 (!s->left_intra_ctx[row7] &&
1579 s->left_ref_ctx[row & 7] == s->fixcompref);
1582 c = s->above_comp_ctx[col] ? 3 :
1583 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1585 } else if (have_l) {
1586 c = s->left_comp_ctx[row7] ? 3 :
1587 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1591 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1592 s->counts.comp[c][b->comp]++;
1595 // read actual references
1596 // FIXME probably cache a few variables here to prevent repetitive
1597 // memory accesses below
1598 if (b->comp) /* two references */ {
1599 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1601 b->ref[fix_idx] = s->fixcompref;
1602 // FIXME can this codeblob be replaced by some sort of LUT?
1605 if (s->above_intra_ctx[col]) {
1606 if (s->left_intra_ctx[row7]) {
1609 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1611 } else if (s->left_intra_ctx[row7]) {
1612 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1614 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1616 if (refl == refa && refa == s->varcompref[1]) {
1618 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1619 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1620 (refl == s->fixcompref && refa == s->varcompref[0])) {
1623 c = (refa == refl) ? 3 : 1;
1625 } else if (!s->left_comp_ctx[row7]) {
1626 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1629 c = (refl == s->varcompref[1] &&
1630 refa != s->varcompref[1]) ? 2 : 4;
1632 } else if (!s->above_comp_ctx[col]) {
1633 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1636 c = (refa == s->varcompref[1] &&
1637 refl != s->varcompref[1]) ? 2 : 4;
1640 c = (refl == refa) ? 4 : 2;
1644 if (s->above_intra_ctx[col]) {
1646 } else if (s->above_comp_ctx[col]) {
1647 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1649 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1652 } else if (have_l) {
1653 if (s->left_intra_ctx[row7]) {
1655 } else if (s->left_comp_ctx[row7]) {
1656 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1658 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1663 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1664 b->ref[var_idx] = s->varcompref[bit];
1665 s->counts.comp_ref[c][bit]++;
1666 } else /* single reference */ {
1669 if (have_a && !s->above_intra_ctx[col]) {
1670 if (have_l && !s->left_intra_ctx[row7]) {
1671 if (s->left_comp_ctx[row7]) {
1672 if (s->above_comp_ctx[col]) {
1673 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1674 !s->above_ref_ctx[col]);
1676 c = (3 * !s->above_ref_ctx[col]) +
1677 (!s->fixcompref || !s->left_ref_ctx[row7]);
1679 } else if (s->above_comp_ctx[col]) {
1680 c = (3 * !s->left_ref_ctx[row7]) +
1681 (!s->fixcompref || !s->above_ref_ctx[col]);
1683 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1685 } else if (s->above_intra_ctx[col]) {
1687 } else if (s->above_comp_ctx[col]) {
1688 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1690 c = 4 * (!s->above_ref_ctx[col]);
1692 } else if (have_l && !s->left_intra_ctx[row7]) {
1693 if (s->left_intra_ctx[row7]) {
1695 } else if (s->left_comp_ctx[row7]) {
1696 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1698 c = 4 * (!s->left_ref_ctx[row7]);
1703 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1704 s->counts.single_ref[c][0][bit]++;
1708 // FIXME can this codeblob be replaced by some sort of LUT?
1711 if (s->left_intra_ctx[row7]) {
1712 if (s->above_intra_ctx[col]) {
1714 } else if (s->above_comp_ctx[col]) {
1715 c = 1 + 2 * (s->fixcompref == 1 ||
1716 s->above_ref_ctx[col] == 1);
1717 } else if (!s->above_ref_ctx[col]) {
1720 c = 4 * (s->above_ref_ctx[col] == 1);
1722 } else if (s->above_intra_ctx[col]) {
1723 if (s->left_intra_ctx[row7]) {
1725 } else if (s->left_comp_ctx[row7]) {
1726 c = 1 + 2 * (s->fixcompref == 1 ||
1727 s->left_ref_ctx[row7] == 1);
1728 } else if (!s->left_ref_ctx[row7]) {
1731 c = 4 * (s->left_ref_ctx[row7] == 1);
1733 } else if (s->above_comp_ctx[col]) {
1734 if (s->left_comp_ctx[row7]) {
1735 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1736 c = 3 * (s->fixcompref == 1 ||
1737 s->left_ref_ctx[row7] == 1);
1741 } else if (!s->left_ref_ctx[row7]) {
1742 c = 1 + 2 * (s->fixcompref == 1 ||
1743 s->above_ref_ctx[col] == 1);
1745 c = 3 * (s->left_ref_ctx[row7] == 1) +
1746 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1748 } else if (s->left_comp_ctx[row7]) {
1749 if (!s->above_ref_ctx[col]) {
1750 c = 1 + 2 * (s->fixcompref == 1 ||
1751 s->left_ref_ctx[row7] == 1);
1753 c = 3 * (s->above_ref_ctx[col] == 1) +
1754 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1756 } else if (!s->above_ref_ctx[col]) {
1757 if (!s->left_ref_ctx[row7]) {
1760 c = 4 * (s->left_ref_ctx[row7] == 1);
1762 } else if (!s->left_ref_ctx[row7]) {
1763 c = 4 * (s->above_ref_ctx[col] == 1);
1765 c = 2 * (s->left_ref_ctx[row7] == 1) +
1766 2 * (s->above_ref_ctx[col] == 1);
1769 if (s->above_intra_ctx[col] ||
1770 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1772 } else if (s->above_comp_ctx[col]) {
1773 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1775 c = 4 * (s->above_ref_ctx[col] == 1);
1778 } else if (have_l) {
1779 if (s->left_intra_ctx[row7] ||
1780 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1782 } else if (s->left_comp_ctx[row7]) {
1783 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1785 c = 4 * (s->left_ref_ctx[row7] == 1);
1790 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1791 s->counts.single_ref[c][1][bit]++;
1792 b->ref[0] = 1 + bit;
1797 if (b->bs <= BS_8x8) {
1798 if (s->segmentation.feat[b->seg_id].skip_enabled) {
1799 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1801 static const uint8_t off[10] = {
1802 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1805 // FIXME this needs to use the LUT tables from find_ref_mvs
1806 // because not all are -1,0/0,-1
1807 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1808 [s->left_mode_ctx[row7 + off[b->bs]]];
1810 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1811 s->prob.p.mv_mode[c]);
1812 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1813 s->counts.mv_mode[c][b->mode[0] - 10]++;
1817 if (s->filtermode == FILTER_SWITCHABLE) {
1820 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1821 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1822 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1823 s->left_filter_ctx[row7] : 3;
1825 c = s->above_filter_ctx[col];
1827 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1828 c = s->left_filter_ctx[row7];
1833 filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1834 s->prob.p.filter[c]);
1835 s->counts.filter[c][filter_id]++;
1836 b->filter = vp9_filter_lut[filter_id];
1838 b->filter = s->filtermode;
1841 if (b->bs > BS_8x8) {
1842 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1844 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1845 s->prob.p.mv_mode[c]);
1846 s->counts.mv_mode[c][b->mode[0] - 10]++;
1847 fill_mv(s, b->mv[0], b->mode[0], 0);
1849 if (b->bs != BS_8x4) {
1850 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1851 s->prob.p.mv_mode[c]);
1852 s->counts.mv_mode[c][b->mode[1] - 10]++;
1853 fill_mv(s, b->mv[1], b->mode[1], 1);
1855 b->mode[1] = b->mode[0];
1856 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1857 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1860 if (b->bs != BS_4x8) {
1861 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1862 s->prob.p.mv_mode[c]);
1863 s->counts.mv_mode[c][b->mode[2] - 10]++;
1864 fill_mv(s, b->mv[2], b->mode[2], 2);
1866 if (b->bs != BS_8x4) {
1867 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1868 s->prob.p.mv_mode[c]);
1869 s->counts.mv_mode[c][b->mode[3] - 10]++;
1870 fill_mv(s, b->mv[3], b->mode[3], 3);
1872 b->mode[3] = b->mode[2];
1873 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1874 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1877 b->mode[2] = b->mode[0];
1878 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1879 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1880 b->mode[3] = b->mode[1];
1881 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1882 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
1885 fill_mv(s, b->mv[0], b->mode[0], -1);
1886 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1887 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1888 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
1889 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1890 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1891 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
1894 vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
1898 #define SPLAT_CTX(var, val, n) \
1900 case 1: var = val; break; \
1901 case 2: AV_WN16A(&var, val * 0x0101); break; \
1902 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1903 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
1905 uint64_t v64 = val * 0x0101010101010101ULL; \
1906 AV_WN64A( &var, v64); \
1907 AV_WN64A(&((uint8_t *) &var)[8], v64); \
1912 #define SPLAT_CTX(var, val, n) \
1914 case 1: var = val; break; \
1915 case 2: AV_WN16A(&var, val * 0x0101); break; \
1916 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1918 uint32_t v32 = val * 0x01010101; \
1919 AV_WN32A( &var, v32); \
1920 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1924 uint32_t v32 = val * 0x01010101; \
1925 AV_WN32A( &var, v32); \
1926 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1927 AV_WN32A(&((uint8_t *) &var)[8], v32); \
1928 AV_WN32A(&((uint8_t *) &var)[12], v32); \
1934 switch (bwh_tab[1][b->bs][0]) {
1935 #define SET_CTXS(dir, off, n) \
1937 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
1938 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
1939 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
1940 if (!s->keyframe && !s->intraonly) { \
1941 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
1942 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
1943 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
1945 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
1946 if (s->filtermode == FILTER_SWITCHABLE) { \
1947 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
1952 case 1: SET_CTXS(above, col, 1); break;
1953 case 2: SET_CTXS(above, col, 2); break;
1954 case 4: SET_CTXS(above, col, 4); break;
1955 case 8: SET_CTXS(above, col, 8); break;
1957 switch (bwh_tab[1][b->bs][1]) {
1958 case 1: SET_CTXS(left, row7, 1); break;
1959 case 2: SET_CTXS(left, row7, 2); break;
1960 case 4: SET_CTXS(left, row7, 4); break;
1961 case 8: SET_CTXS(left, row7, 8); break;
1966 if (!s->keyframe && !s->intraonly) {
1967 if (b->bs > BS_8x8) {
1968 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1970 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
1971 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
1972 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
1973 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
1974 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
1975 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
1976 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
1977 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
1979 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1981 for (n = 0; n < w4 * 2; n++) {
1982 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
1983 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
1985 for (n = 0; n < h4 * 2; n++) {
1986 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
1987 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
1993 for (y = 0; y < h4; y++) {
1994 int x, o = (row + y) * s->sb_cols * 8 + col;
1995 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
1998 for (x = 0; x < w4; x++) {
2002 } else if (b->comp) {
2003 for (x = 0; x < w4; x++) {
2004 mv[x].ref[0] = b->ref[0];
2005 mv[x].ref[1] = b->ref[1];
2006 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2007 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2010 for (x = 0; x < w4; x++) {
2011 mv[x].ref[0] = b->ref[0];
2013 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2019 // FIXME merge cnt/eob arguments?
2020 static av_always_inline int
2021 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2022 int is_tx32x32, unsigned (*cnt)[6][3],
2023 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2024 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2025 const int16_t *band_counts, const int16_t *qmul)
2027 int i = 0, band = 0, band_left = band_counts[band];
2028 uint8_t *tp = p[0][nnz];
2029 uint8_t cache[1024];
2034 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2035 eob[band][nnz][val]++;
2040 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2041 cnt[band][nnz][0]++;
2043 band_left = band_counts[++band];
2045 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2047 if (++i == n_coeffs)
2048 break; //invalid input; blocks should end with EOB
2053 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2054 cnt[band][nnz][1]++;
2058 // fill in p[3-10] (model fill) - only once per frame for each pos
2060 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2062 cnt[band][nnz][2]++;
2063 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2064 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2065 cache[rc] = val = 2;
2067 val = 3 + vp56_rac_get_prob(c, tp[5]);
2070 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2072 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2073 val = 5 + vp56_rac_get_prob(c, 159);
2075 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2076 val += vp56_rac_get_prob(c, 145);
2080 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2081 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2082 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2083 val += (vp56_rac_get_prob(c, 148) << 1);
2084 val += vp56_rac_get_prob(c, 140);
2086 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2087 val += (vp56_rac_get_prob(c, 155) << 2);
2088 val += (vp56_rac_get_prob(c, 140) << 1);
2089 val += vp56_rac_get_prob(c, 135);
2091 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2092 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2093 val += (vp56_rac_get_prob(c, 157) << 3);
2094 val += (vp56_rac_get_prob(c, 141) << 2);
2095 val += (vp56_rac_get_prob(c, 134) << 1);
2096 val += vp56_rac_get_prob(c, 130);
2098 val = 67 + (vp56_rac_get_prob(c, 254) << 13);
2099 val += (vp56_rac_get_prob(c, 254) << 12);
2100 val += (vp56_rac_get_prob(c, 254) << 11);
2101 val += (vp56_rac_get_prob(c, 252) << 10);
2102 val += (vp56_rac_get_prob(c, 249) << 9);
2103 val += (vp56_rac_get_prob(c, 243) << 8);
2104 val += (vp56_rac_get_prob(c, 230) << 7);
2105 val += (vp56_rac_get_prob(c, 196) << 6);
2106 val += (vp56_rac_get_prob(c, 177) << 5);
2107 val += (vp56_rac_get_prob(c, 153) << 4);
2108 val += (vp56_rac_get_prob(c, 140) << 3);
2109 val += (vp56_rac_get_prob(c, 133) << 2);
2110 val += (vp56_rac_get_prob(c, 130) << 1);
2111 val += vp56_rac_get_prob(c, 129);
2116 band_left = band_counts[++band];
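// note: for 32x32 transforms the dequantized value is halved below,
// presumably to compensate for the larger scaling of the tx32 inverse
// transform; e.g. val = 3 with qmul = 48 stores 72 instead of 144.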
2118 coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
2120 coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
2121 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2123 } while (++i < n_coeffs);
2128 static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2129 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2130 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2131 const int16_t (*nb)[2], const int16_t *band_counts,
2132 const int16_t *qmul)
2134 return decode_coeffs_b_generic(c, coef, n_coeffs, 0, cnt, eob, p,
2135 nnz, scan, nb, band_counts, qmul);
2138 static int decode_coeffs_b32(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2139 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2140 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2141 const int16_t (*nb)[2], const int16_t *band_counts,
2142 const int16_t *qmul)
2144 return decode_coeffs_b_generic(c, coef, n_coeffs, 1, cnt, eob, p,
2145 nnz, scan, nb, band_counts, qmul);
2148 static void decode_coeffs(AVCodecContext *ctx)
2150 VP9Context *s = ctx->priv_data;
2152 int row = s->row, col = s->col;
2153 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2154 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2155 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2156 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2157 int end_x = FFMIN(2 * (s->cols - col), w4);
2158 int end_y = FFMIN(2 * (s->rows - row), h4);
2159 int n, pl, x, y, res;
2160 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
2161 int tx = 4 * s->lossless + b->tx;
2162 const int16_t * const *yscans = vp9_scans[tx];
2163 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2164 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2165 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2166 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2167 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
2168 static const int16_t band_counts[4][8] = {
2169 { 1, 2, 3, 4, 3, 16 - 13 },
2170 { 1, 2, 3, 4, 11, 64 - 21 },
2171 { 1, 2, 3, 4, 11, 256 - 21 },
2172 { 1, 2, 3, 4, 11, 1024 - 21 },
2174 const int16_t *y_band_counts = band_counts[b->tx];
2175 const int16_t *uv_band_counts = band_counts[b->uvtx];
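// band_counts[tx] above splits the scan positions of one transform block
// into the six probability bands; the last entry is simply "all remaining
// positions", e.g. for an 8x8 transform the bands cover 1+2+3+4+11 = 21
// positions plus the 64 - 21 leftover ones.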
2177 #define MERGE(la, end, step, rd) \
2178 for (n = 0; n < end; n += step) \
2179 la[n] = !!rd(&la[n])
2180 #define MERGE_CTX(step, rd) \
2182 MERGE(l, end_y, step, rd); \
2183 MERGE(a, end_x, step, rd); \
2186 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2187 for (n = 0, y = 0; y < end_y; y += step) { \
2188 for (x = 0; x < end_x; x += step, n += step * step) { \
2189 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2190 res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
2191 c, e, p, a[x] + l[y], yscans[txtp], \
2192 ynbs[txtp], y_band_counts, qmul[0]); \
2193 a[x] = l[y] = !!res; \
2195 AV_WN16A(&s->eob[n], res); \
2202 #define SPLAT(la, end, step, cond) \
2204 for (n = 1; n < end; n += step) \
2205 la[n] = la[n - 1]; \
2206 } else if (step == 4) { \
2208 for (n = 0; n < end; n += step) \
2209 AV_WN32A(&la[n], la[n] * 0x01010101); \
2211 for (n = 0; n < end; n += step) \
2212 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2214 } else /* step == 8 */ { \
2216 if (HAVE_FAST_64BIT) { \
2217 for (n = 0; n < end; n += step) \
2218 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2220 for (n = 0; n < end; n += step) { \
2221 uint32_t v32 = la[n] * 0x01010101; \
2222 AV_WN32A(&la[n], v32); \
2223 AV_WN32A(&la[n + 4], v32); \
2227 for (n = 0; n < end; n += step) \
2228 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2231 #define SPLAT_CTX(step) \
2233 SPLAT(a, end_x, step, end_x == w4); \
2234 SPLAT(l, end_y, step, end_y == h4); \
2240 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2243 MERGE_CTX(2, AV_RN16A);
2244 DECODE_Y_COEF_LOOP(2, 0,);
2248 MERGE_CTX(4, AV_RN32A);
2249 DECODE_Y_COEF_LOOP(4, 0,);
2253 MERGE_CTX(8, AV_RN64A);
2254 DECODE_Y_COEF_LOOP(8, 0, 32);
2259 #define DECODE_UV_COEF_LOOP(step) \
2260 for (n = 0, y = 0; y < end_y; y += step) { \
2261 for (x = 0; x < end_x; x += step, n += step * step) { \
2262 res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n, \
2263 16 * step * step, c, e, p, a[x] + l[y], \
2264 uvscan, uvnb, uv_band_counts, qmul[1]); \
2265 a[x] = l[y] = !!res; \
2267 AV_WN16A(&s->uveob[pl][n], res); \
2269 s->uveob[pl][n] = res; \
2274 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2275 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2276 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2281 for (pl = 0; pl < 2; pl++) {
2282 a = &s->above_uv_nnz_ctx[pl][col];
2283 l = &s->left_uv_nnz_ctx[pl][row & 7];
2286 DECODE_UV_COEF_LOOP(1);
2289 MERGE_CTX(2, AV_RN16A);
2290 DECODE_UV_COEF_LOOP(2);
2294 MERGE_CTX(4, AV_RN32A);
2295 DECODE_UV_COEF_LOOP(4);
2299 MERGE_CTX(8, AV_RN64A);
2300 // a 64x64 (max) uv block can only ever contain one tx32x32 block,
2301 // so there is no need to loop
2302 res = decode_coeffs_b32(&s->c, s->uvblock[pl],
2303 1024, c, e, p, a[0] + l[0],
2304 uvscan, uvnb, uv_band_counts, qmul[1]);
2305 a[0] = l[0] = !!res;
2306 AV_WN16A(&s->uveob[pl][0], res);
2313 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2314 uint8_t *dst_edge, ptrdiff_t stride_edge,
2315 uint8_t *dst_inner, ptrdiff_t stride_inner,
2316 uint8_t *l, int col, int x, int w,
2317 int row, int y, enum TxfmMode tx,
2320 int have_top = row > 0 || y > 0;
2321 int have_left = col > s->tiling.tile_col_start || x > 0;
2322 int have_right = x < w - 1;
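// mode_conv[] below remaps an intra mode when its required edge pixels are
// unavailable (no block above and/or to the left); e.g. VERT_PRED without a
// top edge degrades to DC_127_PRED and HOR_PRED without a left edge to
// DC_129_PRED, so the predictors never need data from outside the frame/tile.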
2323 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2324 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2325 { DC_127_PRED, VERT_PRED } },
2326 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2327 { HOR_PRED, HOR_PRED } },
2328 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2329 { LEFT_DC_PRED, DC_PRED } },
2330 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2331 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2332 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2333 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2334 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2335 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2336 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2337 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2338 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2339 { DC_127_PRED, VERT_LEFT_PRED } },
2340 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2341 { HOR_UP_PRED, HOR_UP_PRED } },
2342 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2343 { HOR_PRED, TM_VP8_PRED } },
2345 static const struct {
2346 uint8_t needs_left:1;
2347 uint8_t needs_top:1;
2348 uint8_t needs_topleft:1;
2349 uint8_t needs_topright:1;
2350 uint8_t invert_left:1;
2351 } edges[N_INTRA_PRED_MODES] = {
2352 [VERT_PRED] = { .needs_top = 1 },
2353 [HOR_PRED] = { .needs_left = 1 },
2354 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2355 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2356 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2357 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2358 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2359 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2360 [HOR_UP_PRED] = { .needs_left = 1, .invert_left = 1 },
2361 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2362 [LEFT_DC_PRED] = { .needs_left = 1 },
2363 [TOP_DC_PRED] = { .needs_top = 1 },
2364 [DC_128_PRED] = { 0 },
2365 [DC_127_PRED] = { 0 },
2366 [DC_129_PRED] = { 0 }
2369 av_assert2(mode >= 0 && mode < 10);
2370 mode = mode_conv[mode][have_left][have_top];
2371 if (edges[mode].needs_top) {
2372 uint8_t *top, *topleft;
2373 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
2374 int n_px_need_tr = 0;
2376 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2379 // if top of sb64-row, use s->intra_pred_data[] instead of
2380 // dst[-stride] for intra prediction (it contains pre- instead of
2381 // post-loopfilter data)
2383 top = !(row & 7) && !y ?
2384 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2385 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2387 topleft = !(row & 7) && !y ?
2388 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2389 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2390 &dst_inner[-stride_inner];
2394 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2395 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2396 n_px_need + n_px_need_tr <= n_px_have) {
2400 if (n_px_need <= n_px_have) {
2401 memcpy(*a, top, n_px_need);
2403 memcpy(*a, top, n_px_have);
2404 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2405 n_px_need - n_px_have);
2408 memset(*a, 127, n_px_need);
2410 if (edges[mode].needs_topleft) {
2411 if (have_left && have_top) {
2412 (*a)[-1] = topleft[-1];
2414 (*a)[-1] = have_top ? 129 : 127;
2417 if (tx == TX_4X4 && edges[mode].needs_topright) {
2418 if (have_top && have_right &&
2419 n_px_need + n_px_need_tr <= n_px_have) {
2420 memcpy(&(*a)[4], &top[4], 4);
2422 memset(&(*a)[4], (*a)[3], 4);
2427 if (edges[mode].needs_left) {
2429 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
2430 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2431 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2433 if (edges[mode].invert_left) {
2434 if (n_px_need <= n_px_have) {
2435 for (i = 0; i < n_px_need; i++)
2436 l[i] = dst[i * stride - 1];
2438 for (i = 0; i < n_px_have; i++)
2439 l[i] = dst[i * stride - 1];
2440 memset(&l[n_px_have], l[n_px_have - 1], n_px_need - n_px_have);
2443 if (n_px_need <= n_px_have) {
2444 for (i = 0; i < n_px_need; i++)
2445 l[n_px_need - 1 - i] = dst[i * stride - 1];
2447 for (i = 0; i < n_px_have; i++)
2448 l[n_px_need - 1 - i] = dst[i * stride - 1];
2449 memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
2453 memset(l, 129, 4 << tx);
2460 static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2462 VP9Context *s = ctx->priv_data;
2464 int row = s->row, col = s->col;
2465 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2466 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2467 int end_x = FFMIN(2 * (s->cols - col), w4);
2468 int end_y = FFMIN(2 * (s->rows - row), h4);
2469 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2470 int uvstep1d = 1 << b->uvtx, p;
2471 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2472 LOCAL_ALIGNED_32(uint8_t, a_buf, [64]);
2473 LOCAL_ALIGNED_32(uint8_t, l, [32]);
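// sketch of the flow below: for every tx-sized unit we build the top (a) and
// left (l) edge arrays via check_intra_mode(), run the matching intra
// predictor, and then, if the block is not skipped and the EOB is non-zero,
// add the inverse transform of the decoded residual on top.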
2475 for (n = 0, y = 0; y < end_y; y += step1d) {
2476 uint8_t *ptr = dst, *ptr_r = dst_r;
2477 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2478 ptr_r += 4 * step1d, n += step) {
2479 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2481 uint8_t *a = &a_buf[32];
2482 enum TxfmType txtp = vp9_intra_txfm_type[mode];
2483 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2485 mode = check_intra_mode(s, mode, &a, ptr_r,
2486 s->frames[CUR_FRAME].tf.f->linesize[0],
2487 ptr, s->y_stride, l,
2488 col, x, w4, row, y, b->tx, 0);
2489 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2491 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2492 s->block + 16 * n, eob);
2494 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2495 dst += 4 * step1d * s->y_stride;
2502 step = 1 << (b->uvtx * 2);
2503 for (p = 0; p < 2; p++) {
2504 dst = s->dst[1 + p];
2505 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2506 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2507 uint8_t *ptr = dst, *ptr_r = dst_r;
2508 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2509 ptr_r += 4 * uvstep1d, n += step) {
2510 int mode = b->uvmode;
2511 uint8_t *a = &a_buf[16];
2512 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2514 mode = check_intra_mode(s, mode, &a, ptr_r,
2515 s->frames[CUR_FRAME].tf.f->linesize[1],
2516 ptr, s->uv_stride, l,
2517 col, x, w4, row, y, b->uvtx, p + 1);
2518 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2520 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2521 s->uvblock[p] + 16 * n, eob);
2523 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2524 dst += 4 * uvstep1d * s->uv_stride;
2529 static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2530 uint8_t *dst, ptrdiff_t dst_stride,
2531 const uint8_t *ref, ptrdiff_t ref_stride,
2532 ThreadFrame *ref_frame,
2533 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2534 int bw, int bh, int w, int h)
2536 int mx = mv->x, my = mv->y, th;
2540 ref += y * ref_stride + x;
2543 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2544 // we use +7 because the last 7 pixels of each sbrow can be changed in
2545 // the longest loopfilter of the next sbrow
2546 th = (y + bh + 4 * !!my + 7) >> 6;
2547 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
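// if the block plus the subpel filter taps (3 to the left/top, 4 to the
// right/bottom when the MV has a fractional component) would read outside
// the reference frame, copy the needed area into edge_emu_buffer with edge
// replication and motion-compensate from that buffer instead.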
2548 if (x < !!mx * 3 || y < !!my * 3 ||
2549 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2550 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2551 ref - !!my * 3 * ref_stride - !!mx * 3,
2553 bw + !!mx * 7, bh + !!my * 7,
2554 x - !!mx * 3, y - !!my * 3, w, h);
2555 ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2558 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
2561 static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2562 uint8_t *dst_u, uint8_t *dst_v,
2563 ptrdiff_t dst_stride,
2564 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2565 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2566 ThreadFrame *ref_frame,
2567 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2568 int bw, int bh, int w, int h)
2570 int mx = mv->x, my = mv->y, th;
2574 ref_u += y * src_stride_u + x;
2575 ref_v += y * src_stride_v + x;
2578 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2579 // we use +7 because the last 7 pixels of each sbrow can be changed in
2580 // the longest loopfilter of the next sbrow
2581 th = (y + bh + 4 * !!my + 7) >> 5;
2582 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2583 if (x < !!mx * 3 || y < !!my * 3 ||
2584 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2585 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2586 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2588 bw + !!mx * 7, bh + !!my * 7,
2589 x - !!mx * 3, y - !!my * 3, w, h);
2590 ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2591 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2593 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2594 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2596 bw + !!mx * 7, bh + !!my * 7,
2597 x - !!mx * 3, y - !!my * 3, w, h);
2598 ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2599 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
2601 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2602 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
2606 static void inter_recon(AVCodecContext *ctx)
2608 static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
2609 { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
2610 { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
2612 VP9Context *s = ctx->priv_data;
2614 int row = s->row, col = s->col;
2615 ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]], *tref2;
2616 AVFrame *ref1 = tref1->f, *ref2;
2617 int w1 = ref1->width, h1 = ref1->height, w2, h2;
2618 ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
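// rough structure of this function: sub-8x8 luma blocks get one MC call per
// 4x4/4x8/8x4 partition (up to four per reference), larger blocks get a
// single call per reference, and chroma always uses one call per reference,
// with the rounded average of the sub-block MVs when the block is split.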
2621 tref2 = &s->refs[s->refidx[b->ref[1]]];
2628 if (b->bs > BS_8x8) {
2629 if (b->bs == BS_8x4) {
2630 mc_luma_dir(s, s->dsp.mc[3][b->filter][0], s->dst[0], ls_y,
2631 ref1->data[0], ref1->linesize[0], tref1,
2632 row << 3, col << 3, &b->mv[0][0], 8, 4, w1, h1);
2633 mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
2634 s->dst[0] + 4 * ls_y, ls_y,
2635 ref1->data[0], ref1->linesize[0], tref1,
2636 (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1);
2639 mc_luma_dir(s, s->dsp.mc[3][b->filter][1], s->dst[0], ls_y,
2640 ref2->data[0], ref2->linesize[0], tref2,
2641 row << 3, col << 3, &b->mv[0][1], 8, 4, w2, h2);
2642 mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
2643 s->dst[0] + 4 * ls_y, ls_y,
2644 ref2->data[0], ref2->linesize[0], tref2,
2645 (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2);
2647 } else if (b->bs == BS_4x8) {
2648 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2649 ref1->data[0], ref1->linesize[0], tref1,
2650 row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1);
2651 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2652 ref1->data[0], ref1->linesize[0], tref1,
2653 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1);
2656 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2657 ref2->data[0], ref2->linesize[0], tref2,
2658 row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2);
2659 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2660 ref2->data[0], ref2->linesize[0], tref2,
2661 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2);
2664 av_assert2(b->bs == BS_4x4);
2666 // FIXME if two horizontally adjacent blocks have the same MV,
2667 // do a w8 instead of a w4 call
2668 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2669 ref1->data[0], ref1->linesize[0], tref1,
2670 row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1);
2671 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2672 ref1->data[0], ref1->linesize[0], tref1,
2673 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1);
2674 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2675 s->dst[0] + 4 * ls_y, ls_y,
2676 ref1->data[0], ref1->linesize[0], tref1,
2677 (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1);
2678 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2679 s->dst[0] + 4 * ls_y + 4, ls_y,
2680 ref1->data[0], ref1->linesize[0], tref1,
2681 (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1);
2684 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2685 ref2->data[0], ref2->linesize[0], tref2,
2686 row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2);
2687 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2688 ref2->data[0], ref2->linesize[0], tref2,
2689 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2);
2690 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2691 s->dst[0] + 4 * ls_y, ls_y,
2692 ref2->data[0], ref2->linesize[0], tref2,
2693 (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2);
2694 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2695 s->dst[0] + 4 * ls_y + 4, ls_y,
2696 ref2->data[0], ref2->linesize[0], tref2,
2697 (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2);
2701 int bwl = bwlog_tab[0][b->bs];
2702 int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
2704 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], s->dst[0], ls_y,
2705 ref1->data[0], ref1->linesize[0], tref1,
2706 row << 3, col << 3, &b->mv[0][0], bw, bh, w1, h1);
2709 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], s->dst[0], ls_y,
2710 ref2->data[0], ref2->linesize[0], tref2,
2711 row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2);
2716 int bwl = bwlog_tab[1][b->bs];
2717 int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
2726 if (b->bs > BS_8x8) {
2727 mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
2728 mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
2733 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
2734 s->dst[1], s->dst[2], ls_uv,
2735 ref1->data[1], ref1->linesize[1],
2736 ref1->data[2], ref1->linesize[2], tref1,
2737 row << 2, col << 2, &mvuv, bw, bh, w1, h1);
2740 if (b->bs > BS_8x8) {
2741 mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
2742 mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
2746 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
2747 s->dst[1], s->dst[2], ls_uv,
2748 ref2->data[1], ref2->linesize[1],
2749 ref2->data[2], ref2->linesize[2], tref2,
2750 row << 2, col << 2, &mvuv, bw, bh, w2, h2);
2755 /* mostly copied from intra_recon() */
2757 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2758 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2759 int end_x = FFMIN(2 * (s->cols - col), w4);
2760 int end_y = FFMIN(2 * (s->rows - row), h4);
2761 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2762 int uvstep1d = 1 << b->uvtx, p;
2763 uint8_t *dst = s->dst[0];
2766 for (n = 0, y = 0; y < end_y; y += step1d) {
2768 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2769 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2772 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2773 s->block + 16 * n, eob);
2775 dst += 4 * s->y_stride * step1d;
2781 step = 1 << (b->uvtx * 2);
2782 for (p = 0; p < 2; p++) {
2783 dst = s->dst[p + 1];
2784 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2786 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2787 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2790 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2791 s->uvblock[p] + 16 * n, eob);
2793 dst += 4 * uvstep1d * s->uv_stride;
2799 static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
2800 int row_and_7, int col_and_7,
2801 int w, int h, int col_end, int row_end,
2802 enum TxfmMode tx, int skip_inter)
2804 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2805 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2806 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2807 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2809 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2810 // edges. This means that for UV, we work on two subsampled blocks at
2811 // a time, and we only use the topleft block's mode information to set
2812 // things like block strength. Thus, for any block size smaller than
2813 // 16x16, ignore the odd portion of the block.
2814 if (tx == TX_4X4 && is_uv) {
2829 if (tx == TX_4X4 && !skip_inter) {
2830 int t = 1 << col_and_7, m_col = (t << w) - t, y;
2831 int m_col_odd = (t << (w - 1)) - t;
2833 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
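// mask construction example: with t = 1 << col_and_7 and m_col = (t << w) - t,
// a block starting at column 2 (col_and_7 == 2) that is w == 3 units wide
// gives t = 0x04 and m_col = 0x1c, i.e. bits 2..4 set, one bit per mask
// column position.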
2835 int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2837 for (y = row_and_7; y < h + row_and_7; y++) {
2838 int col_mask_id = 2 - !(y & 7);
2840 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2841 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2842 // for odd lines, if the odd col is not being filtered,
2843 // skip odd row also:
2850 // if a/c are even row/col and b/d are odd, and d is skipped,
2851 // e.g. right edge of size-66x66.webm, then skip b also (bug)
2852 if ((col_end & 1) && (y & 1)) {
2853 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
2855 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
2859 int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2861 for (y = row_and_7; y < h + row_and_7; y++) {
2862 int col_mask_id = 2 - !(y & 3);
2864 lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
2865 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2866 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
2867 lflvl->mask[is_uv][0][y][3] |= m_col;
2868 lflvl->mask[is_uv][1][y][3] |= m_col;
2872 int y, t = 1 << col_and_7, m_col = (t << w) - t;
2875 int mask_id = (tx == TX_8X8);
2876 int l2 = tx + is_uv - 1, step1d = 1 << l2;
2877 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2878 int m_row = m_col & masks[l2];
2880 // at odd UV col/row edges of tx16/tx32 blocks, force the 8-wide
2881 // loopfilter to prevent going off the visible edge.
2882 if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2883 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2884 int m_row_8 = m_row - m_row_16;
2886 for (y = row_and_7; y < h + row_and_7; y++) {
2887 lflvl->mask[is_uv][0][y][0] |= m_row_16;
2888 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2891 for (y = row_and_7; y < h + row_and_7; y++)
2892 lflvl->mask[is_uv][0][y][mask_id] |= m_row;
2895 if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2896 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2897 lflvl->mask[is_uv][1][y][0] |= m_col;
2898 if (y - row_and_7 == h - 1)
2899 lflvl->mask[is_uv][1][y][1] |= m_col;
2901 for (y = row_and_7; y < h + row_and_7; y += step1d)
2902 lflvl->mask[is_uv][1][y][mask_id] |= m_col;
2904 } else if (tx != TX_4X4) {
2907 mask_id = (tx == TX_8X8) || (is_uv && h == 1);
2908 lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
2909 mask_id = (tx == TX_8X8) || (is_uv && w == 1);
2910 for (y = row_and_7; y < h + row_and_7; y++)
2911 lflvl->mask[is_uv][0][y][mask_id] |= t;
2913 int t8 = t & 0x01, t4 = t - t8;
2915 for (y = row_and_7; y < h + row_and_7; y++) {
2916 lflvl->mask[is_uv][0][y][2] |= t4;
2917 lflvl->mask[is_uv][0][y][1] |= t8;
2919 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
2921 int t8 = t & 0x11, t4 = t - t8;
2923 for (y = row_and_7; y < h + row_and_7; y++) {
2924 lflvl->mask[is_uv][0][y][2] |= t4;
2925 lflvl->mask[is_uv][0][y][1] |= t8;
2927 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
2932 static void decode_b(AVCodecContext *ctx, int row, int col,
2933 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2934 enum BlockLevel bl, enum BlockPartition bp)
2936 VP9Context *s = ctx->priv_data;
2938 enum BlockSize bs = bl * 3 + bp;
2939 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
2941 AVFrame *f = s->frames[CUR_FRAME].tf.f;
2947 s->min_mv.x = -(128 + col * 64);
2948 s->min_mv.y = -(128 + row * 64);
2949 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
2950 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
2956 b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
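// the chroma tx size drops one step below the luma one whenever the luma tx
// already spans the full block width or height (w4/h4 are in 8px units, so
// w4 * 2 == 1 << b->tx means the tx covers the whole block horizontally);
// otherwise the same size is reused for the half-resolution chroma planes.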
2963 #define SPLAT_ZERO_CTX(v, n) \
2965 case 1: v = 0; break; \
2966 case 2: AV_ZERO16(&v); break; \
2967 case 4: AV_ZERO32(&v); break; \
2968 case 8: AV_ZERO64(&v); break; \
2969 case 16: AV_ZERO128(&v); break; \
2971 #define SPLAT_ZERO_YUV(dir, var, off, n) \
2973 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
2974 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
2975 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
2979 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1); break;
2980 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2); break;
2981 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4); break;
2982 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8); break;
2985 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1); break;
2986 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2); break;
2987 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4); break;
2988 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8); break;
2993 s->block += w4 * h4 * 64;
2994 s->uvblock[0] += w4 * h4 * 16;
2995 s->uvblock[1] += w4 * h4 * 16;
2996 s->eob += 4 * w4 * h4;
2997 s->uveob[0] += w4 * h4;
2998 s->uveob[1] += w4 * h4;
3004 // use emulated overhangs if the stride of the target buffer can't hold
3005 // the block; this allows us to support emu-edge and so on even for large blocks
3007 emu[0] = (col + w4) * 8 > f->linesize[0] ||
3008 (row + h4) > s->rows;
3009 emu[1] = (col + w4) * 4 > f->linesize[1] ||
3010 (row + h4) > s->rows;
3012 s->dst[0] = s->tmp_y;
3015 s->dst[0] = f->data[0] + yoff;
3016 s->y_stride = f->linesize[0];
3019 s->dst[1] = s->tmp_uv[0];
3020 s->dst[2] = s->tmp_uv[1];
3023 s->dst[1] = f->data[1] + uvoff;
3024 s->dst[2] = f->data[2] + uvoff;
3025 s->uv_stride = f->linesize[1];
3028 intra_recon(ctx, yoff, uvoff);
3033 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3035 for (n = 0; o < w; n++) {
3040 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3041 s->tmp_y + o, 64, h, 0, 0);
3047 int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
3049 for (n = 1; o < w; n++) {
3054 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3055 s->tmp_uv[0] + o, 32, h, 0, 0);
3056 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3057 s->tmp_uv[1] + o, 32, h, 0, 0);
3063 // pick filter level and find edges to apply filter to
3064 if (s->filter.level &&
3065 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3066 [b->mode[3] != ZEROMV]) > 0) {
3067 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3068 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3070 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3071 mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3072 mask_edges(lflvl, 1, row7, col7, x_end, y_end,
3073 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3074 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3075 b->uvtx, skip_inter);
3077 if (!s->filter.lim_lut[lvl]) {
3078 int sharp = s->filter.sharpness;
3082 limit >>= (sharp + 3) >> 2;
3083 limit = FFMIN(limit, 9 - sharp);
3085 limit = FFMAX(limit, 1);
3087 s->filter.lim_lut[lvl] = limit;
3088 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
3094 s->block += w4 * h4 * 64;
3095 s->uvblock[0] += w4 * h4 * 16;
3096 s->uvblock[1] += w4 * h4 * 16;
3097 s->eob += 4 * w4 * h4;
3098 s->uveob[0] += w4 * h4;
3099 s->uveob[1] += w4 * h4;
3103 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3104 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3106 VP9Context *s = ctx->priv_data;
3107 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3108 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
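// the partition probability context c packs two bits: bit 0 is derived from
// the above block's partition state at this level, bit 1 from the left
// block's, selecting one of four probability sets per block level.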
3109 const uint8_t *p = s->keyframe ? vp9_default_kf_partition_probs[bl][c] :
3110 s->prob.p.partition[bl][c];
3111 enum BlockPartition bp;
3112 ptrdiff_t hbs = 4 >> bl;
3113 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3114 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3117 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3118 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3119 } else if (col + hbs < s->cols) { // FIXME why not <=?
3120 if (row + hbs < s->rows) { // FIXME why not <=?
3121 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3123 case PARTITION_NONE:
3124 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3127 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3128 yoff += hbs * 8 * y_stride;
3129 uvoff += hbs * 4 * uv_stride;
3130 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3133 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3136 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3138 case PARTITION_SPLIT:
3139 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3140 decode_sb(ctx, row, col + hbs, lflvl,
3141 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3142 yoff += hbs * 8 * y_stride;
3143 uvoff += hbs * 4 * uv_stride;
3144 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3145 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3146 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3151 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3152 bp = PARTITION_SPLIT;
3153 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3154 decode_sb(ctx, row, col + hbs, lflvl,
3155 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3158 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3160 } else if (row + hbs < s->rows) { // FIXME why not <=?
3161 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3162 bp = PARTITION_SPLIT;
3163 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3164 yoff += hbs * 8 * y_stride;
3165 uvoff += hbs * 4 * uv_stride;
3166 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3169 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3172 bp = PARTITION_SPLIT;
3173 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3175 s->counts.partition[bl][c][bp]++;
3178 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3179 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3181 VP9Context *s = ctx->priv_data;
3183 ptrdiff_t hbs = 4 >> bl;
3184 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3185 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3188 av_assert2(b->bl == BL_8X8);
3189 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3190 } else if (s->b->bl == bl) {
3191 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3192 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3193 yoff += hbs * 8 * y_stride;
3194 uvoff += hbs * 4 * uv_stride;
3195 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3196 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3199 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
3202 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3203 if (col + hbs < s->cols) { // FIXME why not <=?
3204 if (row + hbs < s->rows) {
3205 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
3206 uvoff + 4 * hbs, bl + 1);
3207 yoff += hbs * 8 * y_stride;
3208 uvoff += hbs * 4 * uv_stride;
3209 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3210 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3211 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3215 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3217 } else if (row + hbs < s->rows) {
3218 yoff += hbs * 8 * y_stride;
3219 uvoff += hbs * 4 * uv_stride;
3220 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3225 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3226 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3228 VP9Context *s = ctx->priv_data;
3229 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3230 uint8_t *dst = f->data[0] + yoff, *lvl = lflvl->level;
3231 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
3234 // FIXME to what extent can we interleave the v/h loopfilter calls? E.g.
3235 // if you think of them as acting on a 8x8 block max, we can interleave
3236 // each v/h within the single x loop, but that only works if we work on
3237 // 8 pixel blocks, and we won't always do that (we want at least 16px
3238 // to use SSE2 optimizations, perhaps 32 for AVX2)
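// note on the masks consumed below (as used by these loops): mask[plane][0][y]
// drives the filters on edges between columns and mask[plane][1][y] those on
// edges between rows; within one row, entry [0] marks 16-wide edges, [1]
// 8-wide, [2] 4-wide and [3] the inner 4px edges, one bit per 8px position.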
3240 // filter edges between columns, Y plane (e.g. block1 | block2)
3241 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
3242 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
3243 uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
3244 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3245 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3246 unsigned hm = hm1 | hm2 | hm13 | hm23;
3248 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
3250 int L = *l, H = L >> 4;
3251 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3254 if (hmask1[0] & x) {
3255 if (hmask2[0] & x) {
3256 av_assert2(l[8] == L);
3257 s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
3259 s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
3261 } else if (hm2 & x) {
3264 E |= s->filter.mblim_lut[L] << 8;
3265 I |= s->filter.lim_lut[L] << 8;
3266 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3268 [0](ptr, ls_y, E, I, H);
3270 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3271 [0](ptr, ls_y, E, I, H);
3274 } else if (hm2 & x) {
3275 int L = l[8], H = L >> 4;
3276 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3279 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3280 [0](ptr + 8 * ls_y, ls_y, E, I, H);
3284 int L = *l, H = L >> 4;
3285 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3290 E |= s->filter.mblim_lut[L] << 8;
3291 I |= s->filter.lim_lut[L] << 8;
3292 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
3294 s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
3296 } else if (hm23 & x) {
3297 int L = l[8], H = L >> 4;
3298 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3300 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
3306 // filter edges between rows, Y plane (e.g. ------)
3308 dst = f->data[0] + yoff;
3310 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
3311 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
3312 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
3314 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
3317 int L = *l, H = L >> 4;
3318 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3321 if (vmask[0] & (x << 1)) {
3322 av_assert2(l[1] == L);
3323 s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
3325 s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
3327 } else if (vm & (x << 1)) {
3330 E |= s->filter.mblim_lut[L] << 8;
3331 I |= s->filter.lim_lut[L] << 8;
3332 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3333 [!!(vmask[1] & (x << 1))]
3334 [1](ptr, ls_y, E, I, H);
3336 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3337 [1](ptr, ls_y, E, I, H);
3339 } else if (vm & (x << 1)) {
3340 int L = l[1], H = L >> 4;
3341 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3343 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
3344 [1](ptr + 8, ls_y, E, I, H);
3348 int L = *l, H = L >> 4;
3349 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3351 if (vm3 & (x << 1)) {
3354 E |= s->filter.mblim_lut[L] << 8;
3355 I |= s->filter.lim_lut[L] << 8;
3356 s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
3358 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
3360 } else if (vm3 & (x << 1)) {
3361 int L = l[1], H = L >> 4;
3362 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3364 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
3369 // same principle but for U/V planes
3370 for (p = 0; p < 2; p++) {
3372 dst = f->data[1 + p] + uvoff;
3373 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
3374 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
3375 uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
3376 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
3377 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
3379 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
3382 int L = *l, H = L >> 4;
3383 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3385 if (hmask1[0] & x) {
3386 if (hmask2[0] & x) {
3387 av_assert2(l[16] == L);
3388 s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
3390 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
3392 } else if (hm2 & x) {
3395 E |= s->filter.mblim_lut[L] << 8;
3396 I |= s->filter.lim_lut[L] << 8;
3397 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3399 [0](ptr, ls_uv, E, I, H);
3401 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3402 [0](ptr, ls_uv, E, I, H);
3404 } else if (hm2 & x) {
3405 int L = l[16], H = L >> 4;
3406 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3408 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3409 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
3417 dst = f->data[1 + p] + uvoff;
3418 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
3419 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
3420 unsigned vm = vmask[0] | vmask[1] | vmask[2];
3422 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
3425 int L = *l, H = L >> 4;
3426 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3429 if (vmask[0] & (x << 2)) {
3430 av_assert2(l[2] == L);
3431 s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
3433 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
3435 } else if (vm & (x << 2)) {
3438 E |= s->filter.mblim_lut[L] << 8;
3439 I |= s->filter.lim_lut[L] << 8;
3440 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3441 [!!(vmask[1] & (x << 2))]
3442 [1](ptr, ls_uv, E, I, H);
3444 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3445 [1](ptr, ls_uv, E, I, H);
3447 } else if (vm & (x << 2)) {
3448 int L = l[2], H = L >> 4;
3449 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3451 s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
3452 [1](ptr + 8, ls_uv, E, I, H);
3462 static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
3464 int sb_start = ( idx * n) >> log2_n;
3465 int sb_end = ((idx + 1) * n) >> log2_n;
3466 *start = FFMIN(sb_start, n) << 3;
3467 *end = FFMIN(sb_end, n) << 3;
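// worked example (hypothetical numbers): with n = 9 superblock columns and
// log2_n = 1 (two tile columns), tile 0 gets sb columns [0, 4) and tile 1
// gets [4, 9); the << 3 converts superblock units into the 8px block units
// used by the row/col loops.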
3470 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3471 int max_count, int update_factor)
3473 unsigned ct = ct0 + ct1, p2, p1;
3479 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3480 p2 = av_clip(p2, 1, 255);
3481 ct = FFMIN(ct, max_count);
3482 update_factor = FASTDIV(update_factor * ct, max_count);
3484 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3485 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
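// worked example (hypothetical counts): ct0 = 30, ct1 = 10 gives
// p2 = (30 * 256 + 20) / 40 = 192; with max_count = 20 the count saturates,
// so update_factor keeps its full value (e.g. 128) and the stored
// probability moves halfway from p1 towards 192.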
3488 static void adapt_probs(VP9Context *s)
3491 prob_context *p = &s->prob_ctx[s->framectxid].p;
3492 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
3495 for (i = 0; i < 4; i++)
3496 for (j = 0; j < 2; j++)
3497 for (k = 0; k < 2; k++)
3498 for (l = 0; l < 6; l++)
3499 for (m = 0; m < 6; m++) {
3500 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3501 unsigned *e = s->counts.eob[i][j][k][l][m];
3502 unsigned *c = s->counts.coef[i][j][k][l][m];
3504 if (l == 0 && m >= 3) // dc only has 3 pt
3507 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3508 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3509 adapt_prob(&pp[2], c[1], c[2], 24, uf);
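// the three nodes adapted above correspond to the coefficient token tree:
// pp[0] is the EOB decision, pp[1] zero vs. non-zero and pp[2] "exactly one"
// vs. a larger value; the counts come from decode_coeffs_b_generic().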
3512 if (s->keyframe || s->intraonly) {
3513 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3514 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3515 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3516 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
3521 for (i = 0; i < 3; i++)
3522 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
3525 for (i = 0; i < 4; i++)
3526 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
3529 if (s->comppredmode == PRED_SWITCHABLE) {
3530 for (i = 0; i < 5; i++)
3531 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
3535 if (s->comppredmode != PRED_SINGLEREF) {
3536 for (i = 0; i < 5; i++)
3537 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3538 s->counts.comp_ref[i][1], 20, 128);
3541 if (s->comppredmode != PRED_COMPREF) {
3542 for (i = 0; i < 5; i++) {
3543 uint8_t *pp = p->single_ref[i];
3544 unsigned (*c)[2] = s->counts.single_ref[i];
3546 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3547 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3551 // block partitioning
3552 for (i = 0; i < 4; i++)
3553 for (j = 0; j < 4; j++) {
3554 uint8_t *pp = p->partition[i][j];
3555 unsigned *c = s->counts.partition[i][j];
3557 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3558 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3559 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3563 if (s->txfmmode == TX_SWITCHABLE) {
3564 for (i = 0; i < 2; i++) {
3565 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3567 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3568 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3569 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3570 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3571 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3572 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3576 // interpolation filter
3577 if (s->filtermode == FILTER_SWITCHABLE) {
3578 for (i = 0; i < 4; i++) {
3579 uint8_t *pp = p->filter[i];
3580 unsigned *c = s->counts.filter[i];
3582 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3583 adapt_prob(&pp[1], c[1], c[2], 20, 128);
3588 for (i = 0; i < 7; i++) {
3589 uint8_t *pp = p->mv_mode[i];
3590 unsigned *c = s->counts.mv_mode[i];
3592 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3593 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3594 adapt_prob(&pp[2], c[1], c[3], 20, 128);
3599 uint8_t *pp = p->mv_joint;
3600 unsigned *c = s->counts.mv_joint;
3602 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3603 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3604 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3608 for (i = 0; i < 2; i++) {
3610 unsigned *c, (*c2)[2], sum;
3612 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3613 s->counts.mv_comp[i].sign[1], 20, 128);
3615 pp = p->mv_comp[i].classes;
3616 c = s->counts.mv_comp[i].classes;
3617 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3618 adapt_prob(&pp[0], c[0], sum, 20, 128);
3620 adapt_prob(&pp[1], c[1], sum, 20, 128);
3622 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3623 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3625 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3626 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3628 adapt_prob(&pp[6], c[6], sum, 20, 128);
3629 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3630 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3631 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3633 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3634 s->counts.mv_comp[i].class0[1], 20, 128);
3635 pp = p->mv_comp[i].bits;
3636 c2 = s->counts.mv_comp[i].bits;
3637 for (j = 0; j < 10; j++)
3638 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3640 for (j = 0; j < 2; j++) {
3641 pp = p->mv_comp[i].class0_fp[j];
3642 c = s->counts.mv_comp[i].class0_fp[j];
3643 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3644 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3645 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3647 pp = p->mv_comp[i].fp;
3648 c = s->counts.mv_comp[i].fp;
3649 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3650 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3651 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3653 if (s->highprecisionmvs) {
3654 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3655 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3656 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3657 s->counts.mv_comp[i].hp[1], 20, 128);
3662 for (i = 0; i < 4; i++) {
3663 uint8_t *pp = p->y_mode[i];
3664 unsigned *c = s->counts.y_mode[i], sum, s2;
3666 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3667 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3668 sum -= c[TM_VP8_PRED];
3669 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3670 sum -= c[VERT_PRED];
3671 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3672 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3674 adapt_prob(&pp[3], s2, sum, 20, 128);
3676 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3677 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3678 sum -= c[DIAG_DOWN_LEFT_PRED];
3679 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3680 sum -= c[VERT_LEFT_PRED];
3681 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3682 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3686 for (i = 0; i < 10; i++) {
3687 uint8_t *pp = p->uv_mode[i];
3688 unsigned *c = s->counts.uv_mode[i], sum, s2;
3690 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3691 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3692 sum -= c[TM_VP8_PRED];
3693 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3694 sum -= c[VERT_PRED];
3695 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3696 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3698 adapt_prob(&pp[3], s2, sum, 20, 128);
3700 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3701 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3702 sum -= c[DIAG_DOWN_LEFT_PRED];
3703 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3704 sum -= c[VERT_LEFT_PRED];
3705 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3706 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3710 static void free_buffers(VP9Context *s)
3712 av_freep(&s->intra_pred_data[0]);
3713 av_freep(&s->b_base);
3714 av_freep(&s->block_base);
3717 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3719 VP9Context *s = ctx->priv_data;
3722 for (i = 0; i < 2; i++) {
3723 if (s->frames[i].tf.f->data[0])
3724 vp9_unref_frame(ctx, &s->frames[i]);
3725 av_frame_free(&s->frames[i].tf.f);
3727 for (i = 0; i < 8; i++) {
3728 if (s->refs[i].f->data[0])
3729 ff_thread_release_buffer(ctx, &s->refs[i]);
3730 av_frame_free(&s->refs[i].f);
3731 if (s->next_refs[i].f->data[0])
3732 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3733 av_frame_free(&s->next_refs[i].f);
3743 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3744 int *got_frame, AVPacket *pkt)
3746 const uint8_t *data = pkt->data;
3747 int size = pkt->size;
3748 VP9Context *s = ctx->priv_data;
3749 int res, tile_row, tile_col, i, ref, row, col;
3750 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
3753 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3755 } else if (res == 0) {
3756 if (!s->refs[ref].f->data[0]) {
3757 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3758 return AVERROR_INVALIDDATA;
3760 if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
3768 if (s->frames[LAST_FRAME].tf.f->data[0])
3769 vp9_unref_frame(ctx, &s->frames[LAST_FRAME]);
3770 if (!s->keyframe && s->frames[CUR_FRAME].tf.f->data[0] &&
3771 (res = vp9_ref_frame(ctx, &s->frames[LAST_FRAME], &s->frames[CUR_FRAME])) < 0)
3773 if (s->frames[CUR_FRAME].tf.f->data[0])
3774 vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
3775 if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
3777 f = s->frames[CUR_FRAME].tf.f;
3778 f->key_frame = s->keyframe;
3779 f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3780 ls_y = f->linesize[0];
3781 ls_uv = f->linesize[1];
3784 for (i = 0; i < 8; i++) {
3785 if (s->next_refs[i].f->data[0])
3786 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3787 if (s->refreshrefmask & (1 << i)) {
3788 res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
3790 res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
3797 ctx->color_range = AVCOL_RANGE_JPEG;
3799 ctx->color_range = AVCOL_RANGE_MPEG;
3801 switch (s->colorspace) {
3802 case 1: ctx->colorspace = AVCOL_SPC_BT470BG; break;
3803 case 2: ctx->colorspace = AVCOL_SPC_BT709; break;
3804 case 3: ctx->colorspace = AVCOL_SPC_SMPTE170M; break;
3805 case 4: ctx->colorspace = AVCOL_SPC_SMPTE240M; break;
3808 // main tile decode loop
3809 memset(s->above_partition_ctx, 0, s->cols);
3810 memset(s->above_skip_ctx, 0, s->cols);
3811 if (s->keyframe || s->intraonly) {
3812 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
3814 memset(s->above_mode_ctx, NEARESTMV, s->cols);
3816 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
3817 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
3818 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
3819 memset(s->above_segpred_ctx, 0, s->cols);
3820 s->pass = s->uses_2pass =
3821 ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
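// two-pass decoding, in short: with frame threading and an adaptively
// updated context (refreshctx && !parallelmode), pass 1 only parses symbols
// and gathers counts so the adapted probabilities can be handed to the next
// frame thread early, and pass 2 does the actual reconstruction.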
3822 if ((res = update_block_buffers(ctx)) < 0) {
3823 av_log(ctx, AV_LOG_ERROR,
3824 "Failed to allocate block buffers\n");
3827 if (s->refreshctx && s->parallelmode) {
3830 for (i = 0; i < 4; i++) {
3831 for (j = 0; j < 2; j++)
3832 for (k = 0; k < 2; k++)
3833 for (l = 0; l < 6; l++)
3834 for (m = 0; m < 6; m++)
3835 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
3836 s->prob.coef[i][j][k][l][m], 3);
3837 if (s->txfmmode == i)
3840 s->prob_ctx[s->framectxid].p = s->prob.p;
3841 ff_thread_finish_setup(ctx);
3842 } else if (!s->refreshctx) {
3843 ff_thread_finish_setup(ctx);
3849 s->block = s->block_base;
3850 s->uvblock[0] = s->uvblock_base[0];
3851 s->uvblock[1] = s->uvblock_base[1];
3852 s->eob = s->eob_base;
3853 s->uveob[0] = s->uveob_base[0];
3854 s->uveob[1] = s->uveob_base[1];
3856 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
3857 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
3858 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
3860 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3863 if (tile_col == s->tiling.tile_cols - 1 &&
3864 tile_row == s->tiling.tile_rows - 1) {
3867 tile_size = AV_RB32(data);
3871 if (tile_size > size) {
3872 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3873 return AVERROR_INVALIDDATA;
3875 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
3876 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
3877 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3878 return AVERROR_INVALIDDATA;
3885 for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
3886 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
3887 struct VP9Filter *lflvl_ptr = s->lflvl;
3888 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
3890 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3891 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
3892 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
3895 memset(s->left_partition_ctx, 0, 8);
3896 memset(s->left_skip_ctx, 0, 8);
3897 if (s->keyframe || s->intraonly) {
3898 memset(s->left_mode_ctx, DC_PRED, 16);
3900 memset(s->left_mode_ctx, NEARESTMV, 8);
3902 memset(s->left_y_nnz_ctx, 0, 16);
3903 memset(s->left_uv_nnz_ctx, 0, 16);
3904 memset(s->left_segpred_ctx, 0, 8);
3906 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
3909 for (col = s->tiling.tile_col_start;
3910 col < s->tiling.tile_col_end;
3911 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3912 // FIXME integrate with lf code (i.e. zero after each
3913 // use, similar to the invtxfm coefficients)
3915 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
3919 decode_sb_mem(ctx, row, col, lflvl_ptr,
3920 yoff2, uvoff2, BL_64X64);
3922 decode_sb(ctx, row, col, lflvl_ptr,
3923 yoff2, uvoff2, BL_64X64);
3927 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
3935 // backup pre-loopfilter reconstruction data for intra
3936 // prediction of next row of sb64s
3937 if (row + 8 < s->rows) {
3938 memcpy(s->intra_pred_data[0],
3939 f->data[0] + yoff + 63 * ls_y,
3941 memcpy(s->intra_pred_data[1],
3942 f->data[1] + uvoff + 31 * ls_uv,
3944 memcpy(s->intra_pred_data[2],
3945 f->data[2] + uvoff + 31 * ls_uv,
3949 // loopfilter one row
3950 if (s->filter.level) {
3953 lflvl_ptr = s->lflvl;
3954 for (col = 0; col < s->cols;
3955 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3956 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
3960 // FIXME maybe we can make this more fine-grained by running the
3961 // loopfilter per-block instead of after each sbrow;
3962 // in fact, that would also make intra pred left preparation easier?
3963 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
3967 if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
3969 ff_thread_finish_setup(ctx);
3971 } while (s->pass++ == 1);
3972 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3975 for (i = 0; i < 8; i++) {
3976 if (s->refs[i].f->data[0])
3977 ff_thread_release_buffer(ctx, &s->refs[i]);
3978 ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
3981 if (!s->invisible) {
3982 if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
3990 static void vp9_decode_flush(AVCodecContext *ctx)
3992 VP9Context *s = ctx->priv_data;
3995 for (i = 0; i < 2; i++)
3996 vp9_unref_frame(ctx, &s->frames[i]);
3997 for (i = 0; i < 8; i++)
3998 ff_thread_release_buffer(ctx, &s->refs[i]);
4001 static int init_frames(AVCodecContext *ctx)
4003 VP9Context *s = ctx->priv_data;
4006 for (i = 0; i < 2; i++) {
4007 s->frames[i].tf.f = av_frame_alloc();
4008 if (!s->frames[i].tf.f) {
4009 vp9_decode_free(ctx);
4010 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4011 return AVERROR(ENOMEM);
4014 for (i = 0; i < 8; i++) {
4015 s->refs[i].f = av_frame_alloc();
4016 s->next_refs[i].f = av_frame_alloc();
4017 if (!s->refs[i].f || !s->next_refs[i].f) {
4018 vp9_decode_free(ctx);
4019 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
4020 return AVERROR(ENOMEM);
4027 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4029 VP9Context *s = ctx->priv_data;
4031 ctx->internal->allocate_progress = 1;
4032 ctx->pix_fmt = AV_PIX_FMT_YUV420P;
4033 ff_vp9dsp_init(&s->dsp);
4034 ff_videodsp_init(&s->vdsp, 8);
4035 s->filter.sharpness = -1;
4037 return init_frames(ctx);
4040 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4042 return init_frames(avctx);
4045 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4048 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4050 // detect size changes in other threads
4051 if (s->intra_pred_data[0] &&
4052 (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
4056 for (i = 0; i < 2; i++) {
4057 if (s->frames[i].tf.f->data[0])
4058 vp9_unref_frame(dst, &s->frames[i]);
4059 if (ssrc->frames[i].tf.f->data[0]) {
4060 if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
4064 for (i = 0; i < 8; i++) {
4065 if (s->refs[i].f->data[0])
4066 ff_thread_release_buffer(dst, &s->refs[i]);
4067 if (ssrc->next_refs[i].f->data[0]) {
4068 if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
4073 s->invisible = ssrc->invisible;
4074 s->keyframe = ssrc->keyframe;
4075 s->uses_2pass = ssrc->uses_2pass;
4076 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4077 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
4078 if (ssrc->segmentation.enabled) {
4079 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4080 sizeof(s->segmentation.feat));
4086 AVCodec ff_vp9_decoder = {
4088 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4089 .type = AVMEDIA_TYPE_VIDEO,
4090 .id = AV_CODEC_ID_VP9,
4091 .priv_data_size = sizeof(VP9Context),
4092 .init = vp9_decode_init,
4093 .close = vp9_decode_free,
4094 .decode = vp9_decode_frame,
4095 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4096 .flush = vp9_decode_flush,
4097 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4098 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),