2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
35 #define VP9_SYNCCODE 0x498342
/* Per-frame state: the decoded picture plus side data that later frames
 * (and the second decoding pass) reference.
 * NOTE(review): this extract is missing lines (embedded original line
 * numbers are non-contiguous); the ThreadFrame member and closing brace
 * are not visible here. */
72 typedef struct VP9Frame {
/* single refcounted buffer backing both segmentation_map and mv below */
74 AVBufferRef *extradata;
/* per-8x8-block segment ids; points at extradata->data (see vp9_alloc_frame) */
75 uint8_t *segmentation_map;
/* per-8x8-block {ref, mv} pairs, stored right after the segmentation map */
76 struct VP9mvrefPair *mv;
/* NOTE(review): loopfilter mask bitfields; the struct declaration that opens
 * this aggregate is not visible in this extract. The inline comments encode
 * the index meaning: [plane y/uv][col/row edges][8x8 row][filter size]. */
81 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
82 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
/* Mode/partition state for the block currently being decoded.
 * NOTE(review): some member lines and the closing brace are elided in
 * this extract. */
85 typedef struct VP9Block {
/* segment id, intra/inter flag, compound-prediction flag, reference
 * indices, per-sub-block modes, chroma mode, skip flag */
86 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
87 enum FilterMode filter;
/* one MV per 4x4/8x8 sub-block index, per reference (0/1) */
88 VP56mv mv[4 /* b_idx */][2 /* ref */];
/* luma and chroma transform sizes */
90 enum TxfmMode tx, uvtx;
92 enum BlockPartition bp;
/* Decoder-global state (one per AVCodecContext).
 * NOTE(review): many members are elided in this extract (embedded original
 * line numbers jump); comments below describe only what the visible code
 * in this chunk demonstrates. */
95 typedef struct VP9Context {
/* per-block scratch: b_base is the array (or a single entry in 1-pass
 * mode, see update_block_buffers), b is the cursor into it */
102 VP9Block *b_base, *b;
103 int pass, uses_2pass, last_uses_2pass;
/* current block position in 8x8 units; row7/col7 are presumably the
 * low 3 bits (0..7 within a superblock) — TODO confirm, not shown here */
104 int row, row7, col, col7;
106 ptrdiff_t y_stride, uv_stride;
/* frame-header fields, parsed in decode_frame_header() */
110 uint8_t keyframe, last_keyframe;
112 uint8_t use_last_frame_mvs;
118 uint8_t refreshrefmask;
119 uint8_t highprecisionmvs;
120 enum FilterMode filtermode;
121 uint8_t allowcompinter;
124 uint8_t parallelmode;
/* the two variable references used for compound prediction */
128 uint8_t varcompref[2];
/* 8 reference slots, double-buffered across frames */
129 ThreadFrame refs[8], next_refs[8];
138 uint8_t mblim_lut[64];
/* quantizer deltas for Y-DC / UV-DC / UV-AC relative to yac_qi */
146 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
151 uint8_t absolute_vals;
157 uint8_t skip_enabled;
/* tiling layout, derived in decode_frame_header() */
166 unsigned log2_tile_cols, log2_tile_rows;
167 unsigned tile_cols, tile_rows;
168 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
/* frame size in 64x64 superblocks and in 8x8 blocks */
170 unsigned sb_cols, sb_rows, rows, cols;
/* coefficient probabilities: the 3-entry form is the stored model, the
 * 11-entry form is presumably the expanded per-token table — TODO confirm,
 * enclosing struct lines are elided here */
173 uint8_t coef[4][2][2][6][6][3];
177 uint8_t coef[4][2][2][6][6][11];
/* symbol counts used for backward probability adaptation */
182 unsigned y_mode[4][10];
183 unsigned uv_mode[10][10];
184 unsigned filter[4][3];
185 unsigned mv_mode[7][4];
186 unsigned intra[4][2];
188 unsigned single_ref[5][2][2];
189 unsigned comp_ref[5][2];
190 unsigned tx32p[2][4];
191 unsigned tx16p[2][3];
194 unsigned mv_joint[4];
197 unsigned classes[11];
199 unsigned bits[10][2];
200 unsigned class0_fp[2][4];
202 unsigned class0_hp[2];
205 unsigned partition[4][4][4];
206 unsigned coef[4][2][2][6][6][3];
207 unsigned eob[4][2][2][6][6][2];
209 enum TxfmMode txfmmode;
210 enum CompPredMode comppredmode;
212 // contextual (left/above) cache
213 DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
214 DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
215 DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
216 DECLARE_ALIGNED(8, uint8_t, left_uv_nnz_ctx)[2][8];
217 DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
218 DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
219 DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
220 DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
221 DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
222 DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
223 DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
224 DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
/* "above" context rows; allocated in one arena in update_size() */
225 uint8_t *above_partition_ctx;
226 uint8_t *above_mode_ctx;
227 // FIXME maybe merge some of the below in a flags field?
228 uint8_t *above_y_nnz_ctx;
229 uint8_t *above_uv_nnz_ctx[2];
230 uint8_t *above_skip_ctx; // 1bit
231 uint8_t *above_txfm_ctx; // 2bit
232 uint8_t *above_segpred_ctx; // 1bit
233 uint8_t *above_intra_ctx; // 1bit
234 uint8_t *above_comp_ctx; // 1bit
235 uint8_t *above_ref_ctx; // 2bit
236 uint8_t *above_filter_ctx;
237 VP56mv (*above_mv_ctx)[2];
/* whole-frame cache */
240 uint8_t *intra_pred_data[3];
241 struct VP9Filter *lflvl;
242 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71*80];
244 // block reconstruction intermediates
245 int block_alloc_using_2pass;
246 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
247 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
/* MV clamping range for the current block (used by clamp_mv) */
248 struct { int x, y; } min_mv, max_mv;
249 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
250 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
/* {width, height} per block size. Judging by the values (BS_64x64 maps to
 * {16,16} in the first table and {8,8} in the second), the first table is
 * in 4-sample units and the second in 8-sample units — TODO confirm against
 * the elided surrounding code. */
253 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
255 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
256 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
258 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
259 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
/* Allocate the picture buffer for f plus its side data: one byte of
 * segmentation map and one VP9mvrefPair per 8x8 block, both carved out of
 * a single refcounted extradata buffer. Returns 0 on success, a negative
 * AVERROR on failure. NOTE(review): several lines (declarations, early
 * return, closing braces) are elided in this extract. */
263 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
265 VP9Context *s = ctx->priv_data;
268 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
/* 64 8x8 blocks per 64x64 superblock */
270 sz = 64 * s->sb_cols * s->sb_rows;
271 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
/* undo the ff_thread_get_buffer() above on allocation failure */
272 ff_thread_release_buffer(ctx, &f->tf);
273 return AVERROR(ENOMEM);
/* segmentation map first, then the MV/ref pairs */
276 f->segmentation_map = f->extradata->data;
277 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
279 // retain segmentation map if it doesn't update
280 if (s->segmentation.enabled && !s->segmentation.update_map &&
281 !s->intraonly && !s->keyframe) {
282 memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, sz);
/* Release a VP9Frame: drop the picture buffer and the refcounted side-data
 * buffer (which also invalidates f->segmentation_map / f->mv, as they point
 * into it). */
288 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
290 ff_thread_release_buffer(ctx, &f->tf);
291 av_buffer_unref(&f->extradata);
/* Make dst a new reference to src (picture buffer + side data). On partial
 * failure everything acquired so far is released, so dst is never left
 * half-initialized. Returns 0 on success, negative AVERROR on failure.
 * NOTE(review): the dst->mv assignment and final return are elided in this
 * extract. */
294 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
298 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
300 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
301 vp9_unref_frame(ctx, dst);
302 return AVERROR(ENOMEM);
/* pointers can be shared verbatim: they point into the shared extradata */
305 dst->segmentation_map = src->segmentation_map;
/* (Re)initialize all size-dependent decoder state for a w x h frame:
 * superblock/block counts and the single arena holding every "above"
 * context row plus intra-prediction edge data and the loopfilter struct.
 * No-op if the size is unchanged and already initialized.
 * NOTE(review): error paths and the final return are elided in this
 * extract. */
311 static int update_size(AVCodecContext *ctx, int w, int h)
313 VP9Context *s = ctx->priv_data;
316 av_assert0(w > 0 && h > 0);
/* already set up for this exact size */
318 if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height)
323 s->sb_cols = (w + 63) >> 6;
324 s->sb_rows = (h + 63) >> 6;
325 s->cols = (w + 7) >> 3;
326 s->rows = (h + 7) >> 3;
/* carve typed sub-arrays out of one allocation; n is per-superblock-column */
328 #define assign(var, type, n) var = (type) p; p += s->sb_cols * n * sizeof(*var)
329 av_freep(&s->intra_pred_data[0]);
/* 240 = sum of all the byte-sized per-sb-col quantities assigned below */
330 p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
332 return AVERROR(ENOMEM);
333 assign(s->intra_pred_data[0], uint8_t *, 64);
334 assign(s->intra_pred_data[1], uint8_t *, 32);
335 assign(s->intra_pred_data[2], uint8_t *, 32);
336 assign(s->above_y_nnz_ctx, uint8_t *, 16);
337 assign(s->above_mode_ctx, uint8_t *, 16);
338 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
339 assign(s->above_partition_ctx, uint8_t *, 8);
340 assign(s->above_skip_ctx, uint8_t *, 8);
341 assign(s->above_txfm_ctx, uint8_t *, 8);
342 assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
343 assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
344 assign(s->above_segpred_ctx, uint8_t *, 8);
345 assign(s->above_intra_ctx, uint8_t *, 8);
346 assign(s->above_comp_ctx, uint8_t *, 8);
347 assign(s->above_ref_ctx, uint8_t *, 8);
348 assign(s->above_filter_ctx, uint8_t *, 8);
349 assign(s->lflvl, struct VP9Filter *, 1);
352 // these will be re-allocated a little later
353 av_freep(&s->b_base);
354 av_freep(&s->block_base);
/* (Re)allocate the per-block scratch buffers (mode info, coefficient blocks,
 * EOB arrays). In 2-pass mode the buffers must cover the whole frame (all
 * superblocks); in 1-pass mode a single superblock's worth is enough and is
 * reused. No-op when already allocated for the current pass mode.
 * NOTE(review): the if/else lines selecting between the two branches and the
 * final return are elided in this extract. */
359 static int update_block_buffers(AVCodecContext *ctx)
361 VP9Context *s = ctx->priv_data;
363 if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->uses_2pass)
367 av_free(s->block_base);
369 int sbs = s->sb_cols * s->sb_rows;
/* 2-pass layout: whole-frame buffers */
371 s->b_base = av_malloc(sizeof(VP9Block) * s->cols * s->rows);
/* 64*64 luma + 2 * 32*32 chroma coeffs (the "* 3"), +128 for the eob bytes */
372 s->block_base = av_mallocz((64 * 64 + 128) * sbs * 3);
373 if (!s->b_base || !s->block_base)
374 return AVERROR(ENOMEM);
375 s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
376 s->uvblock_base[1] = s->uvblock_base[0] + sbs * 32 * 32;
377 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * 32 * 32);
378 s->uveob_base[0] = s->eob_base + 256 * sbs;
379 s->uveob_base[1] = s->uveob_base[0] + 64 * sbs;
/* 1-pass layout: a single superblock's worth, same internal carving */
381 s->b_base = av_malloc(sizeof(VP9Block));
382 s->block_base = av_mallocz((64 * 64 + 128) * 3);
383 if (!s->b_base || !s->block_base)
384 return AVERROR(ENOMEM);
385 s->uvblock_base[0] = s->block_base + 64 * 64;
386 s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
387 s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
388 s->uveob_base[0] = s->eob_base + 256;
389 s->uveob_base[1] = s->uveob_base[0] + 64;
/* remember which layout we built so a pass-mode change triggers realloc */
391 s->block_alloc_using_2pass = s->uses_2pass;
396 // for some reason the sign bit is at the end, not the start, of a bit sequence
397 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
399 int v = get_bits(gb, n);
400 return get_bits1(gb) ? -v : v;
403 static av_always_inline int inv_recenter_nonneg(int v, int m)
405 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
408 // differential forward probability updates
/* Decode a differentially-coded probability update: read a VLC-coded delta
 * index d from the range coder, map it through inv_map_table[] (which puts
 * the 20 'cheap, rough' update values first), and recenter it around the
 * current probability p. Returns the new probability in [1, 255].
 * NOTE(review): the opening brace, the declaration of d, and the tail of
 * the VLC decode are elided in this extract. */
409 static int update_prob(VP56RangeCoder *c, int p)
411 static const int inv_map_table[254] = {
412 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
413 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
414 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
415 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
416 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
417 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
418 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
419 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
420 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
421 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
422 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
423 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
424 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
425 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
426 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
427 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
428 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
429 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
434 /* This code is trying to do a differential probability update. For a
435 * current probability A in the range [1, 255], the difference to a new
436 * probability of any value can be expressed differentially as 1-A,255-A
437 * where some part of this (absolute range) exists both in positive as
438 * well as the negative part, whereas another part only exists in one
439 * half. We're trying to code this shared part differentially, i.e.
440 * times two where the value of the lowest bit specifies the sign, and
441 * the single part is then coded on top of this. This absolute difference
442 * then again has a value of [0,254], but a bigger value in this range
443 * indicates that we're further away from the original value A, so we
444 * can code this as a VLC code, since higher values are increasingly
445 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
446 * updates vs. the 'fine, exact' updates further down the range, which
447 * adds one extra dimension to this differential update model. */
/* VLC: successively larger delta ranges, each gated by one coder bit */
449 if (!vp8_rac_get(c)) {
450 d = vp8_rac_get_uint(c, 4) + 0;
451 } else if (!vp8_rac_get(c)) {
452 d = vp8_rac_get_uint(c, 4) + 16;
453 } else if (!vp8_rac_get(c)) {
454 d = vp8_rac_get_uint(c, 5) + 32;
456 d = vp8_rac_get_uint(c, 7);
/* final range is coded at double resolution plus a sign-ish bit */
458 d = (d << 1) - 65 + vp8_rac_get(c);
/* recenter around p, mirroring for the upper half so the result stays
 * within [1, 255] */
462 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
463 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
/* Parse a complete VP9 frame header: the uncompressed part (frame marker,
 * profile, frame type, sizes, loopfilter/quantizer/segmentation/tiling
 * setup) followed by the arithmetic-coded ("compressed") probability
 * updates. On success returns the total header size in bytes (offset of
 * the first tile data); on error returns a negative AVERROR. *ref is set
 * to the slot index when the bitstream requests show-existing-frame.
 * NOTE(review): this extract elides many lines (braces, else branches,
 * several statements); comments below only describe what is visible. */
466 static int decode_frame_header(AVCodecContext *ctx,
467 const uint8_t *data, int size, int *ref)
469 VP9Context *s = ctx->priv_data;
470 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
472 const uint8_t *data2;
/* ---- uncompressed header (plain bit reader) ---- */
475 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
476 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
479 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
480 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
481 return AVERROR_INVALIDDATA;
483 s->profile = get_bits1(&s->gb);
484 if (get_bits1(&s->gb)) { // reserved bit
485 av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
486 return AVERROR_INVALIDDATA;
/* show-existing-frame: just report which reference slot to display */
488 if (get_bits1(&s->gb)) {
489 *ref = get_bits(&s->gb, 3);
492 s->last_uses_2pass = s->uses_2pass;
493 s->last_keyframe = s->keyframe;
494 s->keyframe = !get_bits1(&s->gb);
495 last_invisible = s->invisible;
496 s->invisible = !get_bits1(&s->gb);
497 s->errorres = get_bits1(&s->gb);
498 s->use_last_frame_mvs = !s->errorres && !last_invisible;
/* ---- keyframe path: sync code, colorspace, size ---- */
500 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
501 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
502 return AVERROR_INVALIDDATA;
504 s->colorspace = get_bits(&s->gb, 3);
505 if (s->colorspace == 7) { // RGB = profile 1
506 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
507 return AVERROR_INVALIDDATA;
509 s->fullrange = get_bits1(&s->gb);
510 // for profile 1, here follows the subsampling bits
/* keyframes refresh all 8 reference slots */
511 s->refreshrefmask = 0xff;
512 w = get_bits(&s->gb, 16) + 1;
513 h = get_bits(&s->gb, 16) + 1;
514 if (get_bits1(&s->gb)) // display size
515 skip_bits(&s->gb, 32);
/* ---- non-keyframe path: intra-only or inter ---- */
517 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
518 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
/* intra-only frames repeat the sync code and code their own size */
520 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
521 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
522 return AVERROR_INVALIDDATA;
524 s->refreshrefmask = get_bits(&s->gb, 8);
525 w = get_bits(&s->gb, 16) + 1;
526 h = get_bits(&s->gb, 16) + 1;
527 if (get_bits1(&s->gb)) // display size
528 skip_bits(&s->gb, 32);
/* inter frames: three active references with per-ref sign bias */
530 s->refreshrefmask = get_bits(&s->gb, 8);
531 s->refidx[0] = get_bits(&s->gb, 3);
532 s->signbias[0] = get_bits1(&s->gb);
533 s->refidx[1] = get_bits(&s->gb, 3);
534 s->signbias[1] = get_bits1(&s->gb);
535 s->refidx[2] = get_bits(&s->gb, 3);
536 s->signbias[2] = get_bits1(&s->gb);
537 if (!s->refs[s->refidx[0]].f->data[0] ||
538 !s->refs[s->refidx[1]].f->data[0] ||
539 !s->refs[s->refidx[2]].f->data[0]) {
540 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
541 return AVERROR_INVALIDDATA;
/* frame size either copied from one of the refs or coded explicitly */
543 if (get_bits1(&s->gb)) {
544 w = s->refs[s->refidx[0]].f->width;
545 h = s->refs[s->refidx[0]].f->height;
546 } else if (get_bits1(&s->gb)) {
547 w = s->refs[s->refidx[1]].f->width;
548 h = s->refs[s->refidx[1]].f->height;
549 } else if (get_bits1(&s->gb)) {
550 w = s->refs[s->refidx[2]].f->width;
551 h = s->refs[s->refidx[2]].f->height;
553 w = get_bits(&s->gb, 16) + 1;
554 h = get_bits(&s->gb, 16) + 1;
556 // Note that in this code, "CUR_FRAME" is actually before we
557 // have formally allocated a frame, and thus actually represents
/* temporal MV prediction is only valid when the size didn't change */
559 s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
560 s->frames[CUR_FRAME].tf.f->height == h;
561 if (get_bits1(&s->gb)) // display size
562 skip_bits(&s->gb, 32);
563 s->highprecisionmvs = get_bits1(&s->gb);
564 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
/* compound prediction is allowed only when the refs disagree in sign bias;
 * the fixed/variable compound refs are picked from the bias pattern */
566 s->allowcompinter = s->signbias[0] != s->signbias[1] ||
567 s->signbias[0] != s->signbias[2];
568 if (s->allowcompinter) {
569 if (s->signbias[0] == s->signbias[1]) {
571 s->varcompref[0] = 0;
572 s->varcompref[1] = 1;
573 } else if (s->signbias[0] == s->signbias[2]) {
575 s->varcompref[0] = 0;
576 s->varcompref[1] = 2;
579 s->varcompref[0] = 1;
580 s->varcompref[1] = 2;
585 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
586 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
587 s->framectxid = c = get_bits(&s->gb, 2);
589 /* loopfilter header data */
590 s->filter.level = get_bits(&s->gb, 6);
591 sharp = get_bits(&s->gb, 3);
592 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
593 // the old cache values since they are still valid
594 if (s->filter.sharpness != sharp)
595 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
596 s->filter.sharpness = sharp;
597 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
598 if (get_bits1(&s->gb)) {
599 for (i = 0; i < 4; i++)
600 if (get_bits1(&s->gb))
601 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
602 for (i = 0; i < 2; i++)
603 if (get_bits1(&s->gb))
604 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
607 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
610 /* quantization header data */
611 s->yac_qi = get_bits(&s->gb, 8);
612 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
613 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
614 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
/* qindex 0 with all deltas 0 means lossless (WHT transform) */
615 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
616 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
618 /* segmentation header info */
619 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
620 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
621 for (i = 0; i < 7; i++)
622 s->prob.seg[i] = get_bits1(&s->gb) ?
623 get_bits(&s->gb, 8) : 255;
624 if ((s->segmentation.temporal = get_bits1(&s->gb))) {
625 for (i = 0; i < 3; i++)
626 s->prob.segpred[i] = get_bits1(&s->gb) ?
627 get_bits(&s->gb, 8) : 255;
/* a segmap carried over from the previous frame is meaningless if the
 * frame size changed — reject such streams */
630 if ((!s->segmentation.update_map || s->segmentation.temporal) &&
631 (w != s->frames[CUR_FRAME].tf.f->width ||
632 h != s->frames[CUR_FRAME].tf.f->height)) {
633 av_log(ctx, AV_LOG_ERROR,
634 "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
635 s->segmentation.temporal, s->segmentation.update_map);
636 return AVERROR_INVALIDDATA;
639 if (get_bits1(&s->gb)) {
640 s->segmentation.absolute_vals = get_bits1(&s->gb);
641 for (i = 0; i < 8; i++) {
642 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
643 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
644 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
645 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
646 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
647 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
648 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
652 s->segmentation.feat[0].q_enabled = 0;
653 s->segmentation.feat[0].lf_enabled = 0;
654 s->segmentation.feat[0].skip_enabled = 0;
655 s->segmentation.feat[0].ref_enabled = 0;
658 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
659 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
660 int qyac, qydc, quvac, quvdc, lflvl, sh;
662 if (s->segmentation.feat[i].q_enabled) {
663 if (s->segmentation.absolute_vals)
664 qyac = s->segmentation.feat[i].q_val;
666 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
670 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
671 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
672 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
673 qyac = av_clip_uintp2(qyac, 8);
675 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
676 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
677 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
678 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];
/* precompute per-segment loopfilter levels, folding in the per-ref and
 * per-mode deltas (shifted when the base level is >= 32) */
680 sh = s->filter.level >= 32;
681 if (s->segmentation.feat[i].lf_enabled) {
682 if (s->segmentation.absolute_vals)
683 lflvl = s->segmentation.feat[i].lf_val;
685 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
687 lflvl = s->filter.level;
689 s->segmentation.feat[i].lflvl[0][0] =
690 s->segmentation.feat[i].lflvl[0][1] =
691 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
692 for (j = 1; j < 4; j++) {
693 s->segmentation.feat[i].lflvl[j][0] =
694 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
695 s->lf_delta.mode[0]) << sh), 6);
696 s->segmentation.feat[i].lflvl[j][1] =
697 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
698 s->lf_delta.mode[1]) << sh), 6);
/* ---- tiling ---- */
703 if ((res = update_size(ctx, w, h)) < 0) {
704 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
/* minimum log2_tile_cols such that each tile is at most 64 SBs wide */
707 for (s->tiling.log2_tile_cols = 0;
708 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
709 s->tiling.log2_tile_cols++) ;
710 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
711 max = FFMAX(0, max - 1);
712 while (max > s->tiling.log2_tile_cols) {
713 if (get_bits1(&s->gb))
714 s->tiling.log2_tile_cols++;
718 s->tiling.log2_tile_rows = decode012(&s->gb);
719 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
720 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
721 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
/* one range coder per tile column */
722 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
723 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
725 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
726 return AVERROR(ENOMEM);
/* keyframes / error-resilient / intra-only frames reset all 4 contexts */
730 if (s->keyframe || s->errorres || s->intraonly) {
731 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
732 s->prob_ctx[3].p = vp9_default_probs;
733 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
734 sizeof(vp9_default_coef_probs));
735 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
736 sizeof(vp9_default_coef_probs));
737 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
738 sizeof(vp9_default_coef_probs));
739 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
740 sizeof(vp9_default_coef_probs));
743 // next 16 bits is size of the rest of the header (arith-coded)
744 size2 = get_bits(&s->gb, 16);
745 data2 = align_get_bits(&s->gb);
746 if (size2 > size - (data2 - data)) {
747 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
748 return AVERROR_INVALIDDATA;
/* ---- compressed header (range coder) ---- */
750 ff_vp56_init_range_decoder(&s->c, data2, size2);
751 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
752 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
753 return AVERROR_INVALIDDATA;
756 if (s->keyframe || s->intraonly) {
/* coef and eob are adjacent in the counts struct; clear both at once */
757 memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
759 memset(&s->counts, 0, sizeof(s->counts));
761 // FIXME is it faster to not copy here, but do it down in the fw updates
762 // as explicit copies if the fw update is missing (and skip the copy upon
764 s->prob.p = s->prob_ctx[c].p;
/* transform mode */
768 s->txfmmode = TX_4X4;
770 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
771 if (s->txfmmode == 3)
772 s->txfmmode += vp8_rac_get(&s->c);
774 if (s->txfmmode == TX_SWITCHABLE) {
775 for (i = 0; i < 2; i++)
776 if (vp56_rac_get_prob_branchy(&s->c, 252))
777 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
778 for (i = 0; i < 2; i++)
779 for (j = 0; j < 2; j++)
780 if (vp56_rac_get_prob_branchy(&s->c, 252))
781 s->prob.p.tx16p[i][j] =
782 update_prob(&s->c, s->prob.p.tx16p[i][j]);
783 for (i = 0; i < 2; i++)
784 for (j = 0; j < 3; j++)
785 if (vp56_rac_get_prob_branchy(&s->c, 252))
786 s->prob.p.tx32p[i][j] =
787 update_prob(&s->c, s->prob.p.tx32p[i][j]);
/* coefficient probability updates, per transform size */
792 for (i = 0; i < 4; i++) {
793 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
794 if (vp8_rac_get(&s->c)) {
795 for (j = 0; j < 2; j++)
796 for (k = 0; k < 2; k++)
797 for (l = 0; l < 6; l++)
798 for (m = 0; m < 6; m++) {
799 uint8_t *p = s->prob.coef[i][j][k][l][m];
800 uint8_t *r = ref[j][k][l][m];
801 if (m >= 3 && l == 0) // dc only has 3 pt
803 for (n = 0; n < 3; n++) {
804 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
805 p[n] = update_prob(&s->c, r[n]);
/* no update: copy the context probabilities as-is */
813 for (j = 0; j < 2; j++)
814 for (k = 0; k < 2; k++)
815 for (l = 0; l < 6; l++)
816 for (m = 0; m < 6; m++) {
817 uint8_t *p = s->prob.coef[i][j][k][l][m];
818 uint8_t *r = ref[j][k][l][m];
819 if (m > 3 && l == 0) // dc only has 3 pt
825 if (s->txfmmode == i)
/* skip-flag probabilities */
830 for (i = 0; i < 3; i++)
831 if (vp56_rac_get_prob_branchy(&s->c, 252))
832 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
/* inter-only probability updates */
833 if (!s->keyframe && !s->intraonly) {
834 for (i = 0; i < 7; i++)
835 for (j = 0; j < 3; j++)
836 if (vp56_rac_get_prob_branchy(&s->c, 252))
837 s->prob.p.mv_mode[i][j] =
838 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
840 if (s->filtermode == FILTER_SWITCHABLE)
841 for (i = 0; i < 4; i++)
842 for (j = 0; j < 2; j++)
843 if (vp56_rac_get_prob_branchy(&s->c, 252))
844 s->prob.p.filter[i][j] =
845 update_prob(&s->c, s->prob.p.filter[i][j]);
847 for (i = 0; i < 4; i++)
848 if (vp56_rac_get_prob_branchy(&s->c, 252))
849 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
851 if (s->allowcompinter) {
852 s->comppredmode = vp8_rac_get(&s->c);
854 s->comppredmode += vp8_rac_get(&s->c);
855 if (s->comppredmode == PRED_SWITCHABLE)
856 for (i = 0; i < 5; i++)
857 if (vp56_rac_get_prob_branchy(&s->c, 252))
859 update_prob(&s->c, s->prob.p.comp[i]);
861 s->comppredmode = PRED_SINGLEREF;
864 if (s->comppredmode != PRED_COMPREF) {
865 for (i = 0; i < 5; i++) {
866 if (vp56_rac_get_prob_branchy(&s->c, 252))
867 s->prob.p.single_ref[i][0] =
868 update_prob(&s->c, s->prob.p.single_ref[i][0]);
869 if (vp56_rac_get_prob_branchy(&s->c, 252))
870 s->prob.p.single_ref[i][1] =
871 update_prob(&s->c, s->prob.p.single_ref[i][1]);
875 if (s->comppredmode != PRED_SINGLEREF) {
876 for (i = 0; i < 5; i++)
877 if (vp56_rac_get_prob_branchy(&s->c, 252))
878 s->prob.p.comp_ref[i] =
879 update_prob(&s->c, s->prob.p.comp_ref[i]);
882 for (i = 0; i < 4; i++)
883 for (j = 0; j < 9; j++)
884 if (vp56_rac_get_prob_branchy(&s->c, 252))
885 s->prob.p.y_mode[i][j] =
886 update_prob(&s->c, s->prob.p.y_mode[i][j]);
888 for (i = 0; i < 4; i++)
889 for (j = 0; j < 4; j++)
890 for (k = 0; k < 3; k++)
891 if (vp56_rac_get_prob_branchy(&s->c, 252))
892 s->prob.p.partition[3 - i][j][k] =
893 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
895 // mv fields don't use the update_prob subexp model for some reason
896 for (i = 0; i < 3; i++)
897 if (vp56_rac_get_prob_branchy(&s->c, 252))
898 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
900 for (i = 0; i < 2; i++) {
901 if (vp56_rac_get_prob_branchy(&s->c, 252))
902 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
904 for (j = 0; j < 10; j++)
905 if (vp56_rac_get_prob_branchy(&s->c, 252))
906 s->prob.p.mv_comp[i].classes[j] =
907 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
909 if (vp56_rac_get_prob_branchy(&s->c, 252))
910 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
912 for (j = 0; j < 10; j++)
913 if (vp56_rac_get_prob_branchy(&s->c, 252))
914 s->prob.p.mv_comp[i].bits[j] =
915 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
918 for (i = 0; i < 2; i++) {
919 for (j = 0; j < 2; j++)
920 for (k = 0; k < 3; k++)
921 if (vp56_rac_get_prob_branchy(&s->c, 252))
922 s->prob.p.mv_comp[i].class0_fp[j][k] =
923 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
925 for (j = 0; j < 3; j++)
926 if (vp56_rac_get_prob_branchy(&s->c, 252))
927 s->prob.p.mv_comp[i].fp[j] =
928 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
931 if (s->highprecisionmvs) {
932 for (i = 0; i < 2; i++) {
933 if (vp56_rac_get_prob_branchy(&s->c, 252))
934 s->prob.p.mv_comp[i].class0_hp =
935 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
937 if (vp56_rac_get_prob_branchy(&s->c, 252))
938 s->prob.p.mv_comp[i].hp =
939 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
/* total header size: uncompressed part + compressed part */
944 return (data2 - data) + size2;
/* Clamp a motion vector into the range currently allowed for this block
 * (s->min_mv / s->max_mv, maintained elsewhere). dst may alias src.
 * NOTE(review): the final parameter line is elided in this extract — from
 * the body it is presumably `VP9Context *s`; confirm against the full
 * source. */
947 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
950 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
951 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
/* Build the motion-vector prediction for (ref, block index idx): scan the
 * spatial neighbourhood given by mv_ref_blk_off[bs], then the co-located
 * position in the previous frame, first for MVs using the same reference,
 * then (sign-flipped if the reference sign bias differs) for MVs using a
 * different reference. The first one or two distinct candidates found are
 * returned through pmv via the RETURN_* macros.
 * NOTE(review): this extract elides many lines, including parts of the
 * macro bodies and the function tail; comments only describe visible code. */
954 static void find_ref_mvs(VP9Context *s,
955 VP56mv *pmv, int ref, int z, int idx, int sb)
/* candidate {col,row} offsets (relative to the current block), 8 per size */
957 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
958 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
959 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
960 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
961 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
962 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
963 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
964 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
965 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
966 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
967 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
968 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
969 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
970 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
971 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
972 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
973 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
974 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
975 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
976 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
977 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
978 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
979 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
980 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
981 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
982 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
983 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
986 int row = s->row, col = s->col, row7 = s->row7;
987 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
988 #define INVALID_MV 0x80008000U
/* mem holds the first accepted candidate as a packed 32-bit MV */
989 uint32_t mem = INVALID_MV;
992 #define RETURN_DIRECT_MV(mv) \
994 uint32_t m = AV_RN32A(&mv); \
998 } else if (mem == INVALID_MV) { \
1000 } else if (m != mem) { \
/* sub-8x8 blocks: earlier sub-block MVs of this very block come first */
1007 if (sb == 2 || sb == 1) {
1008 RETURN_DIRECT_MV(b->mv[0][z]);
1009 } else if (sb == 3) {
1010 RETURN_DIRECT_MV(b->mv[2][z]);
1011 RETURN_DIRECT_MV(b->mv[1][z]);
1012 RETURN_DIRECT_MV(b->mv[0][z]);
/* like RETURN_DIRECT_MV, but clamps the candidate first */
1015 #define RETURN_MV(mv) \
1020 clamp_mv(&tmp, &mv, s); \
1021 m = AV_RN32A(&tmp); \
1025 } else if (mem == INVALID_MV) { \
1027 } else if (m != mem) { \
1032 uint32_t m = AV_RN32A(&mv); \
1034 clamp_mv(pmv, &mv, s); \
1036 } else if (mem == INVALID_MV) { \
1038 } else if (m != mem) { \
1039 clamp_mv(pmv, &mv, s); \
/* immediate above / left neighbours via the cached context rows */
1046 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1047 if (mv->ref[0] == ref) {
1048 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1049 } else if (mv->ref[1] == ref) {
1050 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
1053 if (col > s->tiling.tile_col_start) {
1054 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1055 if (mv->ref[0] == ref) {
1056 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1057 } else if (mv->ref[1] == ref) {
1058 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1066 // previously coded MVs in this neighbourhood, using same reference frame
1067 for (; i < 8; i++) {
1068 int c = p[i][0] + col, r = p[i][1] + row;
/* stay inside the tile (columns) and the frame (rows) */
1070 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1071 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1073 if (mv->ref[0] == ref) {
1074 RETURN_MV(mv->mv[0]);
1075 } else if (mv->ref[1] == ref) {
1076 RETURN_MV(mv->mv[1]);
1081 // MV at this position in previous frame, using same reference frame
1082 if (s->use_last_frame_mvs) {
1083 struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
/* 1-pass frames need explicit sync with the frame-threaded decoder */
1085 if (!s->last_uses_2pass)
1086 ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
1087 if (mv->ref[0] == ref) {
1088 RETURN_MV(mv->mv[0]);
1089 } else if (mv->ref[1] == ref) {
1090 RETURN_MV(mv->mv[1]);
/* negate the MV when the candidate's reference has opposite sign bias */
1094 #define RETURN_SCALE_MV(mv, scale) \
1097 VP56mv mv_temp = { -mv.x, -mv.y }; \
1098 RETURN_MV(mv_temp); \
1104 // previously coded MVs in this neighbourhood, using different reference frame
1105 for (i = 0; i < 8; i++) {
1106 int c = p[i][0] + col, r = p[i][1] + row;
1108 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1109 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1111 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1112 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1114 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1115 // BUG - libvpx has this condition regardless of whether
1116 // we used the first ref MV and pre-scaling
1117 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1118 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1123 // MV at this position in previous frame, using different reference frame
1124 if (s->use_last_frame_mvs) {
1125 struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
1127 // no need to await_progress, because we already did that above
1128 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1129 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1131 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1132 // BUG - libvpx has this condition regardless of whether
1133 // we used the first ref MV and pre-scaling
1134 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1135 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1142 #undef RETURN_SCALE_MV
// Decode one motion-vector component (row or column delta) from the range
// coder.  idx selects the per-component probability/count model
// (0 = vertical, 1 = horizontal as used by fill_mv() below), hp enables the
// extra high-precision (eighth-pel) bit.  Returns the signed magnitude,
// sign-applied, offset by 1.
// NOTE(review): this excerpt omits interleaved lines of the original file
// (e.g. the class0-vs-classN branch structure and the closing brace), so the
// control flow between the visible statements is only partially shown.
1145 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1147 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
// c is the MV class index decoded from the class tree; it determines how
// many integer magnitude bits follow.
1148 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1149 s->prob.p.mv_comp[idx].classes);
// Adaptation counters for backward probability updates at frame end.
1151 s->counts.mv_comp[idx].sign[sign]++;
1152 s->counts.mv_comp[idx].classes[c]++;
// Class >= 1 path (presumably; branch header not visible in excerpt):
// read c raw magnitude bits, then fractional (fp) and half-pel (hp) bits.
1156 for (n = 0, m = 0; m < c; m++) {
1157 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1159 s->counts.mv_comp[idx].bits[m][bit]++;
1162 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1164 s->counts.mv_comp[idx].fp[bit]++;
1166 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1167 s->counts.mv_comp[idx].hp[bit]++;
// When the hp bit is not coded (hp disabled), libvpx still bumps the
// counter as if a 1 had been read; mirrored here for bitstream parity.
1171 // bug in libvpx - we count for bw entropy purposes even if the
1173 s->counts.mv_comp[idx].hp[1]++;
// Class-0 path: 1-bit integer part plus class0-specific fp/hp models.
1177 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1178 s->counts.mv_comp[idx].class0[n]++;
1179 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1180 s->prob.p.mv_comp[idx].class0_fp[n]);
1181 s->counts.mv_comp[idx].class0_fp[n][bit]++;
// Pack integer part (bit 3) and fractional part (bits 2:1); the hp bit
// (bit 0) is merged on a line not visible in this excerpt.
1182 n = (n << 3) | (bit << 1);
1184 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1185 s->counts.mv_comp[idx].class0_hp[bit]++;
1189 // bug in libvpx - we count for bw entropy purposes even if the
1191 s->counts.mv_comp[idx].class0_hp[1]++;
// Magnitude is stored biased by 1 so that 0 never occurs here.
1195 return sign ? -(n + 1) : (n + 1);
// Fill mv[0] (and mv[1] for compound prediction) for one (sub)block.
// mode is one of ZEROMV/NEARESTMV/NEARMV/NEWMV; sb is the sub-block index,
// or effectively -1 for whole-block NEWMV prediction seeding.
// NOTE(review): several lines of the original (ZEROMV zero-fill, the MV
// clamping bodies at 1214-1226/1244-1256, closing braces) are missing from
// this excerpt.
1198 static void fill_mv(VP9Context *s,
1199 VP56mv *mv, int mode, int sb)
1203 if (mode == ZEROMV) {
// --- first reference ---
1208 // FIXME cache this value and reuse for other subblocks
// Seed mv[0] with the NEAREST/NEAR/NEW reference MV predictor.
1209 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1210 mode == NEWMV ? -1 : sb);
1211 // FIXME maybe move this code into find_ref_mvs()
// hp (use of the high-precision bit) is only allowed while the predictor
// magnitude stays below 64 in both components; otherwise the predictor is
// rounded (body not visible here).
1212 if ((mode == NEWMV || sb == -1) &&
1213 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1227 if (mode == NEWMV) {
// The joint code says which components carry an explicit delta.
1228 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1229 s->prob.p.mv_joint);
1231 s->counts.mv_joint[j]++;
1232 if (j >= MV_JOINT_V)
1233 mv[0].y += read_mv_component(s, 0, hp);
1235 mv[0].x += read_mv_component(s, 1, hp);
// --- second reference (compound prediction; guard not visible) ---
1239 // FIXME cache this value and reuse for other subblocks
1240 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1241 mode == NEWMV ? -1 : sb);
1242 if ((mode == NEWMV || sb == -1) &&
1243 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1257 if (mode == NEWMV) {
1258 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1259 s->prob.p.mv_joint);
1261 s->counts.mv_joint[j]++;
1262 if (j >= MV_JOINT_V)
1263 mv[1].y += read_mv_component(s, 0, hp);
1265 mv[1].x += read_mv_component(s, 1, hp);
// Splat the byte value v over a w x h region at ptr (row pitch = stride),
// using progressively wider aligned stores (16/32/64-bit replicated
// patterns) selected by width.  Only the replication constants survive in
// this excerpt; the per-width switch/loops are not visible here.
1271 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1272 ptrdiff_t stride, int v)
// v replicated into 2 bytes (w == 2 case, presumably).
1282 int v16 = v * 0x0101;
// v replicated into 4 bytes.
1290 uint32_t v32 = v * 0x01010101;
// v replicated into 8 bytes (fast-64bit path).
1299 uint64_t v64 = v * 0x0101010101010101ULL;
// 32-bit fallback: two 4-byte stores cover 8 bytes per row.
1305 uint32_t v32 = v * 0x01010101;
1308 AV_WN32A(ptr + 4, v32);
// Decode all per-block mode information for the current block (s->b at
// s->row/s->col): segment id, skip flag, intra/inter decision, transform
// size, intra prediction modes or (for inter) references, inter modes,
// interpolation filter and motion vectors; finally update the left/above
// context arrays and store MVs/refs for use by future frames.
// NOTE(review): this excerpt omits many interleaved lines of the original
// function (else-branches, closing braces, some assignments), so comments
// below describe only what the visible lines establish.
1317 static void decode_mode(AVCodecContext *ctx)
// Per-block-size left/above partition context bitmasks.
1319 static const uint8_t left_ctx[N_BS_SIZES] = {
1320 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1322 static const uint8_t above_ctx[N_BS_SIZES] = {
1323 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
// Largest transform size permitted for each block size.
1325 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1326 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1327 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1329 VP9Context *s = ctx->priv_data;
1331 int row = s->row, col = s->col, row7 = s->row7;
1332 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
// w4/h4: block size in 8x8 units, clipped to the frame edge.
1333 int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
1334 int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
1335 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1336 int vref, filter_id;
// --- segment id ---
1338 if (!s->segmentation.enabled) {
1340 } else if (s->keyframe || s->intraonly) {
1341 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
// Temporal segment prediction: reuse (the minimum of) the co-located
// segment ids from the previous frame's segmentation map.
1342 } else if (!s->segmentation.update_map ||
1343 (s->segmentation.temporal &&
1344 vp56_rac_get_prob_branchy(&s->c,
1345 s->prob.segpred[s->above_segpred_ctx[col] +
1346 s->left_segpred_ctx[row7]]))) {
1348 uint8_t *refsegmap = s->frames[LAST_FRAME].segmentation_map;
// In frame-threaded 2-pass mode the previous frame may still be decoding;
// wait until its co-located rows are done.
1350 if (!s->last_uses_2pass)
1351 ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
1352 for (y = 0; y < h4; y++)
1353 for (x = 0; x < w4; x++)
1354 pred = FFMIN(pred, refsegmap[(y + row) * 8 * s->sb_cols + x + col]);
1355 av_assert1(pred < 8);
1358 memset(&s->above_segpred_ctx[col], 1, w4);
1359 memset(&s->left_segpred_ctx[row7], 1, h4);
1361 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1364 memset(&s->above_segpred_ctx[col], 0, w4);
1365 memset(&s->left_segpred_ctx[row7], 0, h4);
// Record the decoded segment id in the current frame's map.
1367 if (s->segmentation.enabled &&
1368 (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1369 setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1370 w4, h4, 8 * s->sb_cols, b->seg_id);
// --- skip flag (either forced by the segment feature, or coded) ---
1373 b->skip = s->segmentation.enabled &&
1374 s->segmentation.feat[b->seg_id].skip_enabled;
1376 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1377 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1378 s->counts.skip[c][b->skip]++;
// --- intra/inter decision ---
1381 if (s->keyframe || s->intraonly) {
1383 } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
// ref_val == 0 in the segment feature forces intra.
1384 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1388 if (have_a && have_l) {
1389 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1392 c = have_a ? 2 * s->above_intra_ctx[col] :
1393 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1395 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1396 s->counts.intra[c][bit]++;
// --- transform size (coded only when switchable and not skipped-inter) ---
1400 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
// Context derives from neighbours' tx sizes; skipped neighbours count as
// max_tx.
1404 c = (s->above_skip_ctx[col] ? max_tx :
1405 s->above_txfm_ctx[col]) +
1406 (s->left_skip_ctx[row7] ? max_tx :
1407 s->left_txfm_ctx[row7]) > max_tx;
1409 c = s->above_skip_ctx[col] ? 1 :
1410 (s->above_txfm_ctx[col] * 2 > max_tx);
1412 } else if (have_l) {
1413 c = s->left_skip_ctx[row7] ? 1 :
1414 (s->left_txfm_ctx[row7] * 2 > max_tx);
// Unary-coded tx size, capped by max_tx (the switch on max_tx is not
// visible in this excerpt).
1420 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1422 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1424 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1426 s->counts.tx32p[c][b->tx]++;
1429 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1431 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1432 s->counts.tx16p[c][b->tx]++;
1435 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1436 s->counts.tx8p[c][b->tx]++;
1443 b->tx = FFMIN(max_tx, s->txfmmode);
// --- intra modes, keyframe/intra-only path (uses fixed kf probabilities
// conditioned on above/left modes) ---
1446 if (s->keyframe || s->intraonly) {
1447 uint8_t *a = &s->above_mode_ctx[col * 2];
1448 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
// Sub-8x8 blocks code up to 4 separate y modes.
1451 if (b->bs > BS_8x8) {
1452 // FIXME the memory storage intermediates here aren't really
1453 // necessary, they're just there to make the code slightly
1455 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1456 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1457 if (b->bs != BS_8x4) {
1458 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1459 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1460 l[0] = a[1] = b->mode[1];
1462 l[0] = a[1] = b->mode[1] = b->mode[0];
1464 if (b->bs != BS_4x8) {
1465 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1466 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1467 if (b->bs != BS_8x4) {
1468 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1469 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1470 l[1] = a[1] = b->mode[3];
1472 l[1] = a[1] = b->mode[3] = b->mode[2];
1475 b->mode[2] = b->mode[0];
1476 l[1] = a[1] = b->mode[3] = b->mode[1];
// >= 8x8: one y mode for the whole block.
1479 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1480 vp9_default_kf_ymode_probs[*a][*l]);
1481 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1482 // FIXME this can probably be optimized
1483 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1484 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
// Chroma mode is conditioned on the last luma mode.
1486 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1487 vp9_default_kf_uvmode_probs[b->mode[3]]);
// --- intra modes, inter-frame path (adaptive probabilities + counts) ---
1488 } else if (b->intra) {
1490 if (b->bs > BS_8x8) {
1491 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1492 s->prob.p.y_mode[0]);
1493 s->counts.y_mode[0][b->mode[0]]++;
1494 if (b->bs != BS_8x4) {
1495 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1496 s->prob.p.y_mode[0]);
1497 s->counts.y_mode[0][b->mode[1]]++;
1499 b->mode[1] = b->mode[0];
1501 if (b->bs != BS_4x8) {
1502 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1503 s->prob.p.y_mode[0]);
1504 s->counts.y_mode[0][b->mode[2]]++;
1505 if (b->bs != BS_8x4) {
1506 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1507 s->prob.p.y_mode[0]);
1508 s->counts.y_mode[0][b->mode[3]]++;
1510 b->mode[3] = b->mode[2];
1513 b->mode[2] = b->mode[0];
1514 b->mode[3] = b->mode[1];
// size_group maps block size to the y_mode probability set.
1517 static const uint8_t size_group[10] = {
1518 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1520 int sz = size_group[b->bs];
1522 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1523 s->prob.p.y_mode[sz]);
1524 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1525 s->counts.y_mode[sz][b->mode[3]]++;
1527 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1528 s->prob.p.uv_mode[b->mode[3]]);
1529 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
// --- inter block ---
// Context LUT indexed by the above/left neighbour mode contexts; used to
// select the mv_mode probability set.
1531 static const uint8_t inter_mode_ctx_lut[14][14] = {
1532 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1533 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1534 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1535 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1536 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1537 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1538 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1539 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1540 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1541 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1542 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1543 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1544 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1545 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
// Segment feature may force the reference frame.
1548 if (s->segmentation.feat[b->seg_id].ref_enabled) {
1549 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1551 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1553 // read comp_pred flag
1554 if (s->comppredmode != PRED_SWITCHABLE) {
1555 b->comp = s->comppredmode == PRED_COMPREF;
// Switchable compound prediction: derive context from neighbours'
// comp/intra/ref state, then code the flag.
1559 // FIXME add intra as ref=0xff (or -1) to make these easier?
1562 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1564 } else if (s->above_comp_ctx[col]) {
1565 c = 2 + (s->left_intra_ctx[row7] ||
1566 s->left_ref_ctx[row7] == s->fixcompref);
1567 } else if (s->left_comp_ctx[row7]) {
1568 c = 2 + (s->above_intra_ctx[col] ||
1569 s->above_ref_ctx[col] == s->fixcompref);
1571 c = (!s->above_intra_ctx[col] &&
1572 s->above_ref_ctx[col] == s->fixcompref) ^
1573 (!s->left_intra_ctx[row7] &&
// NOTE(review): `row & 7` here vs. `row7` everywhere else — looks like
// they are intended to be the same value; confirm against the full file.
1574 s->left_ref_ctx[row & 7] == s->fixcompref);
1577 c = s->above_comp_ctx[col] ? 3 :
1578 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1580 } else if (have_l) {
1581 c = s->left_comp_ctx[row7] ? 3 :
1582 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1586 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1587 s->counts.comp[c][b->comp]++;
1590 // read actual references
1591 // FIXME probably cache a few variables here to prevent repetitive
1592 // memory accesses below
// Compound: the "fixed" ref is implied; only the variable ref is coded.
1593 if (b->comp) /* two references */ {
1594 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1596 b->ref[fix_idx] = s->fixcompref;
1597 // FIXME can this codeblob be replaced by some sort of LUT?
1600 if (s->above_intra_ctx[col]) {
1601 if (s->left_intra_ctx[row7]) {
1604 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1606 } else if (s->left_intra_ctx[row7]) {
1607 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1609 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1611 if (refl == refa && refa == s->varcompref[1]) {
1613 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1614 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1615 (refl == s->fixcompref && refa == s->varcompref[0])) {
1618 c = (refa == refl) ? 3 : 1;
1620 } else if (!s->left_comp_ctx[row7]) {
1621 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1624 c = (refl == s->varcompref[1] &&
1625 refa != s->varcompref[1]) ? 2 : 4;
1627 } else if (!s->above_comp_ctx[col]) {
1628 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1631 c = (refa == s->varcompref[1] &&
1632 refl != s->varcompref[1]) ? 2 : 4;
1635 c = (refl == refa) ? 4 : 2;
1639 if (s->above_intra_ctx[col]) {
1641 } else if (s->above_comp_ctx[col]) {
1642 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1644 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1647 } else if (have_l) {
1648 if (s->left_intra_ctx[row7]) {
1650 } else if (s->left_comp_ctx[row7]) {
1651 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1653 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1658 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1659 b->ref[var_idx] = s->varcompref[bit];
1660 s->counts.comp_ref[c][bit]++;
// Single reference: first bit distinguishes LAST vs GOLDEN/ALTREF,
// second bit (below) distinguishes GOLDEN vs ALTREF.
1661 } else /* single reference */ {
1664 if (have_a && !s->above_intra_ctx[col]) {
1665 if (have_l && !s->left_intra_ctx[row7]) {
1666 if (s->left_comp_ctx[row7]) {
1667 if (s->above_comp_ctx[col]) {
1668 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1669 !s->above_ref_ctx[col]);
1671 c = (3 * !s->above_ref_ctx[col]) +
1672 (!s->fixcompref || !s->left_ref_ctx[row7]);
1674 } else if (s->above_comp_ctx[col]) {
1675 c = (3 * !s->left_ref_ctx[row7]) +
1676 (!s->fixcompref || !s->above_ref_ctx[col]);
1678 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1680 } else if (s->above_intra_ctx[col]) {
1682 } else if (s->above_comp_ctx[col]) {
1683 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1685 c = 4 * (!s->above_ref_ctx[col]);
1687 } else if (have_l && !s->left_intra_ctx[row7]) {
// NOTE(review): this branch is guarded by !s->left_intra_ctx[row7], so
// the inner left_intra test below can never be true — dead code in the
// original too; kept as-is.
1688 if (s->left_intra_ctx[row7]) {
1690 } else if (s->left_comp_ctx[row7]) {
1691 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1693 c = 4 * (!s->left_ref_ctx[row7]);
1698 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1699 s->counts.single_ref[c][0][bit]++;
1703 // FIXME can this codeblob be replaced by some sort of LUT?
1706 if (s->left_intra_ctx[row7]) {
1707 if (s->above_intra_ctx[col]) {
1709 } else if (s->above_comp_ctx[col]) {
1710 c = 1 + 2 * (s->fixcompref == 1 ||
1711 s->above_ref_ctx[col] == 1);
1712 } else if (!s->above_ref_ctx[col]) {
1715 c = 4 * (s->above_ref_ctx[col] == 1);
1717 } else if (s->above_intra_ctx[col]) {
1718 if (s->left_intra_ctx[row7]) {
1720 } else if (s->left_comp_ctx[row7]) {
1721 c = 1 + 2 * (s->fixcompref == 1 ||
1722 s->left_ref_ctx[row7] == 1);
1723 } else if (!s->left_ref_ctx[row7]) {
1726 c = 4 * (s->left_ref_ctx[row7] == 1);
1728 } else if (s->above_comp_ctx[col]) {
1729 if (s->left_comp_ctx[row7]) {
1730 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1731 c = 3 * (s->fixcompref == 1 ||
1732 s->left_ref_ctx[row7] == 1);
1736 } else if (!s->left_ref_ctx[row7]) {
1737 c = 1 + 2 * (s->fixcompref == 1 ||
1738 s->above_ref_ctx[col] == 1);
1740 c = 3 * (s->left_ref_ctx[row7] == 1) +
1741 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1743 } else if (s->left_comp_ctx[row7]) {
1744 if (!s->above_ref_ctx[col]) {
1745 c = 1 + 2 * (s->fixcompref == 1 ||
1746 s->left_ref_ctx[row7] == 1);
1748 c = 3 * (s->above_ref_ctx[col] == 1) +
1749 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1751 } else if (!s->above_ref_ctx[col]) {
1752 if (!s->left_ref_ctx[row7]) {
1755 c = 4 * (s->left_ref_ctx[row7] == 1);
1757 } else if (!s->left_ref_ctx[row7]) {
1758 c = 4 * (s->above_ref_ctx[col] == 1);
1760 c = 2 * (s->left_ref_ctx[row7] == 1) +
1761 2 * (s->above_ref_ctx[col] == 1);
1764 if (s->above_intra_ctx[col] ||
1765 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1767 } else if (s->above_comp_ctx[col]) {
1768 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1770 c = 4 * (s->above_ref_ctx[col] == 1);
1773 } else if (have_l) {
1774 if (s->left_intra_ctx[row7] ||
1775 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1777 } else if (s->left_comp_ctx[row7]) {
1778 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1780 c = 4 * (s->left_ref_ctx[row7] == 1);
1785 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1786 s->counts.single_ref[c][1][bit]++;
1787 b->ref[0] = 1 + bit;
// --- inter mode (whole-block sizes) + interpolation filter ---
1792 if (b->bs <= BS_8x8) {
// Segment skip feature forces ZEROMV for all sub-blocks.
1793 if (s->segmentation.feat[b->seg_id].skip_enabled) {
1794 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
// off[] nudges the neighbour-context position for some block sizes.
1796 static const uint8_t off[10] = {
1797 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1800 // FIXME this needs to use the LUT tables from find_ref_mvs
1801 // because not all are -1,0/0,-1
1802 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1803 [s->left_mode_ctx[row7 + off[b->bs]]];
1805 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1806 s->prob.p.mv_mode[c]);
1807 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
// Inter modes are numbered from 10 (NEARESTMV); counts use 0-based index.
1808 s->counts.mv_mode[c][b->mode[0] - 10]++;
// Interpolation filter: context prefers agreeing neighbours.
1812 if (s->filtermode == FILTER_SWITCHABLE) {
1815 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1816 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1817 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1818 s->left_filter_ctx[row7] : 3;
1820 c = s->above_filter_ctx[col];
1822 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1823 c = s->left_filter_ctx[row7];
1828 filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1829 s->prob.p.filter[c]);
1830 s->counts.filter[c][filter_id]++;
1831 b->filter = vp9_filter_lut[filter_id];
1833 b->filter = s->filtermode;
// --- sub-8x8: per-sub-block modes and MVs ---
1836 if (b->bs > BS_8x8) {
1837 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1839 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1840 s->prob.p.mv_mode[c]);
1841 s->counts.mv_mode[c][b->mode[0] - 10]++;
1842 fill_mv(s, b->mv[0], b->mode[0], 0);
1844 if (b->bs != BS_8x4) {
1845 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1846 s->prob.p.mv_mode[c]);
1847 s->counts.mv_mode[c][b->mode[1] - 10]++;
1848 fill_mv(s, b->mv[1], b->mode[1], 1);
1850 b->mode[1] = b->mode[0];
// A VP56mv is 2x int16 = 4 bytes, so MVs are copied as 32-bit words.
1851 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1852 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1855 if (b->bs != BS_4x8) {
1856 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1857 s->prob.p.mv_mode[c]);
1858 s->counts.mv_mode[c][b->mode[2] - 10]++;
1859 fill_mv(s, b->mv[2], b->mode[2], 2);
1861 if (b->bs != BS_8x4) {
1862 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1863 s->prob.p.mv_mode[c]);
1864 s->counts.mv_mode[c][b->mode[3] - 10]++;
1865 fill_mv(s, b->mv[3], b->mode[3], 3);
1867 b->mode[3] = b->mode[2];
1868 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1869 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1872 b->mode[2] = b->mode[0];
1873 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1874 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1875 b->mode[3] = b->mode[1];
1876 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1877 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
// >= 8x8: one MV (pair) replicated into all four sub-block slots.
1880 fill_mv(s, b->mv[0], b->mode[0], -1);
1881 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1882 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1883 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
1884 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1885 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1886 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
// vref: the reference stored into the ref context (for compound, the
// variable one).
1889 vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
// --- context splat helpers: write `val` into n consecutive context bytes
// with one aligned store where possible (64-bit and 32-bit variants).
1893 #define SPLAT_CTX(var, val, n) \
1895 case 1: var = val; break; \
1896 case 2: AV_WN16A(&var, val * 0x0101); break; \
1897 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1898 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
1900 uint64_t v64 = val * 0x0101010101010101ULL; \
1901 AV_WN64A( &var, v64); \
1902 AV_WN64A(&((uint8_t *) &var)[8], v64); \
1907 #define SPLAT_CTX(var, val, n) \
1909 case 1: var = val; break; \
1910 case 2: AV_WN16A(&var, val * 0x0101); break; \
1911 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1913 uint32_t v32 = val * 0x01010101; \
1914 AV_WN32A( &var, v32); \
1915 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1919 uint32_t v32 = val * 0x01010101; \
1920 AV_WN32A( &var, v32); \
1921 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1922 AV_WN32A(&((uint8_t *) &var)[8], v32); \
1923 AV_WN32A(&((uint8_t *) &var)[12], v32); \
// Update all above/left context arrays for this block's width/height.
1929 switch (bwh_tab[1][b->bs][0]) {
1930 #define SET_CTXS(dir, off, n) \
1932 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
1933 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
1934 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
1935 if (!s->keyframe && !s->intraonly) { \
1936 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
1937 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
1938 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
1940 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
1941 if (s->filtermode == FILTER_SWITCHABLE) { \
1942 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
1947 case 1: SET_CTXS(above, col, 1); break;
1948 case 2: SET_CTXS(above, col, 2); break;
1949 case 4: SET_CTXS(above, col, 4); break;
1950 case 8: SET_CTXS(above, col, 8); break;
1952 switch (bwh_tab[1][b->bs][1]) {
1953 case 1: SET_CTXS(left, row7, 1); break;
1954 case 2: SET_CTXS(left, row7, 2); break;
1955 case 4: SET_CTXS(left, row7, 4); break;
1956 case 8: SET_CTXS(left, row7, 8); break;
// --- MV context (left_mv_ctx/above_mv_ctx) update for inter frames ---
1961 if (!s->keyframe && !s->intraonly) {
1962 if (b->bs > BS_8x8) {
// Sub-8x8: edge sub-blocks feed the contexts individually.
1963 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1965 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
1966 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
1967 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
1968 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
1969 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
1970 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
1971 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
1972 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
// >= 8x8: replicate the single MV pair across the block's span.
1974 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1976 for (n = 0; n < w4 * 2; n++) {
1977 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
1978 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
1980 for (n = 0; n < h4 * 2; n++) {
1981 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
1982 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
// --- store refs + MVs into the frame-wide mv array (consumed by
// find_ref_mvs of later blocks/frames) ---
1988 for (y = 0; y < h4; y++) {
1989 int x, o = (row + y) * s->sb_cols * 8 + col;
1990 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
// Intra case (condition line not visible in excerpt).
1993 for (x = 0; x < w4; x++) {
1997 } else if (b->comp) {
1998 for (x = 0; x < w4; x++) {
1999 mv[x].ref[0] = b->ref[0];
2000 mv[x].ref[1] = b->ref[1];
2001 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2002 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2005 for (x = 0; x < w4; x++) {
2006 mv[x].ref[0] = b->ref[0];
2008 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2014 // FIXME remove tx argument, and merge cnt/eob arguments?
// Decode the residual coefficients of one transform block.
// coef receives dequantized coefficients at scan positions; cnt/eob collect
// adaptation statistics; p is the probability model (indexed by band and
// neighbour-nnz context); nnz is the initial context; scan/nb give the
// coefficient order and each position's neighbours; band_counts gives the
// number of positions per band; qmul[0]/qmul[1] are the DC/AC dequant
// factors.  Returns the number of decoded coefficients (end-of-block
// position; return statements themselves are outside this excerpt).
// NOTE(review): the enclosing do/while header, some braces, and the
// rc/band-advance lines are missing from this excerpt.
2015 static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2016 enum TxfmMode tx, unsigned (*cnt)[6][3],
2017 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2018 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2019 const int16_t *band_counts, const int16_t *qmul)
2021 int i = 0, band = 0, band_left = band_counts[band];
// tp points at the 11 token probabilities for the current band/context.
2022 uint8_t *tp = p[0][nnz];
// cache[] holds per-position token magnitudes (clamped) used to derive the
// neighbour context for subsequent positions.
2023 uint8_t cache[1024];
// Token decode loop: eob -> zero -> one -> larger-magnitude categories.
2028 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2029 eob[band][nnz][val]++;
2034 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2035 cnt[band][nnz][0]++;
2037 band_left = band_counts[++band];
// Context for the next position = rounded average of the two neighbours'
// cached magnitudes.
2039 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2041 if (++i == n_coeffs)
2042 break; //invalid input; blocks should end with EOB
2047 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2048 cnt[band][nnz][1]++;
2052 // fill in p[3-10] (model fill) - only once per frame for each pos
// Higher-magnitude token probs are derived lazily from tp[2] via the
// Pareto model table.
2054 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2056 cnt[band][nnz][2]++;
2057 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2058 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2059 cache[rc] = val = 2;
2061 val = 3 + vp56_rac_get_prob(c, tp[5]);
2064 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2066 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
// cat1: values 5-6, fixed extra-bit probability 159.
2067 val = 5 + vp56_rac_get_prob(c, 159);
// cat2: values 7-10.
2069 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2070 val += vp56_rac_get_prob(c, 145);
// cat3-6: progressively wider extra-bit ranges with fixed probabilities.
2074 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2075 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2076 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2077 val += (vp56_rac_get_prob(c, 148) << 1);
2078 val += vp56_rac_get_prob(c, 140);
2080 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2081 val += (vp56_rac_get_prob(c, 155) << 2);
2082 val += (vp56_rac_get_prob(c, 140) << 1);
2083 val += vp56_rac_get_prob(c, 135);
2085 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2086 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2087 val += (vp56_rac_get_prob(c, 157) << 3);
2088 val += (vp56_rac_get_prob(c, 141) << 2);
2089 val += (vp56_rac_get_prob(c, 134) << 1);
2090 val += vp56_rac_get_prob(c, 130);
// cat6: 14 extra bits, values from 67 up.
2092 val = 67 + (vp56_rac_get_prob(c, 254) << 13);
2093 val += (vp56_rac_get_prob(c, 254) << 12);
2094 val += (vp56_rac_get_prob(c, 254) << 11);
2095 val += (vp56_rac_get_prob(c, 252) << 10);
2096 val += (vp56_rac_get_prob(c, 249) << 9);
2097 val += (vp56_rac_get_prob(c, 243) << 8);
2098 val += (vp56_rac_get_prob(c, 230) << 7);
2099 val += (vp56_rac_get_prob(c, 196) << 6);
2100 val += (vp56_rac_get_prob(c, 177) << 5);
2101 val += (vp56_rac_get_prob(c, 153) << 4);
2102 val += (vp56_rac_get_prob(c, 140) << 3);
2103 val += (vp56_rac_get_prob(c, 133) << 2);
2104 val += (vp56_rac_get_prob(c, 130) << 1);
2105 val += vp56_rac_get_prob(c, 129);
2110 band_left = band_counts[++band];
// Sign bit, then dequant; 32x32 transforms use half the quantizer step.
// qmul[!!i] selects DC (i == 0) vs AC dequant factor.
2111 if (tx == TX_32X32) // FIXME slow
2112 coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
2114 coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
2115 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2117 } while (++i < n_coeffs);
// Decode all luma and chroma residual coefficients for the current block:
// merge the per-pixel nnz contexts down to per-transform-block granularity,
// call decode_coeffs_b() for every transform block inside the (clipped)
// block area, record per-transform eob counts, and splat the resulting nnz
// flags back out to per-pixel context resolution.
// NOTE(review): several lines (variable declarations around the macros,
// some switch/brace lines, the luma/chroma eob else-branches) are missing
// from this excerpt.
2122 static void decode_coeffs(AVCodecContext *ctx)
2124 VP9Context *s = ctx->priv_data;
2126 int row = s->row, col = s->col;
// Luma probability/count tables, selected by tx size and intra/inter.
2127 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2128 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2129 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
// Block size in 4x4 units; end_x/end_y clip against the frame edge.
2130 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2131 int end_x = FFMIN(2 * (s->cols - col), w4);
2132 int end_y = FFMIN(2 * (s->rows - row), h4);
// step1d: transform size in 4x4 units; step: coefficients per transform
// in 16-coeff units.
2133 int n, pl, x, y, step1d = 1 << b->tx, step = 1 << (b->tx * 2);
2134 int uvstep1d = 1 << b->uvtx, uvstep = 1 << (b->uvtx * 2), res;
2135 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
// Lossless mode uses a separate (WHT) scan-table set (offset by 4).
2136 int tx = 4 * s->lossless + b->tx;
2137 const int16_t * const *yscans = vp9_scans[tx];
2138 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
// Chroma always uses the DCT_DCT scan.
2139 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2140 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2141 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2142 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
// Coefficient-band position counts per tx size (last entry = remainder).
2143 static const int16_t band_counts[4][8] = {
2144 { 1, 2, 3, 4, 3, 16 - 13 },
2145 { 1, 2, 3, 4, 11, 64 - 21 },
2146 { 1, 2, 3, 4, 11, 256 - 21 },
2147 { 1, 2, 3, 4, 11, 1024 - 21 },
2149 const int16_t *y_band_counts = band_counts[b->tx];
2150 const int16_t *uv_band_counts = band_counts[b->uvtx];
// MERGE_CTX collapses `step` per-pixel nnz bytes into one 0/1 flag per
// transform block, reading them with one aligned load.
2152 #define MERGE(la, end, step, rd) \
2153 for (n = 0; n < end; n += step) \
2154 la[n] = !!rd(&la[n])
2155 #define MERGE_CTX(step, rd) \
2157 MERGE(l, end_y, step, rd); \
2158 MERGE(a, end_x, step, rd); \
2163 case TX_8X8: MERGE_CTX(2, AV_RN16A); break;
2164 case TX_16X16: MERGE_CTX(4, AV_RN32A); break;
2165 case TX_32X32: MERGE_CTX(8, AV_RN64A); break;
// --- luma: one decode_coeffs_b() call per transform block ---
2167 for (n = 0, y = 0; y < end_y; y += step1d) {
2168 for (x = 0; x < end_x; x += step1d, n += step) {
// 4x4 intra transforms pick their type from the per-sub-block mode.
2169 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[b->tx == TX_4X4 &&
2172 int nnz = a[x] + l[y];
2173 res = decode_coeffs_b(&s->c, s->block + 16 * n, 16 * step,
2174 b->tx, c, e, p, nnz, yscans[txtp],
2175 ynbs[txtp], y_band_counts, qmul[0]);
2176 a[x] = l[y] = !!res;
// eob is stored 16-bit for large transforms (can exceed 255).
2177 if (b->tx > TX_8X8) {
2178 AV_WN16A(&s->eob[n], res);
// SPLAT expands each transform block's nnz flag back to per-pixel bytes;
// at the frame edge (cond false) plain memset is used instead of the
// aligned wide stores.
2184 #define SPLAT(la, end, step, cond) \
2186 for (n = 1; n < end; n += step) \
2187 la[n] = la[n - 1]; \
2188 } else if (step == 4) { \
2190 for (n = 0; n < end; n += step) \
2191 AV_WN32A(&la[n], la[n] * 0x01010101); \
2193 for (n = 0; n < end; n += step) \
2194 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2196 } else /* step == 8 */ { \
2198 if (HAVE_FAST_64BIT) { \
2199 for (n = 0; n < end; n += step) \
2200 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2202 for (n = 0; n < end; n += step) { \
2203 uint32_t v32 = la[n] * 0x01010101; \
2204 AV_WN32A(&la[n], v32); \
2205 AV_WN32A(&la[n + 4], v32); \
2209 for (n = 0; n < end; n += step) \
2210 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2213 #define SPLAT_CTX(step) \
2215 SPLAT(a, end_x, step, end_x == w4); \
2216 SPLAT(l, end_y, step, end_y == h4); \
2219 case TX_8X8: SPLAT_CTX(2); break;
2220 case TX_16X16: SPLAT_CTX(4); break;
2221 case TX_32X32: SPLAT_CTX(8); break;
// --- chroma: same procedure per plane with the uv tables ---
2224 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2225 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2226 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2231 for (pl = 0; pl < 2; pl++) {
2232 a = &s->above_uv_nnz_ctx[pl][col];
2233 l = &s->left_uv_nnz_ctx[pl][row & 7];
2235 case TX_8X8: MERGE_CTX(2, AV_RN16A); break;
2236 case TX_16X16: MERGE_CTX(4, AV_RN32A); break;
2237 case TX_32X32: MERGE_CTX(8, AV_RN64A); break;
2239 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2240 for (x = 0; x < end_x; x += uvstep1d, n += uvstep) {
2241 int nnz = a[x] + l[y];
2242 res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n,
2243 16 * uvstep, b->uvtx, c, e, p, nnz,
2244 uvscan, uvnb, uv_band_counts, qmul[1]);
2245 a[x] = l[y] = !!res;
2246 if (b->uvtx > TX_8X8) {
2247 AV_WN16A(&s->uveob[pl][n], res);
2249 s->uveob[pl][n] = res;
2254 case TX_8X8: SPLAT_CTX(2); break;
2255 case TX_16X16: SPLAT_CTX(4); break;
2256 case TX_32X32: SPLAT_CTX(8); break;
// Prepare the prediction edges for one intra transform block: the top edge
// pixels end up in *a and the left edge pixels in l[].  Blocks at a frame,
// tile or superblock boundary are missing some neighbours; in that case the
// prediction mode is remapped (mode_conv) to a variant that does not need
// them, and unavailable edge pixels are synthesized (constant 127/128/129
// fills or replication of the last available pixel).  Returns the possibly
// remapped mode.  NOTE(review): a few original source lines are absent from
// this extract; comments describe only the code that is visible.
2261 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2262 uint8_t *dst_edge, ptrdiff_t stride_edge,
2263 uint8_t *dst_inner, ptrdiff_t stride_inner,
2264 uint8_t *l, int col, int x, int w,
2265 int row, int y, enum TxfmMode tx,
// Neighbour availability: top exists unless this is the very first 4px row
// of the frame; left exists unless we sit on the tile's left column edge.
2268 int have_top = row > 0 || y > 0;
2269 int have_left = col > s->tiling.tile_col_start || x > 0;
2270 int have_right = x < w - 1;
// Remap table: for each mode, the fallback to use depending on which of
// the left/top neighbours are actually available.
2271 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2272 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2273 { DC_127_PRED, VERT_PRED } },
2274 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2275 { HOR_PRED, HOR_PRED } },
2276 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2277 { LEFT_DC_PRED, DC_PRED } },
2278 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2279 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2280 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2281 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2282 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2283 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2284 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2285 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2286 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2287 { DC_127_PRED, VERT_LEFT_PRED } },
2288 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2289 { HOR_UP_PRED, HOR_UP_PRED } },
2290 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2291 { HOR_PRED, TM_VP8_PRED } },
// Which neighbouring pixel groups each (remapped) mode reads.
2293 static const struct {
2294 uint8_t needs_left:1;
2295 uint8_t needs_top:1;
2296 uint8_t needs_topleft:1;
2297 uint8_t needs_topright:1;
2298 } edges[N_INTRA_PRED_MODES] = {
2299 [VERT_PRED] = { .needs_top = 1 },
2300 [HOR_PRED] = { .needs_left = 1 },
2301 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2302 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2303 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2304 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2305 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2306 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2307 [HOR_UP_PRED] = { .needs_left = 1 },
2308 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2309 [LEFT_DC_PRED] = { .needs_left = 1 },
2310 [TOP_DC_PRED] = { .needs_top = 1 },
2311 [DC_128_PRED] = { 0 },
2312 [DC_127_PRED] = { 0 },
2313 [DC_129_PRED] = { 0 }
2316 av_assert2(mode >= 0 && mode < 10);
2317 mode = mode_conv[mode][have_left][have_top];
// Fill the top edge (and optionally top-right/top-left) into *a.
2318 if (edges[mode].needs_top) {
2319 uint8_t *top, *topleft;
// n_px_have: pixels available to the right before running off the frame
// (the << !p accounts for chroma subsampling; p == 0 is luma).
2320 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
2321 int n_px_need_tr = 0;
2323 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2326 // if top of sb64-row, use s->intra_pred_data[] instead of
2327 // dst[-stride] for intra prediction (it contains pre- instead of
2328 // post-loopfilter data)
2330 top = !(row & 7) && !y ?
2331 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2332 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2334 topleft = !(row & 7) && !y ?
2335 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2336 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2337 &dst_inner[-stride_inner];
// Fast path: everything the mode needs is available contiguously.
2341 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2342 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2343 n_px_need + n_px_need_tr <= n_px_have) {
2347 if (n_px_need <= n_px_have) {
2348 memcpy(*a, top, n_px_need);
// Partially available: copy what exists, replicate the last pixel.
2350 memcpy(*a, top, n_px_have);
2351 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2352 n_px_need - n_px_have);
// No top row at all: constant 127 fill per the VP9 spec convention.
2355 memset(*a, 127, n_px_need);
2357 if (edges[mode].needs_topleft) {
2358 if (have_left && have_top) {
2359 (*a)[-1] = topleft[-1];
2361 (*a)[-1] = have_top ? 129 : 127;
// Top-right pixels only matter for 4x4 transforms.
2364 if (tx == TX_4X4 && edges[mode].needs_topright) {
2365 if (have_top && have_right &&
2366 n_px_need + n_px_need_tr <= n_px_have) {
2367 memcpy(&(*a)[4], &top[4], 4);
2369 memset(&(*a)[4], (*a)[3], 4);
// Fill the left edge into l[]; pixels are stored bottom-up
// (l[n_px_need - 1] is the topmost left neighbour).
2374 if (edges[mode].needs_left) {
2376 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
2377 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2378 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2380 if (n_px_need <= n_px_have) {
2381 for (i = 0; i < n_px_need; i++)
2382 l[n_px_need - 1 - i] = dst[i * stride - 1];
2384 for (i = 0; i < n_px_have; i++)
2385 l[n_px_need - 1 - i] = dst[i * stride - 1];
2386 memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
// No left column: constant 129 fill per the VP9 spec convention.
2389 memset(l, 129, 4 << tx);
// Reconstruct an intra-coded block: for every transform sub-block, build the
// prediction edges (check_intra_mode), run the DSP intra predictor, then add
// the inverse transform of the decoded residual.  Luma is processed first,
// then both chroma planes.  y_off/uv_off are byte offsets of this block into
// the current frame's planes; reconstruction is written both via s->dst
// (possibly an emu buffer) and relative to the frame buffer for edge pixels.
2396 static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2398 VP9Context *s = ctx->priv_data;
2400 int row = s->row, col = s->col;
// w4/h4: block size in 4px units; step1d/step: transform-block strides.
2401 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2402 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
// Clip the loop bounds so we never reconstruct past the visible frame.
2403 int end_x = FFMIN(2 * (s->cols - col), w4);
2404 int end_y = FFMIN(2 * (s->rows - row), h4);
2405 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2406 int uvstep1d = 1 << b->uvtx, p;
2407 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
// Scratch edge arrays handed to check_intra_mode / the intra predictors.
2408 LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
2409 LOCAL_ALIGNED_16(uint8_t, l, [32]);
// Luma: one intra prediction + itxfm per transform sub-block.
2411 for (n = 0, y = 0; y < end_y; y += step1d) {
2412 uint8_t *ptr = dst, *ptr_r = dst_r;
2413 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2414 ptr_r += 4 * step1d, n += step) {
// Sub-8x8 blocks with 4x4 transforms carry one mode per sub-block.
2415 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2417 uint8_t *a = &a_buf[16];
2418 enum TxfmType txtp = vp9_intra_txfm_type[mode];
// eob for >8x8 transforms is stored as a 16-bit value.
2419 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2421 mode = check_intra_mode(s, mode, &a, ptr_r,
2422 s->frames[CUR_FRAME].tf.f->linesize[0],
2423 ptr, s->y_stride, l,
2424 col, x, w4, row, y, b->tx, 0);
2425 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2427 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2428 s->block + 16 * n, eob);
2430 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2431 dst += 4 * step1d * s->y_stride;
// Chroma: same procedure for both U and V; chroma always uses DCT_DCT.
2439 step = 1 << (b->uvtx * 2);
2440 for (p = 0; p < 2; p++) {
2441 dst = s->dst[1 + p];
2442 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2443 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2444 uint8_t *ptr = dst, *ptr_r = dst_r;
2445 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2446 ptr_r += 4 * uvstep1d, n += step) {
2447 int mode = b->uvmode;
2448 uint8_t *a = &a_buf[16];
2449 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2451 mode = check_intra_mode(s, mode, &a, ptr_r,
2452 s->frames[CUR_FRAME].tf.f->linesize[1],
2453 ptr, s->uv_stride, l,
2454 col, x, w4, row, y, b->uvtx, p + 1);
2455 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2457 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2458 s->uvblock[p] + 16 * n, eob);
2460 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2461 dst += 4 * uvstep1d * s->uv_stride;
// Motion-compensate one luma block from one reference direction.
// mc indexes the subpel filter table by [x-has-subpel][y-has-subpel].
// Waits on the reference frame's decoding progress (frame threading), and
// falls back to emulated_edge_mc when the 8-tap filter footprint (3 pixels
// before, 4 after) would read outside the reference frame.
2466 static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2467 uint8_t *dst, ptrdiff_t dst_stride,
2468 const uint8_t *ref, ptrdiff_t ref_stride,
2469 ThreadFrame *ref_frame,
2470 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2471 int bw, int bh, int w, int h)
2473 int mx = mv->x, my = mv->y, th;
2477 ref += y * ref_stride + x;
2480 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2481 // we use +7 because the last 7 pixels of each sbrow can be changed in
2482 // the longest loopfilter of the next sbrow
2483 th = (y + bh + 4 * !!my + 7) >> 6;
2484 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
// !!mx / !!my: whether there is a subpel component, i.e. whether the
// interpolation filter needs extra pixels around the block.
2485 if (x < !!mx * 3 || y < !!my * 3 ||
2486 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2487 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2488 ref - !!my * 3 * ref_stride - !!mx * 3,
2490 bw + !!mx * 7, bh + !!my * 7,
2491 x - !!mx * 3, y - !!my * 3, w, h);
// 80 is the stride of edge_emu_buffer for luma.
2492 ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
// mx/my are in 1/8pel for luma, hence the << 1 from the 1/16pel MV units.
2495 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
// Motion-compensate one chroma block (both U and V planes) from one
// reference direction.  Same structure as mc_luma_dir, but operates on the
// subsampled planes (note the >> 5 progress threshold instead of >> 6) and
// passes mx/my without the << 1 since chroma MVs are already at the right
// precision for the chroma filters.
2498 static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2499 uint8_t *dst_u, uint8_t *dst_v,
2500 ptrdiff_t dst_stride,
2501 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2502 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2503 ThreadFrame *ref_frame,
2504 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2505 int bw, int bh, int w, int h)
2507 int mx = mv->x, my = mv->y, th;
2511 ref_u += y * src_stride_u + x;
2512 ref_v += y * src_stride_v + x;
2515 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2516 // we use +7 because the last 7 pixels of each sbrow can be changed in
2517 // the longest loopfilter of the next sbrow
2518 th = (y + bh + 4 * !!my + 7) >> 5;
2519 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
// Edge emulation: both planes go through the same edge_emu_buffer, one
// after the other, so U must be filtered before V's emulation overwrites it.
2520 if (x < !!mx * 3 || y < !!my * 3 ||
2521 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2522 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2523 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2525 bw + !!mx * 7, bh + !!my * 7,
2526 x - !!mx * 3, y - !!my * 3, w, h);
2527 ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2528 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2530 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2531 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2533 bw + !!mx * 7, bh + !!my * 7,
2534 x - !!mx * 3, y - !!my * 3, w, h);
2535 ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2536 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
// No emulation needed: filter straight from the reference planes.
2538 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2539 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
// Reconstruct an inter-coded block: motion compensation for luma (per
// sub-block for sub-8x8 partitions, with a second pass for the compound
// reference when present) and chroma (using an averaged MV for sub-8x8),
// followed by adding the inverse-transformed residual unless the block is
// skipped.  NOTE(review): several original source lines are absent from this
// extract (branch/else lines); comments describe only what is visible.
2543 static void inter_recon(AVCodecContext *ctx)
2545 static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
2546 { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
2547 { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
2549 VP9Context *s = ctx->priv_data;
2551 int row = s->row, col = s->col;
// tref1/ref1: first reference; tref2/ref2: second (compound) reference.
2552 ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]], *tref2;
2553 AVFrame *ref1 = tref1->f, *ref2;
2554 int w1 = ref1->width, h1 = ref1->height, w2, h2;
2555 ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
2558 tref2 = &s->refs[s->refidx[b->ref[1]]];
// y motion compensation.  Sub-8x8 partitions carry up to four MVs.
2565 if (b->bs > BS_8x8) {
2566 if (b->bs == BS_8x4) {
2567 mc_luma_dir(s, s->dsp.mc[3][b->filter][0], s->dst[0], ls_y,
2568 ref1->data[0], ref1->linesize[0], tref1,
2569 row << 3, col << 3, &b->mv[0][0], 8, 4, w1, h1);
2570 mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
2571 s->dst[0] + 4 * ls_y, ls_y,
2572 ref1->data[0], ref1->linesize[0], tref1,
2573 (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1);
// Second-reference pass (compound prediction), averaged by mc[..][1].
2576 mc_luma_dir(s, s->dsp.mc[3][b->filter][1], s->dst[0], ls_y,
2577 ref2->data[0], ref2->linesize[0], tref2,
2578 row << 3, col << 3, &b->mv[0][1], 8, 4, w2, h2);
2579 mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
2580 s->dst[0] + 4 * ls_y, ls_y,
2581 ref2->data[0], ref2->linesize[0], tref2,
2582 (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2);
2584 } else if (b->bs == BS_4x8) {
2585 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2586 ref1->data[0], ref1->linesize[0], tref1,
2587 row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1);
2588 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2589 ref1->data[0], ref1->linesize[0], tref1,
2590 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1);
2593 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2594 ref2->data[0], ref2->linesize[0], tref2,
2595 row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2);
2596 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2597 ref2->data[0], ref2->linesize[0], tref2,
2598 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2);
2601 av_assert2(b->bs == BS_4x4);
2603 // FIXME if two horizontally adjacent blocks have the same MV,
2604 // do a w8 instead of a w4 call
2605 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2606 ref1->data[0], ref1->linesize[0], tref1,
2607 row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1);
2608 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2609 ref1->data[0], ref1->linesize[0], tref1,
2610 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1);
2611 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2612 s->dst[0] + 4 * ls_y, ls_y,
2613 ref1->data[0], ref1->linesize[0], tref1,
2614 (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1);
2615 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2616 s->dst[0] + 4 * ls_y + 4, ls_y,
2617 ref1->data[0], ref1->linesize[0], tref1,
2618 (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1);
2621 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2622 ref2->data[0], ref2->linesize[0], tref2,
2623 row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2);
2624 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2625 ref2->data[0], ref2->linesize[0], tref2,
2626 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2);
2627 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2628 s->dst[0] + 4 * ls_y, ls_y,
2629 ref2->data[0], ref2->linesize[0], tref2,
2630 (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2);
2631 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2632 s->dst[0] + 4 * ls_y + 4, ls_y,
2633 ref2->data[0], ref2->linesize[0], tref2,
2634 (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2);
// >= 8x8 block: a single MC call per reference with one MV.
2638 int bwl = bwlog_tab[0][b->bs];
2639 int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
2641 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], s->dst[0], ls_y,
2642 ref1->data[0], ref1->linesize[0], tref1,
2643 row << 3, col << 3, &b->mv[0][0],bw, bh, w1, h1);
2646 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], s->dst[0], ls_y,
2647 ref2->data[0], ref2->linesize[0], tref2,
2648 row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2);
// uv motion compensation: one call per reference covering the whole
// (subsampled) block.
2653 int bwl = bwlog_tab[1][b->bs];
2654 int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
// For sub-8x8 partitions, chroma uses the rounded average of the four
// luma sub-block MVs.
2663 if (b->bs > BS_8x8) {
2664 mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
2665 mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
2670 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
2671 s->dst[1], s->dst[2], ls_uv,
2672 ref1->data[1], ref1->linesize[1],
2673 ref1->data[2], ref1->linesize[2], tref1,
2674 row << 2, col << 2, &mvuv, bw, bh, w1, h1);
2677 if (b->bs > BS_8x8) {
2678 mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
2679 mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
2683 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
2684 s->dst[1], s->dst[2], ls_uv,
2685 ref2->data[1], ref2->linesize[1],
2686 ref2->data[2], ref2->linesize[2], tref2,
2687 row << 2, col << 2, &mvuv, bw, bh, w2, h2);
2692 /* mostly copied intra_reconn() */
// Residual addition, same transform-block walk as intra_recon but with
// DCT_DCT always (inter blocks) and no intra prediction step.
2694 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2695 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2696 int end_x = FFMIN(2 * (s->cols - col), w4);
2697 int end_y = FFMIN(2 * (s->rows - row), h4);
2698 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2699 int uvstep1d = 1 << b->uvtx, p;
2700 uint8_t *dst = s->dst[0];
2703 for (n = 0, y = 0; y < end_y; y += step1d) {
2705 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2706 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2709 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2710 s->block + 16 * n, eob);
2712 dst += 4 * s->y_stride * step1d;
2720 step = 1 << (b->uvtx * 2);
2721 for (p = 0; p < 2; p++) {
2722 dst = s->dst[p + 1];
2723 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2725 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2726 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2729 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2730 s->uvblock[p] + 16 * n, eob);
2732 dst += 4 * uvstep1d * s->uv_stride;
// Compute, for one block, the per-8x8-row bitmasks in lflvl->mask[][][][]
// that tell the loopfilter which column (index 0) and row (index 1) edges
// need filtering, and at which filter width (16/8/4/inner-4, the last array
// dimension).  row_and_7/col_and_7 are the block position within its 64x64
// superblock; w/h the block size in 8px units.  Bit i of a mask = edge at
// 8px column i.  NOTE(review): some original lines are missing from this
// extract; comments describe only the visible code.
2738 static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
2739 int row_and_7, int col_and_7,
2740 int w, int h, int col_end, int row_end,
2741 enum TxfmMode tx, int skip_inter)
2743 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2744 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2745 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2746 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2748 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2749 // edges. This means that for UV, we work on two subsampled blocks at
2750 // a time, and we only use the topleft block's mode information to set
2751 // things like block strength. Thus, for any block size smaller than
2752 // 16x16, ignore the odd portion of the block.
2753 if (tx == TX_4X4 && is_uv) {
// Non-skipped 4x4-transform blocks: every internal 4px edge is filtered.
2768 if (tx == TX_4X4 && !skip_inter) {
// t: bit at our column; m_col: bits covering the block's columns.
2769 int t = 1 << col_and_7, m_col = (t << w) - t, y;
2770 int m_col_odd = (t << (w - 1)) - t;
2772 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
// (UV path) 0x01 selects the block-left 8px edge, the rest are 4px.
2774 int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2776 for (y = row_and_7; y < h + row_and_7; y++) {
2777 int col_mask_id = 2 - !(y & 7);
2779 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2780 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2781 // for odd lines, if the odd col is not being filtered,
2782 // skip odd row also:
2789 // if a/c are even row/col and b/d are odd, and d is skipped,
2790 // e.g. right edge of size-66x66.webm, then skip b also (bug)
2791 if ((col_end & 1) && (y & 1)) {
2792 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
2794 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
// (Y path) 0x11 marks the 8px edges at 32px boundaries.
2798 int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2800 for (y = row_and_7; y < h + row_and_7; y++) {
2801 int col_mask_id = 2 - !(y & 3);
2803 lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
2804 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2805 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
// index 3 = inner 4px edges inside the 8x8 units.
2806 lflvl->mask[is_uv][0][y][3] |= m_col;
2807 lflvl->mask[is_uv][1][y][3] |= m_col;
// Larger transforms (or skipped blocks): only block-boundary edges.
2811 int y, t = 1 << col_and_7, m_col = (t << w) - t;
2814 int mask_id = (tx == TX_8X8);
2815 int l2 = tx + is_uv - 1, step1d = 1 << l2;
// masks[l2]: which 8px columns hold a transform edge for this tx size.
2816 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2817 int m_row = m_col & masks[l2];
2819 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
2820 // 8wd loopfilter to prevent going off the visible edge.
2821 if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2822 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2823 int m_row_8 = m_row - m_row_16;
2825 for (y = row_and_7; y < h + row_and_7; y++) {
2826 lflvl->mask[is_uv][0][y][0] |= m_row_16;
2827 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2830 for (y = row_and_7; y < h + row_and_7; y++)
2831 lflvl->mask[is_uv][0][y][mask_id] |= m_row;
// Same odd-size handling for the vertical (row-edge) direction.
2834 if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2835 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2836 lflvl->mask[is_uv][1][y][0] |= m_col;
2837 if (y - row_and_7 == h - 1)
2838 lflvl->mask[is_uv][1][y][1] |= m_col;
2840 for (y = row_and_7; y < h + row_and_7; y += step1d)
2841 lflvl->mask[is_uv][1][y][mask_id] |= m_col;
2843 } else if (tx != TX_4X4) {
// TX_8X8-or-clipped path: top row edge plus left column edge only.
2846 mask_id = (tx == TX_8X8) || (is_uv && h == 1);
2847 lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
2848 mask_id = (tx == TX_8X8) || (is_uv && w == 1);
2849 for (y = row_and_7; y < h + row_and_7; y++)
2850 lflvl->mask[is_uv][0][y][mask_id] |= t;
// Skipped TX_4X4 UV block: block-boundary edges at 4/8px width.
2852 int t8 = t & 0x01, t4 = t - t8;
2854 for (y = row_and_7; y < h + row_and_7; y++) {
2855 lflvl->mask[is_uv][0][y][2] |= t4;
2856 lflvl->mask[is_uv][0][y][1] |= t8;
2858 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
// Skipped TX_4X4 Y block.
2860 int t8 = t & 0x11, t4 = t - t8;
2862 for (y = row_and_7; y < h + row_and_7; y++) {
2863 lflvl->mask[is_uv][0][y][2] |= t4;
2864 lflvl->mask[is_uv][0][y][1] |= t8;
2866 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
// Decode and reconstruct one block at (row, col) with partition bp at level
// bl: set up MV clamping ranges, reconstruct (intra or inter) — using
// temporary buffers when the block overhangs the frame edge — then derive
// the loopfilter level/masks for it.  In multi-pass (2-pass) decoding the
// coefficient pointers are advanced without reconstruction on the parse
// pass.  NOTE(review): some original lines are absent from this extract;
// comments describe only the visible code.
2871 static void decode_b(AVCodecContext *ctx, int row, int col,
2872 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2873 enum BlockLevel bl, enum BlockPartition bp)
2875 VP9Context *s = ctx->priv_data;
2877 enum BlockSize bs = bl * 3 + bp;
2878 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
2880 AVFrame *f = s->frames[CUR_FRAME].tf.f;
// MV bounds so motion vectors cannot point more than 128 subpel units
// outside the visible frame relative to this block.
2886 s->min_mv.x = -(128 + col * 64);
2887 s->min_mv.y = -(128 + row * 64);
2888 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
2889 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
// Chroma tx size is one step smaller when the block is too narrow/short
// for the luma tx size in the subsampled plane.
2895 b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
// Helpers to zero a run of 1/2/4/8/16 context bytes in one store.
2902 #define SPLAT_ZERO_CTX(v, n) \
2904 case 1: v = 0; break; \
2905 case 2: AV_ZERO16(&v); break; \
2906 case 4: AV_ZERO32(&v); break; \
2907 case 8: AV_ZERO64(&v); break; \
2908 case 16: AV_ZERO128(&v); break; \
2910 #define SPLAT_ZERO_YUV(dir, var, off, n) \
2912 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
2913 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
2914 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
// Clear above/left nonzero-coefficient contexts for this block's span.
2918 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1); break;
2919 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2); break;
2920 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4); break;
2921 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8); break;
2924 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1); break;
2925 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2); break;
2926 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4); break;
2927 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8); break;
// Advance per-block coefficient/eob storage pointers (parse pass).
2932 s->block += w4 * h4 * 64;
2933 s->uvblock[0] += w4 * h4 * 16;
2934 s->uvblock[1] += w4 * h4 * 16;
2935 s->eob += 4 * w4 * h4;
2936 s->uveob[0] += w4 * h4;
2937 s->uveob[1] += w4 * h4;
2943 // emulated overhangs if the stride of the target buffer can't hold. This
2944 // allows to support emu-edge and so on even if we have large block
// emu[plane]: reconstruct into tmp buffers when the block would write
// past the plane stride or below the visible rows.
2946 emu[0] = (col + w4) * 8 > f->linesize[0] ||
2947 (row + h4) > s->rows;
2948 emu[1] = (col + w4) * 4 > f->linesize[1] ||
2949 (row + h4) > s->rows;
2951 s->dst[0] = s->tmp_y;
2954 s->dst[0] = f->data[0] + yoff;
2955 s->y_stride = f->linesize[0];
2958 s->dst[1] = s->tmp_uv[0];
2959 s->dst[2] = s->tmp_uv[1];
2962 s->dst[1] = f->data[1] + uvoff;
2963 s->dst[2] = f->data[2] + uvoff;
2964 s->uv_stride = f->linesize[1];
2967 intra_recon(ctx, yoff, uvoff);
// Copy the emulated luma reconstruction back, in power-of-two wide
// chunks via the unscaled copy "MC" functions.
2972 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
2974 for (n = 0; o < w; n++) {
2979 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
2980 s->tmp_y + o, 64, h, 0, 0);
// Same copy-back for the chroma planes.
2986 int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
2988 for (n = 1; o < w; n++) {
2993 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
2994 s->tmp_uv[0] + o, 32, h, 0, 0);
2995 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
2996 s->tmp_uv[1] + o, 32, h, 0, 0);
3002 // pick filter level and find edges to apply filter to
3003 if (s->filter.level &&
3004 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3005 [b->mode[3] != ZEROMV]) > 0) {
3006 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3007 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3009 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3010 mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3011 mask_edges(lflvl, 1, row7, col7, x_end, y_end,
3012 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3013 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3014 b->uvtx, skip_inter);
// Lazily fill the limit/mblim lookup tables for this filter level.
3016 if (!s->filter.lim_lut[lvl]) {
3017 int sharp = s->filter.sharpness;
3021 limit >>= (sharp + 3) >> 2;
3022 limit = FFMIN(limit, 9 - sharp);
3024 limit = FFMAX(limit, 1);
3026 s->filter.lim_lut[lvl] = limit;
3027 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
// Advance coefficient/eob pointers past this block (reconstruction pass).
3033 s->block += w4 * h4 * 64;
3034 s->uvblock[0] += w4 * h4 * 16;
3035 s->uvblock[1] += w4 * h4 * 16;
3036 s->eob += 4 * w4 * h4;
3037 s->uveob[0] += w4 * h4;
3038 s->uveob[1] += w4 * h4;
// Recursively parse the partition tree for a superblock region at level bl
// and decode its blocks.  When the region extends past the right/bottom
// frame edge, only the partition choices consistent with the clipping are
// read from the bitstream (the p[1]/p[2] branches).  hbs is half the region
// size in 8px units; partition counts are accumulated for probability
// adaptation.
3042 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3043 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3045 VP9Context *s = ctx->priv_data;
// Partition probability context from the above/left partition contexts.
3046 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3047 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
3048 const uint8_t *p = s->keyframe ? vp9_default_kf_partition_probs[bl][c] :
3049 s->prob.p.partition[bl][c];
3050 enum BlockPartition bp;
3051 ptrdiff_t hbs = 4 >> bl;
3052 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3053 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3056 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3057 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3058 } else if (col + hbs < s->cols) { // FIXME why not <=?
3059 if (row + hbs < s->rows) { // FIXME why not <=?
// Fully inside the frame: all four partition types are possible.
3060 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p)
3062 case PARTITION_NONE:
3063 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3066 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3067 yoff += hbs * 8 * y_stride;
3068 uvoff += hbs * 4 * uv_stride;
3069 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3072 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3075 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3077 case PARTITION_SPLIT:
3078 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3079 decode_sb(ctx, row, col + hbs, lflvl,
3080 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3081 yoff += hbs * 8 * y_stride;
3082 uvoff += hbs * 4 * uv_stride;
3083 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3084 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3085 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
// Bottom half clipped: only SPLIT or H are representable.
3090 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3091 bp = PARTITION_SPLIT;
3092 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3093 decode_sb(ctx, row, col + hbs, lflvl,
3094 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3097 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// Right half clipped: only SPLIT or V are representable.
3099 } else if (row + hbs < s->rows) { // FIXME why not <=?
3100 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3101 bp = PARTITION_SPLIT;
3102 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3103 yoff += hbs * 8 * y_stride;
3104 uvoff += hbs * 4 * uv_stride;
3105 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3108 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// Both halves clipped: SPLIT is forced, only top-left recursed into.
3111 bp = PARTITION_SPLIT;
3112 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3114 s->counts.partition[bl][c][bp]++;
// Second-pass (reconstruction) walk of the partition tree: instead of
// reading partitions from the bitstream, replay the b->bl / b->bp values
// stored during the parse pass and call decode_b accordingly.  Structure
// mirrors decode_sb, including the frame-edge clipping of the recursion.
3117 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3118 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3120 VP9Context *s = ctx->priv_data;
3122 ptrdiff_t hbs = 4 >> bl;
3123 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3124 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
// Leaf at 8x8: the stored level must match.
3127 av_assert2(b->bl == BL_8X8);
3128 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3129 } else if (s->b->bl == bl) {
// Stored partition at this level: decode 1 or 2 blocks (H/V splits).
3130 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3131 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3132 yoff += hbs * 8 * y_stride;
3133 uvoff += hbs * 4 * uv_stride;
3134 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3135 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3138 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// Otherwise recurse as a SPLIT, clipped at the frame edges.
3141 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3142 if (col + hbs < s->cols) { // FIXME why not <=?
3143 if (row + hbs < s->rows) {
3144 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
3145 uvoff + 4 * hbs, bl + 1);
3146 yoff += hbs * 8 * y_stride;
3147 uvoff += hbs * 4 * uv_stride;
3148 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3149 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3150 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3154 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3156 } else if (row + hbs < s->rows) {
3157 yoff += hbs * 8 * y_stride;
3158 uvoff += hbs * 4 * uv_stride;
3159 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
// Apply the in-loop deblocking filter to one 64x64 superblock, using the
// per-edge masks built by mask_edges and the per-8x8-unit filter levels in
// lflvl->level.  Four passes: Y column edges, Y row edges, then the same
// two passes for each chroma plane.  Where two adjacent 8px edges share a
// level, the 16-wide or mix2 (two-edges-at-once) DSP variants are used,
// packing two E/I limits into one value via the << 8 ORs.
// NOTE(review): some original lines are absent from this extract.
3164 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3165 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3167 VP9Context *s = ctx->priv_data;
3168 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3169 uint8_t *dst = f->data[0] + yoff, *lvl = lflvl->level;
3170 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
3173 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3174 // if you think of them as acting on a 8x8 block max, we can interleave
3175 // each v/h within the single x loop, but that only works if we work on
3176 // 8 pixel blocks, and we won't always do that (we want at least 16px
3177 // to use SSE2 optimizations, perhaps 32 for AVX2)
3179 // filter edges between columns, Y plane (e.g. block1 | block2)
// Two 8px rows per iteration so vertically adjacent edges can be merged.
3180 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
3181 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
3182 uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
// hm: union of all edges in these two rows; index 3 = inner 4px edges.
3183 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3184 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3185 unsigned hm = hm1 | hm2 | hm13 | hm23;
3187 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
// L: filter level for this 8x8 unit; H = high threshold (level >> 4).
3189 int L = *l, H = L >> 4;
3190 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3193 if (hmask1[0] & x) {
3194 if (hmask2[0] & x) {
3195 av_assert2(l[8] == L);
// Both rows have a 16px edge with the same level: one 16-tall call.
3196 s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
3198 s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
3200 } else if (hm2 & x) {
// Different levels in the second row: pack both limits (mix2).
3203 E |= s->filter.mblim_lut[L] << 8;
3204 I |= s->filter.lim_lut[L] << 8;
3205 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3207 [0](ptr, ls_y, E, I, H);
3209 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3210 [0](ptr, ls_y, E, I, H);
3213 } else if (hm2 & x) {
3214 int L = l[8], H = L >> 4;
3215 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3218 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3219 [0](ptr + 8 * ls_y, ls_y, E, I, H);
// Inner 4px edges (offset +4 inside the 8x8 unit).
3223 int L = *l, H = L >> 4;
3224 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3229 E |= s->filter.mblim_lut[L] << 8;
3230 I |= s->filter.lim_lut[L] << 8;
3231 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
3233 s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
3235 } else if (hm23 & x) {
3236 int L = l[8], H = L >> 4;
3237 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3239 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
3245 // filter edges between rows, Y plane (e.g. ------)
3247 dst = f->data[0] + yoff;
3249 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
3250 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
3251 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
// Two 8px columns per iteration (x <<= 2) to merge adjacent edges.
3253 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
3256 int L = *l, H = L >> 4;
3257 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3260 if (vmask[0] & (x << 1)) {
3261 av_assert2(l[1] == L);
3262 s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
3264 s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
3266 } else if (vm & (x << 1)) {
3269 E |= s->filter.mblim_lut[L] << 8;
3270 I |= s->filter.lim_lut[L] << 8;
3271 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3272 [!!(vmask[1] & (x << 1))]
3273 [1](ptr, ls_y, E, I, H);
3275 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3276 [1](ptr, ls_y, E, I, H);
3278 } else if (vm & (x << 1)) {
3279 int L = l[1], H = L >> 4;
3280 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3282 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
3283 [1](ptr + 8, ls_y, E, I, H);
// Inner horizontal 4px edges (offset 4 rows down).
3287 int L = *l, H = L >> 4;
3288 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3290 if (vm3 & (x << 1)) {
3293 E |= s->filter.mblim_lut[L] << 8;
3294 I |= s->filter.lim_lut[L] << 8;
3295 s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
3297 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
3299 } else if (vm3 & (x << 1)) {
3300 int L = l[1], H = L >> 4;
3301 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3303 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
3308 // same principle but for U/V planes
3309 for (p = 0; p < 2; p++) {
3311 dst = f->data[1 + p] + uvoff;
// Chroma is subsampled: 4 mask rows per iteration, level stride 32.
3312 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
3313 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
3314 uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
3315 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
3316 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
3318 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
3321 int L = *l, H = L >> 4;
3322 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3324 if (hmask1[0] & x) {
3325 if (hmask2[0] & x) {
3326 av_assert2(l[16] == L);
3327 s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
3329 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
3331 } else if (hm2 & x) {
3334 E |= s->filter.mblim_lut[L] << 8;
3335 I |= s->filter.lim_lut[L] << 8;
3336 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3338 [0](ptr, ls_uv, E, I, H);
3340 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3341 [0](ptr, ls_uv, E, I, H);
3343 } else if (hm2 & x) {
3344 int L = l[16], H = L >> 4;
3345 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3347 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3348 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
// Chroma row edges.
3356 dst = f->data[1 + p] + uvoff;
3357 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
3358 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
3359 unsigned vm = vmask[0] | vmask[1] | vmask[2];
3361 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
3364 int L = *l, H = L >> 4;
3365 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3368 if (vmask[0] & (x << 2)) {
3369 av_assert2(l[2] == L);
3370 s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
3372 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
3374 } else if (vm & (x << 2)) {
3377 E |= s->filter.mblim_lut[L] << 8;
3378 I |= s->filter.lim_lut[L] << 8;
3379 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3380 [!!(vmask[1] & (x << 2))]
3381 [1](ptr, ls_uv, E, I, H);
3383 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3384 [1](ptr, ls_uv, E, I, H);
3386 } else if (vm & (x << 2)) {
3387 int L = l[2], H = L >> 4;
3388 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3390 s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
3391 [1](ptr + 8, ls_uv, E, I, H);
/**
 * Compute the [start, end) range, in units of 8-pixel blocks, covered by
 * tile number idx out of 1 << log2_n tiles spanning n superblocks.
 * Both bounds are clamped to the frame edge before scaling.
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first_sb = (n *  idx)      >> log2_n;
    int last_sb  = (n * (idx + 1)) >> log2_n;

    if (first_sb > n)
        first_sb = n;
    if (last_sb > n)
        last_sb = n;
    *start = first_sb << 3; /* superblocks -> 8px block units */
    *end   = last_sb  << 3;
}
3409 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3410 int max_count, int update_factor)
3412 unsigned ct = ct0 + ct1, p2, p1;
3418 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3419 p2 = av_clip(p2, 1, 255);
3420 ct = FFMIN(ct, max_count);
3421 update_factor = FASTDIV(update_factor * ct, max_count);
3423 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3424 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
3427 static void adapt_probs(VP9Context *s)
// Backward probability adaptation: after a frame is decoded, blend the
// observed per-frame symbol counts (s->counts) into the stored probability
// context (s->prob_ctx[s->framectxid]) that subsequent frames start from.
// NOTE(review): this excerpt appears line-subsampled — loop braces, else
// branches, the i/j/k/l/m declarations and the early return after the
// keyframe memcpys are not visible; comments only describe visible lines.
3430 prob_context *p = &s->prob_ctx[s->framectxid].p;
// key / intra-only frames (or a missing last keyframe) adapt more aggressively
3431 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
// coefficient probabilities, indexed [tx size][plane type][is-inter][band][ctx]
3434 for (i = 0; i < 4; i++)
3435 for (j = 0; j < 2; j++)
3436 for (k = 0; k < 2; k++)
3437 for (l = 0; l < 6; l++)
3438 for (m = 0; m < 6; m++) {
3439 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3440 unsigned *e = s->counts.eob[i][j][k][l][m];
3441 unsigned *c = s->counts.coef[i][j][k][l][m];
3443 if (l == 0 && m >= 3) // dc only has 3 pt
3446 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3447 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3448 adapt_prob(&pp[2], c[1], c[2], 24, uf);
// key / intra-only frames: carry the tx/skip tables over unchanged;
// the inter-mode adaptation below does not apply to them
3451 if (s->keyframe || s->intraonly) {
3452 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3453 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3454 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3455 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
// skip flag
3460 for (i = 0; i < 3; i++)
3461 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
// intra/inter flag
3464 for (i = 0; i < 4; i++)
3465 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
// compound prediction flag (only when the frame can switch per block)
3468 if (s->comppredmode == PRED_SWITCHABLE) {
3469 for (i = 0; i < 5; i++)
3470 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
// reference-frame selection
3474 if (s->comppredmode != PRED_SINGLEREF) {
3475 for (i = 0; i < 5; i++)
3476 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3477 s->counts.comp_ref[i][1], 20, 128);
3480 if (s->comppredmode != PRED_COMPREF) {
3481 for (i = 0; i < 5; i++) {
3482 uint8_t *pp = p->single_ref[i];
3483 unsigned (*c)[2] = s->counts.single_ref[i];
3485 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3486 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3490 // block partitioning
3491 for (i = 0; i < 4; i++)
3492 for (j = 0; j < 4; j++) {
3493 uint8_t *pp = p->partition[i][j];
3494 unsigned *c = s->counts.partition[i][j];
// 3-node binary tree over the 4 partition types
3496 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3497 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3498 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// tx size selection (only if the frame lets blocks choose)
3502 if (s->txfmmode == TX_SWITCHABLE) {
3503 for (i = 0; i < 2; i++) {
3504 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3506 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3507 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3508 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3509 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3510 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3511 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3515 // interpolation filter
3516 if (s->filtermode == FILTER_SWITCHABLE) {
3517 for (i = 0; i < 4; i++) {
3518 uint8_t *pp = p->filter[i];
3519 unsigned *c = s->counts.filter[i];
3521 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3522 adapt_prob(&pp[1], c[1], c[2], 20, 128);
// inter prediction modes: 3-node tree, note the c[2]/c[0]/c[1]/c[3] order
3527 for (i = 0; i < 7; i++) {
3528 uint8_t *pp = p->mv_mode[i];
3529 unsigned *c = s->counts.mv_mode[i];
3531 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3532 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3533 adapt_prob(&pp[2], c[1], c[3], 20, 128);
// mv joint: which of the two mv components are nonzero
3538 uint8_t *pp = p->mv_joint;
3539 unsigned *c = s->counts.mv_joint;
3541 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3542 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3543 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// the two mv components are modelled independently
3547 for (i = 0; i < 2; i++) {
3549 unsigned *c, (*c2)[2], sum;
3551 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3552 s->counts.mv_comp[i].sign[1], 20, 128);
// magnitude class tree (11 classes, counts c[0]..c[10])
3554 pp = p->mv_comp[i].classes;
3555 c = s->counts.mv_comp[i].classes;
3556 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3557 adapt_prob(&pp[0], c[0], sum, 20, 128);
3559 adapt_prob(&pp[1], c[1], sum, 20, 128);
3561 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3562 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3564 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3565 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3567 adapt_prob(&pp[6], c[6], sum, 20, 128);
3568 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3569 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3570 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3572 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3573 s->counts.mv_comp[i].class0[1], 20, 128);
// per-bit probabilities of the integer magnitude
3574 pp = p->mv_comp[i].bits;
3575 c2 = s->counts.mv_comp[i].bits;
3576 for (j = 0; j < 10; j++)
3577 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
// fractional-pel trees: class0-specific, then the general one
3579 for (j = 0; j < 2; j++) {
3580 pp = p->mv_comp[i].class0_fp[j];
3581 c = s->counts.mv_comp[i].class0_fp[j];
3582 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3583 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3584 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3586 pp = p->mv_comp[i].fp;
3587 c = s->counts.mv_comp[i].fp;
3588 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3589 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3590 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// high-precision (1/8-pel) bits only adapt when the header allowed them
3592 if (s->highprecisionmvs) {
3593 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3594 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3595 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3596 s->counts.mv_comp[i].hp[1], 20, 128);
// y intra modes, walked in tree order; `sum` is the running tail weight
3601 for (i = 0; i < 4; i++) {
3602 uint8_t *pp = p->y_mode[i];
3603 unsigned *c = s->counts.y_mode[i], sum, s2;
3605 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3606 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3607 sum -= c[TM_VP8_PRED];
3608 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3609 sum -= c[VERT_PRED];
3610 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3611 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3613 adapt_prob(&pp[3], s2, sum, 20, 128);
3615 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3616 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3617 sum -= c[DIAG_DOWN_LEFT_PRED];
3618 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3619 sum -= c[VERT_LEFT_PRED];
3620 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3621 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// uv intra modes, conditioned on the co-located y mode; same tree as above
3625 for (i = 0; i < 10; i++) {
3626 uint8_t *pp = p->uv_mode[i];
3627 unsigned *c = s->counts.uv_mode[i], sum, s2;
3629 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3630 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3631 sum -= c[TM_VP8_PRED];
3632 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3633 sum -= c[VERT_PRED];
3634 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3635 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3637 adapt_prob(&pp[3], s2, sum, 20, 128);
3639 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3640 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3641 sum -= c[DIAG_DOWN_LEFT_PRED];
3642 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3643 sum -= c[VERT_LEFT_PRED];
3644 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3645 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3649 static void free_buffers(VP9Context *s)
3651 av_freep(&s->intra_pred_data[0]);
3652 av_freep(&s->b_base);
3653 av_freep(&s->block_base);
3656 static av_cold int vp9_decode_free(AVCodecContext *ctx)
// Codec close callback: unreference and free all internal frame containers
// and reference slots.
// NOTE(review): this excerpt appears line-subsampled — the declaration of i,
// the trailing scratch-buffer cleanup and the final return of the original
// are not visible here.
3658 VP9Context *s = ctx->priv_data;
// the two internal frames (current + last)
3661 for (i = 0; i < 2; i++) {
3662 if (s->frames[i].tf.f->data[0])
3663 vp9_unref_frame(ctx, &s->frames[i]);
3664 av_frame_free(&s->frames[i].tf.f);
// the 8 reference slots and their pending replacements
3666 for (i = 0; i < 8; i++) {
3667 if (s->refs[i].f->data[0])
3668 ff_thread_release_buffer(ctx, &s->refs[i]);
3669 av_frame_free(&s->refs[i].f);
3670 if (s->next_refs[i].f->data[0])
3671 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3672 av_frame_free(&s->next_refs[i].f);
3682 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3683 int *got_frame, AVPacket *pkt)
// Top-level per-packet decode entry point.
// NOTE(review): this excerpt appears line-subsampled — several braces, else
// branches, error returns, the `do {` opening the pass loop and the
// declarations of f/j/k/l/m/tile_size are not visible; comments below only
// describe the statements that are shown.
3685 const uint8_t *data = pkt->data;
3686 int size = pkt->size;
3687 VP9Context *s = ctx->priv_data;
3688 int res, tile_row, tile_col, i, ref, row, col;
3689 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
// parse the frame header; res == 0 is the "show existing reference" case
3692 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3694 } else if (res == 0) {
3695 if (!s->refs[ref].f->data[0]) {
3696 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3697 return AVERROR_INVALIDDATA;
3699 if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
// rotate frame slots: the current frame becomes "last" for inter prediction
3707 if (s->frames[LAST_FRAME].tf.f->data[0])
3708 vp9_unref_frame(ctx, &s->frames[LAST_FRAME])
3709 if (!s->keyframe && s->frames[CUR_FRAME].tf.f->data[0] &&
3710 (res = vp9_ref_frame(ctx, &s->frames[LAST_FRAME], &s->frames[CUR_FRAME])) < 0)
3712 if (s->frames[CUR_FRAME].tf.f->data[0])
3713 vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
3714 if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
3716 f = s->frames[CUR_FRAME].tf.f;
3717 f->key_frame = s->keyframe;
3718 f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3719 ls_y = f->linesize[0];
3720 ls_uv =f->linesize[1];
// build the next reference set according to the header's refresh mask
3723 for (i = 0; i < 8; i++) {
3724 if (s->next_refs[i].f->data[0])
3725 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3726 if (s->refreshrefmask & (1 << i)) {
3727 res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
3729 res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
3735 // main tile decode loop
// reset the above-row (frame-wide) contexts
3736 memset(s->above_partition_ctx, 0, s->cols);
3737 memset(s->above_skip_ctx, 0, s->cols);
3738 if (s->keyframe || s->intraonly) {
3739 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
3741 memset(s->above_mode_ctx, NEARESTMV, s->cols);
3743 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
3744 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
3745 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
3746 memset(s->above_segpred_ctx, 0, s->cols);
// 2-pass decoding is used with frame threads when ctx refresh isn't parallel
3747 s->pass = s->uses_2pass =
3748 ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
3749 if ((res = update_block_buffers(ctx)) < 0) {
3750 av_log(ctx, AV_LOG_ERROR,
3751 "Failed to allocate block buffers\n");
// parallel mode: commit the probability update up front so dependent
// threads can proceed without waiting for backward adaptation
3754 if (s->refreshctx && s->parallelmode) {
3757 for (i = 0; i < 4; i++) {
3758 for (j = 0; j < 2; j++)
3759 for (k = 0; k < 2; k++)
3760 for (l = 0; l < 6; l++)
3761 for (m = 0; m < 6; m++)
3762 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
3763 s->prob.coef[i][j][k][l][m], 3);
3764 if (s->txfmmode == i)
3767 s->prob_ctx[s->framectxid].p = s->prob.p;
3768 ff_thread_finish_setup(ctx);
// per-pass reset of the coefficient/eob scratch pointers
3774 s->block = s->block_base;
3775 s->uvblock[0] = s->uvblock_base[0];
3776 s->uvblock[1] = s->uvblock_base[1];
3777 s->eob = s->eob_base;
3778 s->uveob[0] = s->uveob_base[0];
3779 s->uveob[1] = s->uveob_base[1];
// locate each tile's bitstream and set up one range decoder per tile column
3781 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
3782 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
3783 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
3785 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
// the final tile has no explicit 32-bit size prefix: it uses the rest
3788 if (tile_col == s->tiling.tile_cols - 1 &&
3789 tile_row == s->tiling.tile_rows - 1) {
3792 tile_size = AV_RB32(data);
3796 if (tile_size > size) {
3797 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3798 return AVERROR_INVALIDDATA;
3800 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
3801 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
3802 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3803 return AVERROR_INVALIDDATA;
// decode superblock rows: 8 block units = 64 luma / 32 chroma lines each
3810 for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
3811 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
3812 struct VP9Filter *lflvl_ptr = s->lflvl;
3813 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
3815 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3816 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
3817 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
// left-edge contexts restart at every tile-column boundary
3820 memset(s->left_partition_ctx, 0, 8);
3821 memset(s->left_skip_ctx, 0, 8);
3822 if (s->keyframe || s->intraonly) {
3823 memset(s->left_mode_ctx, DC_PRED, 16);
3825 memset(s->left_mode_ctx, NEARESTMV, 8);
3827 memset(s->left_y_nnz_ctx, 0, 16);
3828 memset(s->left_uv_nnz_ctx, 0, 16);
3829 memset(s->left_segpred_ctx, 0, 8);
// swap in this tile column's range-decoder state
3831 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
3834 for (col = s->tiling.tile_col_start;
3835 col < s->tiling.tile_col_end;
3836 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3837 // FIXME integrate with lf code (i.e. zero after each
3838 // use, similar to invtxfm coefficients, or similar)
3840 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
3844 decode_sb_mem(ctx, row, col, lflvl_ptr,
3845 yoff2, uvoff2, BL_64X64);
3847 decode_sb(ctx, row, col, lflvl_ptr,
3848 yoff2, uvoff2, BL_64X64);
// save the range-decoder state back for the next superblock row
3852 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
3860 // backup pre-loopfilter reconstruction data for intra
3861 // prediction of next row of sb64s
3862 if (row + 8 < s->rows) {
3863 memcpy(s->intra_pred_data[0],
3864 f->data[0] + yoff + 63 * ls_y,
3866 memcpy(s->intra_pred_data[1],
3867 f->data[1] + uvoff + 31 * ls_uv,
3869 memcpy(s->intra_pred_data[2],
3870 f->data[2] + uvoff + 31 * ls_uv,
3874 // loopfilter one row
3875 if (s->filter.level) {
3878 lflvl_ptr = s->lflvl;
3879 for (col = 0; col < s->cols;
3880 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3881 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
3885 // FIXME maybe we can make this more finegrained by running the
3886 // loopfilter per-block instead of after each sbrow
3887 // In fact that would also make intra pred left preparation easier?
3888 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
// after pass 1: backward-adapt probabilities unless parallel mode did it
3892 if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
3894 ff_thread_finish_setup(ctx);
3896 } while (s->pass++ == 1);
3897 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
// promote next_refs to refs for the following frame
3900 for (i = 0; i < 8; i++) {
3901 if (s->refs[i].f->data[0])
3902 ff_thread_release_buffer(ctx, &s->refs[i]);
3903 ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
// only emit the frame if the header did not mark it invisible
3906 if (!s->invisible) {
3907 if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
3915 static void vp9_decode_flush(AVCodecContext *ctx)
3917 VP9Context *s = ctx->priv_data;
3920 for (i = 0; i < 2; i++)
3921 vp9_unref_frame(ctx, &s->frames[i]);
3922 for (i = 0; i < 8; i++)
3923 ff_thread_release_buffer(ctx, &s->refs[i]);
3926 static int init_frames(AVCodecContext *ctx)
3928 VP9Context *s = ctx->priv_data;
3931 for (i = 0; i < 2; i++) {
3932 s->frames[i].tf.f = av_frame_alloc();
3933 if (!s->frames[i].tf.f) {
3934 vp9_decode_free(ctx);
3935 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
3936 return AVERROR(ENOMEM);
3939 for (i = 0; i < 8; i++) {
3940 s->refs[i].f = av_frame_alloc();
3941 s->next_refs[i].f = av_frame_alloc();
3942 if (!s->refs[i].f || !s->next_refs[i].f) {
3943 vp9_decode_free(ctx);
3944 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
3945 return AVERROR(ENOMEM);
3952 static av_cold int vp9_decode_init(AVCodecContext *ctx)
3954 VP9Context *s = ctx->priv_data;
3956 ctx->internal->allocate_progress = 1;
3957 ctx->pix_fmt = AV_PIX_FMT_YUV420P;
3958 ff_vp9dsp_init(&s->dsp);
3959 ff_videodsp_init(&s->vdsp, 8);
3960 s->filter.sharpness = -1;
3962 return init_frames(ctx);
3965 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
3967 return init_frames(avctx);
3970 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
// Frame-threading sync: copy the frame/reference/probability state the next
// frame needs from the source thread's context (ssrc) into this one (s).
// NOTE(review): this excerpt appears line-subsampled — the declarations of
// i/res, the error returns, the size-change cleanup body and the final
// return are not visible here.
3973 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
3975 // detect size changes in other threads
3976 if (s->intra_pred_data[0] &&
3977 (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
// copy the two internal frames (current + last)
3981 for (i = 0; i < 2; i++) {
3982 if (s->frames[i].tf.f->data[0])
3983 vp9_unref_frame(dst, &s->frames[i]);
3984 if (ssrc->frames[i].tf.f->data[0]) {
3985 if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
// adopt the source thread's post-frame reference set
3989 for (i = 0; i < 8; i++) {
3990 if (s->refs[i].f->data[0])
3991 ff_thread_release_buffer(dst, &s->refs[i]);
3992 if (ssrc->next_refs[i].f->data[0]) {
3993 if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
// scalar flags and probability contexts required to parse the next frame
3998 s->invisible = ssrc->invisible;
3999 s->keyframe = ssrc->keyframe;
4000 s->uses_2pass = ssrc->uses_2pass;
4001 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4002 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
4003 if (ssrc->segmentation.enabled) {
4004 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4005 sizeof(s->segmentation.feat));
// Decoder registration with libavcodec.
// NOTE(review): this excerpt appears line-subsampled — the `.name` field and
// the closing brace of this initializer are not visible here.
4011 AVCodec ff_vp9_decoder = {
4013 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4014 .type = AVMEDIA_TYPE_VIDEO,
4015 .id = AV_CODEC_ID_VP9,
4016 .priv_data_size = sizeof(VP9Context),
4017 .init = vp9_decode_init,
4018 .close = vp9_decode_free,
4019 .decode = vp9_decode_frame,
// DR1 (direct rendering) and frame-level multithreading are supported
4020 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4021 .flush = vp9_decode_flush,
4022 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4023 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),