2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
35 #define VP9_SYNCCODE 0x498342
72 typedef struct VP9Frame {
// Single reference-counted buffer backing both side-data arrays below;
// segmentation_map and mv point into extradata->data (see vp9_alloc_frame),
// so both share extradata's lifetime.
74 AVBufferRef *extradata;
// Per-8x8-block segment ids; sized 64 * sb_cols * sb_rows in vp9_alloc_frame.
75 uint8_t *segmentation_map;
// Per-8x8-block motion vectors + reference indices, used for MV prediction
// by find_ref_mvs(); stored directly after segmentation_map in extradata.
76 struct VP9mvrefPair *mv;
// Loop-filter edge bitmasks; the dimensions are annotated inline
// (plane: y/uv, edge direction: col/row, 8 rows, transform-size class).
// NOTE(review): the enclosing struct's declaration lies outside this extract.
81 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
82 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
// Per-block decode state: everything the mode/MV parser produces for one
// coding block before reconstruction.
85 typedef struct VP9Block {
// seg_id: segment index; intra/comp: intra-coded resp. compound-inter flags;
// ref[2]: reference indices; mode[4]: per-sub-block prediction modes;
// uvmode: chroma intra mode; skip: coefficient-skip flag.
86 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
// Interpolation filter chosen for this block.
87 enum FilterMode filter;
// One MV pair (up to two references) per sub-block index.
88 VP56mv mv[4 /* b_idx */][2 /* ref */];
// Luma and chroma transform sizes.
90 enum TxfmMode tx, uvtx;
// How this block was split by the partition tree.
92 enum BlockPartition bp;
// Decoder instance state: frame-header fields, probability models, adaptation
// counters, per-row prediction context caches and reconstruction scratch.
95 typedef struct VP9Context {
// b_base: per-frame array of block structs; b: cursor into it.
102 VP9Block *b_base, *b;
// Two-pass (frame-threaded) decode bookkeeping.
103 int pass, uses_2pass, last_uses_2pass;
// Current block position in 8x8 units; row7/col7 are the low 3 bits.
104 int row, row7, col, col7;
106 ptrdiff_t y_stride, uv_stride;
// frame header fields parsed in decode_frame_header()
110 uint8_t keyframe, last_keyframe;
// Cleared on error-resilient frames, invisible predecessors or size changes
// (see decode_frame_header); gates temporal MV candidates in find_ref_mvs().
112 uint8_t use_last_frame_mvs;
// Bitmask of reference slots refreshed by this frame.
118 uint8_t refreshrefmask;
119 uint8_t highprecisionmvs;
120 enum FilterMode filtermode;
// Set when reference sign biases differ, enabling compound inter prediction.
121 uint8_t allowcompinter;
124 uint8_t parallelmode;
// The two references used for variable-ref compound prediction.
128 uint8_t varcompref[2];
// Reference frame slots (current and staged next set).
129 ThreadFrame refs[8], next_refs[8];
// Loop-filter limit LUT, reset whenever filter sharpness changes.
138 uint8_t mblim_lut[64];
// Per-plane DC/AC quantizer deltas relative to yac_qi.
146 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
// Segmentation feature values are absolute rather than deltas.
151 uint8_t absolute_vals;
157 uint8_t skip_enabled;
// tiling parameters (NOTE(review): enclosing sub-struct headers are not
// visible in this extract; fields appear to live in nested structs).
166 unsigned log2_tile_cols, log2_tile_rows;
167 unsigned tile_cols, tile_rows;
168 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
// Frame geometry: superblocks (64x64) and 8x8 blocks.
170 unsigned sb_cols, sb_rows, rows, cols;
// Stored coefficient probability model (3 explicit probs per node).
173 uint8_t coef[4][2][2][6][6][3];
// Working coefficient model with the Pareto-extended 11-entry nodes.
177 uint8_t coef[4][2][2][6][6][11];
// adaptation counters, bumped during decode and used for backward updates
182 unsigned y_mode[4][10];
183 unsigned uv_mode[10][10];
184 unsigned filter[4][3];
185 unsigned mv_mode[7][4];
186 unsigned intra[4][2];
188 unsigned single_ref[5][2][2];
189 unsigned comp_ref[5][2];
190 unsigned tx32p[2][4];
191 unsigned tx16p[2][3];
194 unsigned mv_joint[4];
197 unsigned classes[11];
199 unsigned bits[10][2];
200 unsigned class0_fp[2][4];
202 unsigned class0_hp[2];
205 unsigned partition[4][4][4];
206 unsigned coef[4][2][2][6][6][3];
207 unsigned eob[4][2][2][6][6][2];
209 enum TxfmMode txfmmode;
210 enum CompPredMode comppredmode;
212 // contextual (left/above) cache
213 DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
214 DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
215 DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
216 DECLARE_ALIGNED(8, uint8_t, left_uv_nnz_ctx)[2][8];
217 DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
218 DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
219 DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
220 DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
221 DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
222 DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
223 DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
224 DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
// "above" context rows; all carved out of one allocation in update_size().
225 uint8_t *above_partition_ctx;
226 uint8_t *above_mode_ctx;
227 // FIXME maybe merge some of the below in a flags field?
228 uint8_t *above_y_nnz_ctx;
229 uint8_t *above_uv_nnz_ctx[2];
230 uint8_t *above_skip_ctx; // 1bit
231 uint8_t *above_txfm_ctx; // 2bit
232 uint8_t *above_segpred_ctx; // 1bit
233 uint8_t *above_intra_ctx; // 1bit
234 uint8_t *above_comp_ctx; // 1bit
235 uint8_t *above_ref_ctx; // 2bit
236 uint8_t *above_filter_ctx;
237 VP56mv (*above_mv_ctx)[2];
// whole-frame cache
240 uint8_t *intra_pred_data[3];
241 struct VP9Filter *lflvl;
242 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71*80];
244 // block reconstruction intermediates
245 int block_alloc_using_2pass;
246 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
247 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
// MV clamping range for the current block (used by clamp_mv()).
248 struct { int x, y; } min_mv, max_mv;
249 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
250 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
// Block width/height lookup per BS_* block size.
// NOTE(review): first table appears to be in 4x4 units, second in 8x8 units
// (e.g. 64x64 -> {16,16} resp. {8,8}) — confirm against callers.
253 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
255 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
256 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
258 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
259 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
// Allocate one VP9Frame: the picture buffer via the frame-threading API plus
// a single zeroed side-data buffer holding the segmentation map followed by
// the per-8x8-block mv/ref array. Returns 0 on success, <0 (AVERROR) on
// failure; on side-data allocation failure the picture buffer is released.
263 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
265 VP9Context *s = ctx->priv_data;
268 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
// 64 entries per 64x64 superblock = one per 8x8 block.
270 sz = 64 * s->sb_cols * s->sb_rows;
271 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
272 ff_thread_release_buffer(ctx, &f->tf);
273 return AVERROR(ENOMEM);
// segmentation map first, then the mv array, in the same buffer
276 f->segmentation_map = f->extradata->data;
277 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
279 // retain segmentation map if it doesn't update
280 if (s->segmentation.enabled && !s->segmentation.update_map &&
281 !s->intraonly && !s->keyframe) {
282 memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, sz);
// Release a VP9Frame: drop the threaded picture buffer, then unref the
// side-data buffer (which also invalidates segmentation_map/mv, since they
// point into it).
288 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
290 ff_thread_release_buffer(ctx, &f->tf);
291 av_buffer_unref(&f->extradata);
// Make dst a new reference to src: ref the threaded picture, then ref the
// shared side-data buffer. On side-data failure the partly-built dst is
// unreferenced again and AVERROR(ENOMEM) is returned. Note segmentation_map
// is shared (pointer copy), not duplicated.
294 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
298 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
300 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
301 vp9_unref_frame(ctx, dst);
302 return AVERROR(ENOMEM);
305 dst->segmentation_map = src->segmentation_map;
// (Re)initialize size-dependent decoder state for a w x h frame: compute
// superblock/block geometry and carve all per-superblock-column "above"
// context arrays, intra prediction rows and the loop-filter level array out
// of one allocation. No-op if the size is unchanged and already set up.
// Returns 0 or AVERROR(ENOMEM).
311 static int update_size(AVCodecContext *ctx, int w, int h)
313 VP9Context *s = ctx->priv_data;
316 av_assert0(w > 0 && h > 0);
// already initialized at this exact size -> nothing to do
318 if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height)
// 64x64 superblocks and 8x8 blocks, rounded up
323 s->sb_cols = (w + 63) >> 6;
324 s->sb_rows = (h + 63) >> 6;
325 s->cols = (w + 7) >> 3;
326 s->rows = (h + 7) >> 3;
// slice the single buffer p into the typed arrays below; n is the
// per-superblock-column element count
328 #define assign(var, type, n) var = (type) p; p += s->sb_cols * n * sizeof(*var)
329 av_freep(&s->intra_pred_data[0]);
// 240 = sum of all the byte-sized per-sb-col counts assigned below
330 p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
332 return AVERROR(ENOMEM);
333 assign(s->intra_pred_data[0], uint8_t *, 64);
334 assign(s->intra_pred_data[1], uint8_t *, 32);
335 assign(s->intra_pred_data[2], uint8_t *, 32);
336 assign(s->above_y_nnz_ctx, uint8_t *, 16);
337 assign(s->above_mode_ctx, uint8_t *, 16);
338 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
339 assign(s->above_partition_ctx, uint8_t *, 8);
340 assign(s->above_skip_ctx, uint8_t *, 8);
341 assign(s->above_txfm_ctx, uint8_t *, 8);
342 assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
343 assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
344 assign(s->above_segpred_ctx, uint8_t *, 8);
345 assign(s->above_intra_ctx, uint8_t *, 8);
346 assign(s->above_comp_ctx, uint8_t *, 8);
347 assign(s->above_ref_ctx, uint8_t *, 8);
348 assign(s->above_filter_ctx, uint8_t *, 8);
349 assign(s->lflvl, struct VP9Filter *, 1);
352 // these will be re-allocated a little later
353 av_freep(&s->b_base);
354 av_freep(&s->block_base);
// (Re)allocate the per-block struct array and coefficient/EOB scratch
// buffers. In 2-pass mode the buffers cover the whole frame (sized by
// superblock count); otherwise a single superblock's worth is enough and is
// reused. No-op if already allocated for the current mode. Returns 0 or
// AVERROR(ENOMEM). NOTE(review): the if/else selecting between the two
// allocation paths is not visible in this extract.
359 static int update_block_buffers(AVCodecContext *ctx)
361 VP9Context *s = ctx->priv_data;
// correct buffers already in place for the current pass mode
363 if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->uses_2pass)
367 av_free(s->block_base);
369 int sbs = s->sb_cols * s->sb_rows;
// whole-frame allocation (2-pass): 64*64 luma + 2 * 32*32 chroma
// coefficients per superblock, plus eob bytes (the +128 covers them)
371 s->b_base = av_malloc(sizeof(VP9Block) * s->cols * s->rows);
372 s->block_base = av_mallocz((64 * 64 + 128) * sbs * 3);
373 if (!s->b_base || !s->block_base)
374 return AVERROR(ENOMEM);
375 s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
376 s->uvblock_base[1] = s->uvblock_base[0] + sbs * 32 * 32;
377 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * 32 * 32);
378 s->uveob_base[0] = s->eob_base + 256 * sbs;
379 s->uveob_base[1] = s->uveob_base[0] + 64 * sbs;
// single-superblock allocation (1-pass): same layout, sbs == 1
381 s->b_base = av_malloc(sizeof(VP9Block));
382 s->block_base = av_mallocz((64 * 64 + 128) * 3);
383 if (!s->b_base || !s->block_base)
384 return AVERROR(ENOMEM);
385 s->uvblock_base[0] = s->block_base + 64 * 64;
386 s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
387 s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
388 s->uveob_base[0] = s->eob_base + 256;
389 s->uveob_base[1] = s->uveob_base[0] + 64;
// remember which mode these buffers were sized for
391 s->block_alloc_using_2pass = s->uses_2pass;
396 // for some reason the sign bit is at the end, not the start, of a bit sequence
397 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
399 int v = get_bits(gb, n);
400 return get_bits1(gb) ? -v : v;
403 static av_always_inline int inv_recenter_nonneg(int v, int m)
405 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
408 // differential forward probability updates
// Decode a differentially-coded probability update: read a VLC-bucketed
// delta d from the range coder, map it through inv_map_table[] (coarse
// 16-step values first, then the fine values), and recenter it around the
// current probability p. Returns the new probability in [1, 255].
409 static int update_prob(VP56RangeCoder *c, int p)
411 static const int inv_map_table[254] = {
412 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
413 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
414 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
415 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
416 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
417 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
418 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
419 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
420 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
421 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
422 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
423 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
424 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
425 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
426 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
427 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
428 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
429 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
434 /* This code is trying to do a differential probability update. For a
435 * current probability A in the range [1, 255], the difference to a new
436 * probability of any value can be expressed differentially as 1-A,255-A
437 * where some part of this (absolute range) exists both in positive as
438 * well as the negative part, whereas another part only exists in one
439 * half. We're trying to code this shared part differentially, i.e.
440 * times two where the value of the lowest bit specifies the sign, and
441 * the single part is then coded on top of this. This absolute difference
442 * then again has a value of [0,254], but a bigger value in this range
443 * indicates that we're further away from the original value A, so we
444 * can code this as a VLC code, since higher values are increasingly
445 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
446 * updates vs. the 'fine, exact' updates further down the range, which
447 * adds one extra dimension to this differential update model. */
// three escape bits select the bucket: [0,16), [16,32), [32,64), then a
// 7-bit value with a subexponential tail for the rest
449 if (!vp8_rac_get(c)) {
450 d = vp8_rac_get_uint(c, 4) + 0;
451 } else if (!vp8_rac_get(c)) {
452 d = vp8_rac_get_uint(c, 4) + 16;
453 } else if (!vp8_rac_get(c)) {
454 d = vp8_rac_get_uint(c, 5) + 32;
456 d = vp8_rac_get_uint(c, 7);
458 d = (d << 1) - 65 + vp8_rac_get(c);
// recenter around p, mirroring for the upper half so the result stays
// inside [1, 255]
462 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
463 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
// Parse a complete VP9 frame header: the bit-exact uncompressed header
// (frame marker, sync code, sizes, reference setup, loop filter,
// quantizers, segmentation, tiling) followed by the arith-coded compressed
// header (probability updates). On success returns the total header size in
// bytes ((data2 - data) + size2); on error returns AVERROR_INVALIDDATA or
// the failing sub-call's error. For "show existing frame" headers, *ref is
// set to the reference slot to display.
466 static int decode_frame_header(AVCodecContext *ctx,
467 const uint8_t *data, int size, int *ref)
469 VP9Context *s = ctx->priv_data;
470 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
472 const uint8_t *data2;
475 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
476 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
479 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
480 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
481 return AVERROR_INVALIDDATA;
483 s->profile = get_bits1(&s->gb);
484 if (get_bits1(&s->gb)) { // reserved bit
485 av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
486 return AVERROR_INVALIDDATA;
// "show existing frame": just report which reference slot to display
488 if (get_bits1(&s->gb)) {
489 *ref = get_bits(&s->gb, 3);
492 s->last_uses_2pass = s->uses_2pass;
493 s->last_keyframe = s->keyframe;
494 s->keyframe = !get_bits1(&s->gb);
495 last_invisible = s->invisible;
496 s->invisible = !get_bits1(&s->gb);
497 s->errorres = get_bits1(&s->gb);
// temporal MV prediction is unsafe after error-resilient or invisible frames
498 s->use_last_frame_mvs = !s->errorres && !last_invisible;
/* keyframe path: sync code, colorspace and explicit frame size */
500 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
501 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
502 return AVERROR_INVALIDDATA;
504 s->colorspace = get_bits(&s->gb, 3);
505 if (s->colorspace == 7) { // RGB = profile 1
506 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
507 return AVERROR_INVALIDDATA;
509 s->fullrange = get_bits1(&s->gb);
510 // for profile 1, here follows the subsampling bits
// keyframes implicitly refresh every reference slot
511 s->refreshrefmask = 0xff;
512 w = get_bits(&s->gb, 16) + 1;
513 h = get_bits(&s->gb, 16) + 1;
514 if (get_bits1(&s->gb)) // display size
515 skip_bits(&s->gb, 32);
/* non-keyframe path: intra-only or inter */
517 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
518 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
// intra-only frames repeat the sync code and carry an explicit size
520 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
521 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
522 return AVERROR_INVALIDDATA;
524 s->refreshrefmask = get_bits(&s->gb, 8);
525 w = get_bits(&s->gb, 16) + 1;
526 h = get_bits(&s->gb, 16) + 1;
527 if (get_bits1(&s->gb)) // display size
528 skip_bits(&s->gb, 32);
// inter frames: three active references with per-reference sign bias
530 s->refreshrefmask = get_bits(&s->gb, 8);
531 s->refidx[0] = get_bits(&s->gb, 3);
532 s->signbias[0] = get_bits1(&s->gb);
533 s->refidx[1] = get_bits(&s->gb, 3);
534 s->signbias[1] = get_bits1(&s->gb);
535 s->refidx[2] = get_bits(&s->gb, 3);
536 s->signbias[2] = get_bits1(&s->gb);
537 if (!s->refs[s->refidx[0]].f->data[0] ||
538 !s->refs[s->refidx[1]].f->data[0] ||
539 !s->refs[s->refidx[2]].f->data[0]) {
540 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
541 return AVERROR_INVALIDDATA;
// size is either inherited from one of the references or coded explicitly
543 if (get_bits1(&s->gb)) {
544 w = s->refs[s->refidx[0]].f->width;
545 h = s->refs[s->refidx[0]].f->height;
546 } else if (get_bits1(&s->gb)) {
547 w = s->refs[s->refidx[1]].f->width;
548 h = s->refs[s->refidx[1]].f->height;
549 } else if (get_bits1(&s->gb)) {
550 w = s->refs[s->refidx[2]].f->width;
551 h = s->refs[s->refidx[2]].f->height;
553 w = get_bits(&s->gb, 16) + 1;
554 h = get_bits(&s->gb, 16) + 1;
556 // Note that in this code, "CUR_FRAME" is actually before we
557 // have formally allocated a frame, and thus actually represents
// ... the previous frame's geometry; size changes disable temporal MVs
559 s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
560 s->frames[CUR_FRAME].tf.f->height == h;
561 if (get_bits1(&s->gb)) // display size
562 skip_bits(&s->gb, 32);
563 s->highprecisionmvs = get_bits1(&s->gb);
564 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
// compound prediction is only possible when the sign biases differ
566 s->allowcompinter = s->signbias[0] != s->signbias[1] ||
567 s->signbias[0] != s->signbias[2];
568 if (s->allowcompinter) {
// pick fixed + the two variable compound references by bias grouping
569 if (s->signbias[0] == s->signbias[1]) {
571 s->varcompref[0] = 0;
572 s->varcompref[1] = 1;
573 } else if (s->signbias[0] == s->signbias[2]) {
575 s->varcompref[0] = 0;
576 s->varcompref[1] = 2;
579 s->varcompref[0] = 1;
580 s->varcompref[1] = 2;
585 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
586 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
587 s->framectxid = c = get_bits(&s->gb, 2);
589 /* loopfilter header data */
590 s->filter.level = get_bits(&s->gb, 6);
591 sharp = get_bits(&s->gb, 3);
592 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
593 // the old cache values since they are still valid
594 if (s->filter.sharpness != sharp)
595 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
596 s->filter.sharpness = sharp;
597 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
598 if (get_bits1(&s->gb)) {
599 for (i = 0; i < 4; i++)
600 if (get_bits1(&s->gb))
601 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
602 for (i = 0; i < 2; i++)
603 if (get_bits1(&s->gb))
604 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
607 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
610 /* quantization header data */
611 s->yac_qi = get_bits(&s->gb, 8);
612 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
613 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
614 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
// qindex 0 with no deltas signals lossless (WHT) coding
615 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
616 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
618 /* segmentation header info */
619 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
620 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
621 for (i = 0; i < 7; i++)
622 s->prob.seg[i] = get_bits1(&s->gb) ?
623 get_bits(&s->gb, 8) : 255;
624 if ((s->segmentation.temporal = get_bits1(&s->gb))) {
625 for (i = 0; i < 3; i++)
626 s->prob.segpred[i] = get_bits1(&s->gb) ?
627 get_bits(&s->gb, 8) : 255;
// reusing the previous segmap is impossible across a size change
630 if ((!s->segmentation.update_map || s->segmentation.temporal) &&
631 (w != s->frames[CUR_FRAME].tf.f->width ||
632 h != s->frames[CUR_FRAME].tf.f->height)) {
633 av_log(ctx, AV_LOG_ERROR,
634 "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
635 s->segmentation.temporal, s->segmentation.update_map);
636 return AVERROR_INVALIDDATA;
// per-segment feature data (quant, loop filter, reference, skip)
639 if (get_bits1(&s->gb)) {
640 s->segmentation.absolute_vals = get_bits1(&s->gb);
641 for (i = 0; i < 8; i++) {
642 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
643 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
644 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
645 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
646 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
647 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
648 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
652 s->segmentation.feat[0].q_enabled = 0;
653 s->segmentation.feat[0].lf_enabled = 0;
654 s->segmentation.feat[0].skip_enabled = 0;
655 s->segmentation.feat[0].ref_enabled = 0;
658 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
659 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
660 int qyac, qydc, quvac, quvdc, lflvl, sh;
662 if (s->segmentation.feat[i].q_enabled) {
663 if (s->segmentation.absolute_vals)
664 qyac = s->segmentation.feat[i].q_val;
666 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
670 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
671 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
672 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
673 qyac = av_clip_uintp2(qyac, 8);
675 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
676 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
677 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
678 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];
// precompute loop-filter levels per ref/mode; deltas are scaled up
// (<< sh) when the base filter level is >= 32
680 sh = s->filter.level >= 32;
681 if (s->segmentation.feat[i].lf_enabled) {
682 if (s->segmentation.absolute_vals)
683 lflvl = s->segmentation.feat[i].lf_val;
685 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
687 lflvl = s->filter.level;
689 s->segmentation.feat[i].lflvl[0][0] =
690 s->segmentation.feat[i].lflvl[0][1] =
691 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
692 for (j = 1; j < 4; j++) {
693 s->segmentation.feat[i].lflvl[j][0] =
694 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
695 s->lf_delta.mode[0]) << sh), 6);
696 s->segmentation.feat[i].lflvl[j][1] =
697 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
698 s->lf_delta.mode[1]) << sh), 6);
/* tiling info */
703 if ((res = update_size(ctx, w, h)) < 0) {
704 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
// minimum column count keeps each tile <= 64 superblocks wide ...
707 for (s->tiling.log2_tile_cols = 0;
708 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
709 s->tiling.log2_tile_cols++) ;
// ... and the maximum keeps each tile >= 4 superblocks wide
710 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
711 max = FFMAX(0, max - 1);
712 while (max > s->tiling.log2_tile_cols) {
713 if (get_bits1(&s->gb))
714 s->tiling.log2_tile_cols++;
718 s->tiling.log2_tile_rows = decode012(&s->gb);
719 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
// one range coder per tile column; (re)allocate on column-count change
720 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
721 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
722 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
723 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
725 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
726 return AVERROR(ENOMEM);
// keyframes / error-resilient / intra-only frames reset all four
// probability contexts to the defaults
730 if (s->keyframe || s->errorres || s->intraonly) {
731 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
732 s->prob_ctx[3].p = vp9_default_probs;
733 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
734 sizeof(vp9_default_coef_probs));
735 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
736 sizeof(vp9_default_coef_probs));
737 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
738 sizeof(vp9_default_coef_probs));
739 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
740 sizeof(vp9_default_coef_probs));
743 // next 16 bits is size of the rest of the header (arith-coded)
744 size2 = get_bits(&s->gb, 16);
745 data2 = align_get_bits(&s->gb);
746 if (size2 > size - (data2 - data)) {
747 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
748 return AVERROR_INVALIDDATA;
750 ff_vp56_init_range_decoder(&s->c, data2, size2);
751 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
752 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
753 return AVERROR_INVALIDDATA;
// reset adaptation counters (coef+eob are adjacent, so one memset covers
// both on the intra path)
756 if (s->keyframe || s->intraonly) {
757 memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
759 memset(&s->counts, 0, sizeof(s->counts));
761 // FIXME is it faster to not copy here, but do it down in the fw updates
762 // as explicit copies if the fw update is missing (and skip the copy upon
764 s->prob.p = s->prob_ctx[c].p;
// txfm updates
768 s->txfmmode = TX_4X4;
770 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
771 if (s->txfmmode == 3)
772 s->txfmmode += vp8_rac_get(&s->c);
774 if (s->txfmmode == TX_SWITCHABLE) {
775 for (i = 0; i < 2; i++)
776 if (vp56_rac_get_prob_branchy(&s->c, 252))
777 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
778 for (i = 0; i < 2; i++)
779 for (j = 0; j < 2; j++)
780 if (vp56_rac_get_prob_branchy(&s->c, 252))
781 s->prob.p.tx16p[i][j] =
782 update_prob(&s->c, s->prob.p.tx16p[i][j]);
783 for (i = 0; i < 2; i++)
784 for (j = 0; j < 3; j++)
785 if (vp56_rac_get_prob_branchy(&s->c, 252))
786 s->prob.p.tx32p[i][j] =
787 update_prob(&s->c, s->prob.p.tx32p[i][j]);
// coef probability updates, per transform size
792 for (i = 0; i < 4; i++) {
793 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
794 if (vp8_rac_get(&s->c)) {
795 for (j = 0; j < 2; j++)
796 for (k = 0; k < 2; k++)
797 for (l = 0; l < 6; l++)
798 for (m = 0; m < 6; m++) {
799 uint8_t *p = s->prob.coef[i][j][k][l][m];
800 uint8_t *r = ref[j][k][l][m];
801 if (m >= 3 && l == 0) // dc only has 3 pt
803 for (n = 0; n < 3; n++) {
804 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
805 p[n] = update_prob(&s->c, r[n]);
// no update coded: copy the stored context instead
813 for (j = 0; j < 2; j++)
814 for (k = 0; k < 2; k++)
815 for (l = 0; l < 6; l++)
816 for (m = 0; m < 6; m++) {
817 uint8_t *p = s->prob.coef[i][j][k][l][m];
818 uint8_t *r = ref[j][k][l][m];
// note: deliberately '>' here (vs '>=' above) — this path copies
// the m == 3 entry too, matching libvpx behavior
819 if (m > 3 && l == 0) // dc only has 3 pt
825 if (s->txfmmode == i)
// mode and ref probability updates
830 for (i = 0; i < 3; i++)
831 if (vp56_rac_get_prob_branchy(&s->c, 252))
832 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
833 if (!s->keyframe && !s->intraonly) {
834 for (i = 0; i < 7; i++)
835 for (j = 0; j < 3; j++)
836 if (vp56_rac_get_prob_branchy(&s->c, 252))
837 s->prob.p.mv_mode[i][j] =
838 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
840 if (s->filtermode == FILTER_SWITCHABLE)
841 for (i = 0; i < 4; i++)
842 for (j = 0; j < 2; j++)
843 if (vp56_rac_get_prob_branchy(&s->c, 252))
844 s->prob.p.filter[i][j] =
845 update_prob(&s->c, s->prob.p.filter[i][j]);
847 for (i = 0; i < 4; i++)
848 if (vp56_rac_get_prob_branchy(&s->c, 252))
849 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
851 if (s->allowcompinter) {
852 s->comppredmode = vp8_rac_get(&s->c);
854 s->comppredmode += vp8_rac_get(&s->c);
855 if (s->comppredmode == PRED_SWITCHABLE)
856 for (i = 0; i < 5; i++)
857 if (vp56_rac_get_prob_branchy(&s->c, 252))
859 update_prob(&s->c, s->prob.p.comp[i]);
861 s->comppredmode = PRED_SINGLEREF;
864 if (s->comppredmode != PRED_COMPREF) {
865 for (i = 0; i < 5; i++) {
866 if (vp56_rac_get_prob_branchy(&s->c, 252))
867 s->prob.p.single_ref[i][0] =
868 update_prob(&s->c, s->prob.p.single_ref[i][0]);
869 if (vp56_rac_get_prob_branchy(&s->c, 252))
870 s->prob.p.single_ref[i][1] =
871 update_prob(&s->c, s->prob.p.single_ref[i][1]);
875 if (s->comppredmode != PRED_SINGLEREF) {
876 for (i = 0; i < 5; i++)
877 if (vp56_rac_get_prob_branchy(&s->c, 252))
878 s->prob.p.comp_ref[i] =
879 update_prob(&s->c, s->prob.p.comp_ref[i]);
882 for (i = 0; i < 4; i++)
883 for (j = 0; j < 9; j++)
884 if (vp56_rac_get_prob_branchy(&s->c, 252))
885 s->prob.p.y_mode[i][j] =
886 update_prob(&s->c, s->prob.p.y_mode[i][j]);
888 for (i = 0; i < 4; i++)
889 for (j = 0; j < 4; j++)
890 for (k = 0; k < 3; k++)
891 if (vp56_rac_get_prob_branchy(&s->c, 252))
892 s->prob.p.partition[3 - i][j][k] =
893 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
895 // mv fields don't use the update_prob subexp model for some reason
896 for (i = 0; i < 3; i++)
897 if (vp56_rac_get_prob_branchy(&s->c, 252))
898 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
900 for (i = 0; i < 2; i++) {
901 if (vp56_rac_get_prob_branchy(&s->c, 252))
902 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
904 for (j = 0; j < 10; j++)
905 if (vp56_rac_get_prob_branchy(&s->c, 252))
906 s->prob.p.mv_comp[i].classes[j] =
907 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
909 if (vp56_rac_get_prob_branchy(&s->c, 252))
910 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
912 for (j = 0; j < 10; j++)
913 if (vp56_rac_get_prob_branchy(&s->c, 252))
914 s->prob.p.mv_comp[i].bits[j] =
915 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
918 for (i = 0; i < 2; i++) {
919 for (j = 0; j < 2; j++)
920 for (k = 0; k < 3; k++)
921 if (vp56_rac_get_prob_branchy(&s->c, 252))
922 s->prob.p.mv_comp[i].class0_fp[j][k] =
923 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
925 for (j = 0; j < 3; j++)
926 if (vp56_rac_get_prob_branchy(&s->c, 252))
927 s->prob.p.mv_comp[i].fp[j] =
928 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// high-precision MV probs only exist when the header enabled them
931 if (s->highprecisionmvs) {
932 for (i = 0; i < 2; i++) {
933 if (vp56_rac_get_prob_branchy(&s->c, 252))
934 s->prob.p.mv_comp[i].class0_hp =
935 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
937 if (vp56_rac_get_prob_branchy(&s->c, 252))
938 s->prob.p.mv_comp[i].hp =
939 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// total bytes consumed: uncompressed header + compressed header
944 return (data2 - data) + size2;
// Clamp an MV candidate into the current block's legal range
// (s->min_mv / s->max_mv, set up per block elsewhere).
// NOTE(review): the trailing parameter line of the signature (the
// VP9Context *s argument used below) is not visible in this extract.
947 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
950 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
951 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
// Build the MV prediction (*pmv) for reference 'ref': scan, in priority
// order, (1) sub-block MVs already decoded in this block, (2) spatial
// neighbours from mv_ref_blk_off[], (3) the co-located MV in the previous
// frame, then the same sources again accepting MVs of a *different*
// reference (sign-flipped when the sign biases disagree). The RETURN_*
// macros exit as soon as enough distinct candidates are found.
954 static void find_ref_mvs(VP9Context *s,
955 VP56mv *pmv, int ref, int z, int idx, int sb)
// per-block-size neighbour offsets {col,row} relative to the current block
957 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
958 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
959 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
960 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
961 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
962 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
963 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
964 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
965 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
966 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
967 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
968 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
969 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
970 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
971 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
972 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
973 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
974 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
975 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
976 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
977 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
978 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
979 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
980 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
981 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
982 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
983 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
986 int row = s->row, col = s->col, row7 = s->row7;
987 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
// sentinel meaning "no candidate stored yet" in mem
988 #define INVALID_MV 0x80008000U
989 uint32_t mem = INVALID_MV;
// return an in-block MV candidate without clamping
992 #define RETURN_DIRECT_MV(mv) \
994 uint32_t m = AV_RN32A(&mv); \
998 } else if (mem == INVALID_MV) { \
1000 } else if (m != mem) { \
// sub == 1/2/3: earlier sub-block MVs of this very block come first
1007 if (sb == 2 || sb == 1) {
1008 RETURN_DIRECT_MV(b->mv[0][z]);
1009 } else if (sb == 3) {
1010 RETURN_DIRECT_MV(b->mv[2][z]);
1011 RETURN_DIRECT_MV(b->mv[1][z]);
1012 RETURN_DIRECT_MV(b->mv[0][z]);
// same as above, but the candidate is clamped to the legal MV range first
1015 #define RETURN_MV(mv) \
1020 clamp_mv(&tmp, &mv, s); \
1021 m = AV_RN32A(&tmp); \
1025 } else if (mem == INVALID_MV) { \
1027 } else if (m != mem) { \
1032 uint32_t m = AV_RN32A(&mv); \
1034 clamp_mv(pmv, &mv, s); \
1036 } else if (mem == INVALID_MV) { \
1038 } else if (m != mem) { \
1039 clamp_mv(pmv, &mv, s); \
// direct above neighbour (cached in above_mv_ctx)
1046 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1047 if (mv->ref[0] == ref) {
1048 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1049 } else if (mv->ref[1] == ref) {
1050 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
// direct left neighbour, only within the current tile column
1053 if (col > s->tiling.tile_col_start) {
1054 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1055 if (mv->ref[0] == ref) {
1056 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1057 } else if (mv->ref[1] == ref) {
1058 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1066 // previously coded MVs in this neighbourhood, using same reference frame
1067 for (; i < 8; i++) {
1068 int c = p[i][0] + col, r = p[i][1] + row;
1070 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1071 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1073 if (mv->ref[0] == ref) {
1074 RETURN_MV(mv->mv[0]);
1075 } else if (mv->ref[1] == ref) {
1076 RETURN_MV(mv->mv[1]);
1081 // MV at this position in previous frame, using same reference frame
1082 if (s->use_last_frame_mvs) {
1083 struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
// in frame-threaded single-pass mode the previous frame may still be
// decoding; wait until its MV row is complete
1085 if (!s->last_uses_2pass)
1086 ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
1087 if (mv->ref[0] == ref) {
1088 RETURN_MV(mv->mv[0]);
1089 } else if (mv->ref[1] == ref) {
1090 RETURN_MV(mv->mv[1]);
// like RETURN_MV, but negates the MV when the sign biases of the two
// references disagree ('scale' nonzero)
1094 #define RETURN_SCALE_MV(mv, scale) \
1097 VP56mv mv_temp = { -mv.x, -mv.y }; \
1098 RETURN_MV(mv_temp); \
1104 // previously coded MVs in this neighbourhood, using different reference frame
1105 for (i = 0; i < 8; i++) {
1106 int c = p[i][0] + col, r = p[i][1] + row;
1108 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1109 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1111 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1112 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1114 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1115 // BUG - libvpx has this condition regardless of whether
1116 // we used the first ref MV and pre-scaling
1117 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1118 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1123 // MV at this position in previous frame, using different reference frame
1124 if (s->use_last_frame_mvs) {
1125 struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
1127 // no need to await_progress, because we already did that above
1128 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1129 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1131 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1132 // BUG - libvpx has this condition regardless of whether
1133 // we used the first ref MV and pre-scaling
1134 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1135 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1142 #undef RETURN_SCALE_MV
// Decode one motion-vector component (idx: 0 = row/y, 1 = column/x) from the
// range coder, updating the per-component entropy counts that are later used
// for backward probability adaptation. 'hp' enables the extra high-precision
// (eighth-pel) bit. Returns the signed component delta.
// NOTE(review): several branch/brace lines are elided in this view; the
// class0 vs. higher-class split below is inferred from the visible counters.
1145 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1147 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
// 'c' is the MV class; it selects how many magnitude bits follow.
1148 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1149 s->prob.p.mv_comp[idx].classes);
1151 s->counts.mv_comp[idx].sign[sign]++;
1152 s->counts.mv_comp[idx].classes[c]++;
// Higher classes: read 'c' raw magnitude bits, then fractional and
// (optionally) high-precision bits.
1156 for (n = 0, m = 0; m < c; m++) {
1157 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1159 s->counts.mv_comp[idx].bits[m][bit]++;
1162 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1164 s->counts.mv_comp[idx].fp[bit]++;
1166 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1167 s->counts.mv_comp[idx].hp[bit]++;
// When hp is disabled, libvpx still counts an implicit hp bit of 1 for
// backward-adaptation parity; replicated here for bitstream compatibility.
1171 // bug in libvpx - we count for bw entropy purposes even if the
1173 s->counts.mv_comp[idx].hp[1]++;
// Class 0: small magnitudes encoded with dedicated class0 probabilities.
1177 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1178 s->counts.mv_comp[idx].class0[n]++;
1179 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1180 s->prob.p.mv_comp[idx].class0_fp[n]);
1181 s->counts.mv_comp[idx].class0_fp[n][bit]++;
1182 n = (n << 3) | (bit << 1);
1184 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1185 s->counts.mv_comp[idx].class0_hp[bit]++;
// Same libvpx counting quirk for the class0 high-precision bit.
1189 // bug in libvpx - we count for bw entropy purposes even if the
1191 s->counts.mv_comp[idx].class0_hp[1]++;
// Magnitude is n+1; apply the decoded sign.
1195 return sign ? -(n + 1) : (n + 1);
// Fill mv[0] (and mv[1] when the block is compound-predicted) for the current
// block/sub-block: look up the reference MV via find_ref_mvs(), then, for
// NEWMV mode, add the motion-vector residual decoded with read_mv_component().
// 'sb' is the sub-block index (-1 for whole-block); ZEROMV short-circuits.
// NOTE(review): the ZEROMV body and the MV-clamping branch bodies are elided
// in this view.
1198 static void fill_mv(VP9Context *s,
1199                     VP56mv *mv, int mode, int sb)
1203 if (mode == ZEROMV) {
1208 // FIXME cache this value and reuse for other subblocks
1209 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1210 mode == NEWMV ? -1 : sb);
// hp is enabled only when high-precision MVs are allowed by the frame header
// AND the predicted MV is small (< 64 in both components), per VP9 spec.
1211 // FIXME maybe move this code into find_ref_mvs()
1212 if ((mode == NEWMV || sb == -1) &&
1213 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1227 if (mode == NEWMV) {
// MV joint tells which of the two components carry a residual.
1228 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1229 s->prob.p.mv_joint);
1231 s->counts.mv_joint[j]++;
1232 if (j >= MV_JOINT_V)
1233 mv[0].y += read_mv_component(s, 0, hp);
1235 mv[0].x += read_mv_component(s, 1, hp);
// Second reference (compound prediction): same procedure for mv[1].
1239 // FIXME cache this value and reuse for other subblocks
1240 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1241 mode == NEWMV ? -1 : sb);
1242 if ((mode == NEWMV || sb == -1) &&
1243 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1257 if (mode == NEWMV) {
1258 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1259 s->prob.p.mv_joint);
1261 s->counts.mv_joint[j]++;
1262 if (j >= MV_JOINT_V)
1263 mv[1].y += read_mv_component(s, 0, hp);
1265 mv[1].x += read_mv_component(s, 1, hp);
// Splat byte value 'v' over a w x h 2-D region of a byte buffer with the
// given stride, using 16/32/64-bit replicated stores for speed. Used e.g.
// to write b->seg_id into the per-frame segmentation map.
// NOTE(review): the switch/loop scaffolding is elided in this view; only the
// replicated-constant setup and one store are visible.
1271 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1272                                        ptrdiff_t stride, int v)
1282 int v16 = v * 0x0101;
1290 uint32_t v32 = v * 0x01010101;
1299 uint64_t v64 = v * 0x0101010101010101ULL;
// Fallback path for targets without fast 64-bit stores: two 32-bit writes.
1305 uint32_t v32 = v * 0x01010101;
1308 AV_WN32A(ptr + 4, v32);
// Decode all mode information for the current block: segment id, skip flag,
// intra/inter flag, transform size, intra prediction modes or inter modes +
// references + motion vectors, then update the left/above context buffers and
// the per-frame MV/reference storage used by future blocks and frames.
// NOTE(review): this function is shown with many lines elided (closing
// braces, else-arms); the section comments below describe the visible code
// and are hedged where surrounding lines are missing.
1317 static void decode_mode(AVCodecContext *ctx)
// Per-block-size partition contexts written into the left/above ctx arrays.
1319 static const uint8_t left_ctx[N_BS_SIZES] = {
1320 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1322 static const uint8_t above_ctx[N_BS_SIZES] = {
1323 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
// Largest transform size permitted for each block size.
1325 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1326 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1327 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1329 VP9Context *s = ctx->priv_data;
1331 int row = s->row, col = s->col, row7 = s->row7;
1332 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
// w4/h4: block extent in 8x8 units, clipped at the frame edge.
1333 int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
1334 int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
// Availability of above/left neighbours (left stops at the tile boundary).
1335 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1336 int vref, filter_id;
// --- Segment id ------------------------------------------------------------
1338 if (!s->segmentation.enabled) {
1340 } else if (s->keyframe || s->intraonly) {
1341 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
1342 } else if (!s->segmentation.update_map ||
1343 (s->segmentation.temporal &&
1344 vp56_rac_get_prob_branchy(&s->c,
1345 s->prob.segpred[s->above_segpred_ctx[col] +
1346 s->left_segpred_ctx[row7]]))) {
// Temporally predicted segment id: take the minimum id over the co-located
// region of the previous frame's segmentation map.
1348 uint8_t *refsegmap = s->frames[LAST_FRAME].segmentation_map;
// Wait for the reference frame's rows to be decoded (frame-threading).
1350 if (!s->last_uses_2pass)
1351 ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
1352 for (y = 0; y < h4; y++)
1353 for (x = 0; x < w4; x++)
1354 pred = FFMIN(pred, refsegmap[(y + row) * 8 * s->sb_cols + x + col]);
1355 av_assert1(pred < 8);
1358 memset(&s->above_segpred_ctx[col], 1, w4);
1359 memset(&s->left_segpred_ctx[row7], 1, h4);
1361 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1364 memset(&s->above_segpred_ctx[col], 0, w4);
1365 memset(&s->left_segpred_ctx[row7], 0, h4);
// Record the decoded segment id into the current frame's map.
1367 if (s->segmentation.enabled &&
1368 (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1369 setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1370 w4, h4, 8 * s->sb_cols, b->seg_id);
// --- Skip flag -------------------------------------------------------------
1373 b->skip = s->segmentation.enabled &&
1374 s->segmentation.feat[b->seg_id].skip_enabled;
1376 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1377 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1378 s->counts.skip[c][b->skip]++;
// --- Intra/inter flag ------------------------------------------------------
1381 if (s->keyframe || s->intraonly) {
1383 } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
// Segment feature pins the reference; ref_val == 0 means intra.
1384 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1388 if (have_a && have_l) {
1389 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1392 c = have_a ? 2 * s->above_intra_ctx[col] :
1393 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1395 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1396 s->counts.intra[c][bit]++;
// --- Transform size --------------------------------------------------------
1400 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1404 c = (s->above_skip_ctx[col] ? max_tx :
1405 s->above_txfm_ctx[col]) +
1406 (s->left_skip_ctx[row7] ? max_tx :
1407 s->left_txfm_ctx[row7]) > max_tx;
1409 c = s->above_skip_ctx[col] ? 1 :
1410 (s->above_txfm_ctx[col] * 2 > max_tx);
1412 } else if (have_l) {
1413 c = s->left_skip_ctx[row7] ? 1 :
1414 (s->left_txfm_ctx[row7] * 2 > max_tx);
// Read up to 3 bits depending on max_tx (32x32/16x16/8x8 trees).
1420 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1422 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1424 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1426 s->counts.tx32p[c][b->tx]++;
1429 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1431 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1432 s->counts.tx16p[c][b->tx]++;
1435 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1436 s->counts.tx8p[c][b->tx]++;
1443 b->tx = FFMIN(max_tx, s->txfmmode);
// --- Keyframe/intra-only intra modes (default KF probabilities) ------------
1446 if (s->keyframe || s->intraonly) {
1447 uint8_t *a = &s->above_mode_ctx[col * 2];
1448 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
// Sub-8x8 blocks decode up to 4 sub-modes, each conditioned on the modes
// of its above/left neighbours.
1451 if (b->bs > BS_8x8) {
1452 // FIXME the memory storage intermediates here aren't really
1453 // necessary, they're just there to make the code slightly
1455 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1456 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1457 if (b->bs != BS_8x4) {
1458 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1459 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1460 l[0] = a[1] = b->mode[1];
1462 l[0] = a[1] = b->mode[1] = b->mode[0];
1464 if (b->bs != BS_4x8) {
1465 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1466 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1467 if (b->bs != BS_8x4) {
1468 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1469 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1470 l[1] = a[1] = b->mode[3];
1472 l[1] = a[1] = b->mode[3] = b->mode[2];
1475 b->mode[2] = b->mode[0];
1476 l[1] = a[1] = b->mode[3] = b->mode[1];
1479 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1480 vp9_default_kf_ymode_probs[*a][*l]);
1481 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1482 // FIXME this can probably be optimized
1483 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1484 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1486 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1487 vp9_default_kf_uvmode_probs[b->mode[3]]);
// --- Inter-frame intra blocks (adaptive y_mode probabilities) --------------
1488 } else if (b->intra) {
1490 if (b->bs > BS_8x8) {
1491 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1492 s->prob.p.y_mode[0]);
1493 s->counts.y_mode[0][b->mode[0]]++;
1494 if (b->bs != BS_8x4) {
1495 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1496 s->prob.p.y_mode[0]);
1497 s->counts.y_mode[0][b->mode[1]]++;
1499 b->mode[1] = b->mode[0];
1501 if (b->bs != BS_4x8) {
1502 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1503 s->prob.p.y_mode[0]);
1504 s->counts.y_mode[0][b->mode[2]]++;
1505 if (b->bs != BS_8x4) {
1506 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1507 s->prob.p.y_mode[0]);
1508 s->counts.y_mode[0][b->mode[3]]++;
1510 b->mode[3] = b->mode[2];
1513 b->mode[2] = b->mode[0];
1514 b->mode[3] = b->mode[1];
1517 static const uint8_t size_group[10] = {
1518 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1520 int sz = size_group[b->bs];
1522 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1523 s->prob.p.y_mode[sz]);
1524 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1525 s->counts.y_mode[sz][b->mode[3]]++;
1527 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1528 s->prob.p.uv_mode[b->mode[3]]);
1529 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
// --- Inter blocks ----------------------------------------------------------
// Context LUT mapping (above_mode, left_mode) to an inter-mode context.
1531 static const uint8_t inter_mode_ctx_lut[14][14] = {
1532 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1533 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1534 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1535 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1536 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1537 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1538 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1539 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1540 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1541 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1542 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1543 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1544 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1545 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
// Segment-pinned reference overrides bitstream reference selection.
1548 if (s->segmentation.feat[b->seg_id].ref_enabled) {
1549 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1551 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
// --- Compound-prediction flag ----------------------------------------------
1553 // read comp_pred flag
1554 if (s->comppredmode != PRED_SWITCHABLE) {
1555 b->comp = s->comppredmode == PRED_COMPREF;
1559 // FIXME add intra as ref=0xff (or -1) to make these easier?
1562 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1564 } else if (s->above_comp_ctx[col]) {
1565 c = 2 + (s->left_intra_ctx[row7] ||
1566 s->left_ref_ctx[row7] == s->fixcompref);
1567 } else if (s->left_comp_ctx[row7]) {
1568 c = 2 + (s->above_intra_ctx[col] ||
1569 s->above_ref_ctx[col] == s->fixcompref);
1571 c = (!s->above_intra_ctx[col] &&
1572 s->above_ref_ctx[col] == s->fixcompref) ^
1573 (!s->left_intra_ctx[row7] &&
// NOTE(review): 'row & 7' here vs. 'row7' everywhere else — looks
// intentional-equivalent (row7 = row & 7 elsewhere), but verify upstream.
1574 s->left_ref_ctx[row & 7] == s->fixcompref);
1577 c = s->above_comp_ctx[col] ? 3 :
1578 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1580 } else if (have_l) {
1581 c = s->left_comp_ctx[row7] ? 3 :
1582 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1586 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1587 s->counts.comp[c][b->comp]++;
// --- Reference frame selection ---------------------------------------------
1590 // read actual references
1591 // FIXME probably cache a few variables here to prevent repetitive
1592 // memory accesses below
1593 if (b->comp) /* two references */ {
// One ref is fixed (fixcompref); the other (variable) is coded.
1594 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1596 b->ref[fix_idx] = s->fixcompref;
1597 // FIXME can this codeblob be replaced by some sort of LUT?
1600 if (s->above_intra_ctx[col]) {
1601 if (s->left_intra_ctx[row7]) {
1604 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1606 } else if (s->left_intra_ctx[row7]) {
1607 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1609 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1611 if (refl == refa && refa == s->varcompref[1]) {
1613 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1614 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1615 (refl == s->fixcompref && refa == s->varcompref[0])) {
1618 c = (refa == refl) ? 3 : 1;
1620 } else if (!s->left_comp_ctx[row7]) {
1621 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1624 c = (refl == s->varcompref[1] &&
1625 refa != s->varcompref[1]) ? 2 : 4;
1627 } else if (!s->above_comp_ctx[col]) {
1628 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1631 c = (refa == s->varcompref[1] &&
1632 refl != s->varcompref[1]) ? 2 : 4;
1635 c = (refl == refa) ? 4 : 2;
1639 if (s->above_intra_ctx[col]) {
1641 } else if (s->above_comp_ctx[col]) {
1642 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1644 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1647 } else if (have_l) {
1648 if (s->left_intra_ctx[row7]) {
1650 } else if (s->left_comp_ctx[row7]) {
1651 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1653 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1658 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1659 b->ref[var_idx] = s->varcompref[bit];
1660 s->counts.comp_ref[c][bit]++;
1661 } else /* single reference */ {
// First bit: LAST vs. GOLDEN/ALTREF, context from neighbours.
1664 if (have_a && !s->above_intra_ctx[col]) {
1665 if (have_l && !s->left_intra_ctx[row7]) {
1666 if (s->left_comp_ctx[row7]) {
1667 if (s->above_comp_ctx[col]) {
1668 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1669 !s->above_ref_ctx[col]);
1671 c = (3 * !s->above_ref_ctx[col]) +
1672 (!s->fixcompref || !s->left_ref_ctx[row7]);
1674 } else if (s->above_comp_ctx[col]) {
1675 c = (3 * !s->left_ref_ctx[row7]) +
1676 (!s->fixcompref || !s->above_ref_ctx[col]);
1678 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1680 } else if (s->above_intra_ctx[col]) {
1682 } else if (s->above_comp_ctx[col]) {
1683 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1685 c = 4 * (!s->above_ref_ctx[col]);
1687 } else if (have_l && !s->left_intra_ctx[row7]) {
// NOTE(review): this inner left_intra_ctx test is always false here
// (outer condition already requires !left_intra_ctx) — dead branch,
// present in upstream vp9.c as well; confirm before changing.
1688 if (s->left_intra_ctx[row7]) {
1690 } else if (s->left_comp_ctx[row7]) {
1691 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1693 c = 4 * (!s->left_ref_ctx[row7]);
1698 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1699 s->counts.single_ref[c][0][bit]++;
// Second bit: GOLDEN vs. ALTREF.
1703 // FIXME can this codeblob be replaced by some sort of LUT?
1706 if (s->left_intra_ctx[row7]) {
1707 if (s->above_intra_ctx[col]) {
1709 } else if (s->above_comp_ctx[col]) {
1710 c = 1 + 2 * (s->fixcompref == 1 ||
1711 s->above_ref_ctx[col] == 1);
1712 } else if (!s->above_ref_ctx[col]) {
1715 c = 4 * (s->above_ref_ctx[col] == 1);
1717 } else if (s->above_intra_ctx[col]) {
1718 if (s->left_intra_ctx[row7]) {
1720 } else if (s->left_comp_ctx[row7]) {
1721 c = 1 + 2 * (s->fixcompref == 1 ||
1722 s->left_ref_ctx[row7] == 1);
1723 } else if (!s->left_ref_ctx[row7]) {
1726 c = 4 * (s->left_ref_ctx[row7] == 1);
1728 } else if (s->above_comp_ctx[col]) {
1729 if (s->left_comp_ctx[row7]) {
1730 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1731 c = 3 * (s->fixcompref == 1 ||
1732 s->left_ref_ctx[row7] == 1);
1736 } else if (!s->left_ref_ctx[row7]) {
1737 c = 1 + 2 * (s->fixcompref == 1 ||
1738 s->above_ref_ctx[col] == 1);
1740 c = 3 * (s->left_ref_ctx[row7] == 1) +
1741 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1743 } else if (s->left_comp_ctx[row7]) {
1744 if (!s->above_ref_ctx[col]) {
1745 c = 1 + 2 * (s->fixcompref == 1 ||
1746 s->left_ref_ctx[row7] == 1);
1748 c = 3 * (s->above_ref_ctx[col] == 1) +
1749 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1751 } else if (!s->above_ref_ctx[col]) {
1752 if (!s->left_ref_ctx[row7]) {
1755 c = 4 * (s->left_ref_ctx[row7] == 1);
1757 } else if (!s->left_ref_ctx[row7]) {
1758 c = 4 * (s->above_ref_ctx[col] == 1);
1760 c = 2 * (s->left_ref_ctx[row7] == 1) +
1761 2 * (s->above_ref_ctx[col] == 1);
1764 if (s->above_intra_ctx[col] ||
1765 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1767 } else if (s->above_comp_ctx[col]) {
1768 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1770 c = 4 * (s->above_ref_ctx[col] == 1);
1773 } else if (have_l) {
1774 if (s->left_intra_ctx[row7] ||
1775 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1777 } else if (s->left_comp_ctx[row7]) {
1778 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1780 c = 4 * (s->left_ref_ctx[row7] == 1);
1785 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1786 s->counts.single_ref[c][1][bit]++;
1787 b->ref[0] = 1 + bit;
// --- Inter mode + interpolation filter -------------------------------------
1792 if (b->bs <= BS_8x8) {
1793 if (s->segmentation.feat[b->seg_id].skip_enabled) {
1794 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1796 static const uint8_t off[10] = {
1797 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1800 // FIXME this needs to use the LUT tables from find_ref_mvs
1801 // because not all are -1,0/0,-1
1802 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1803 [s->left_mode_ctx[row7 + off[b->bs]]];
1805 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1806 s->prob.p.mv_mode[c]);
1807 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
// Inter modes start at value 10, hence the -10 in the counts index.
1808 s->counts.mv_mode[c][b->mode[0] - 10]++;
1812 if (s->filtermode == FILTER_SWITCHABLE) {
1815 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1816 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1817 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1818 s->left_filter_ctx[row7] : 3;
1820 c = s->above_filter_ctx[col];
1822 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1823 c = s->left_filter_ctx[row7];
1828 filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1829 s->prob.p.filter[c]);
1830 s->counts.filter[c][filter_id]++;
1831 b->filter = vp9_filter_lut[filter_id];
1833 b->filter = s->filtermode;
// --- Sub-8x8: per-sub-block modes and motion vectors -----------------------
1836 if (b->bs > BS_8x8) {
1837 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1839 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1840 s->prob.p.mv_mode[c]);
1841 s->counts.mv_mode[c][b->mode[0] - 10]++;
1842 fill_mv(s, b->mv[0], b->mode[0], 0);
1844 if (b->bs != BS_8x4) {
1845 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1846 s->prob.p.mv_mode[c]);
1847 s->counts.mv_mode[c][b->mode[1] - 10]++;
1848 fill_mv(s, b->mv[1], b->mode[1], 1);
1850 b->mode[1] = b->mode[0];
1851 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1852 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1855 if (b->bs != BS_4x8) {
1856 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1857 s->prob.p.mv_mode[c]);
1858 s->counts.mv_mode[c][b->mode[2] - 10]++;
1859 fill_mv(s, b->mv[2], b->mode[2], 2);
1861 if (b->bs != BS_8x4) {
1862 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1863 s->prob.p.mv_mode[c]);
1864 s->counts.mv_mode[c][b->mode[3] - 10]++;
1865 fill_mv(s, b->mv[3], b->mode[3], 3);
1867 b->mode[3] = b->mode[2];
1868 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1869 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1872 b->mode[2] = b->mode[0];
1873 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1874 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1875 b->mode[3] = b->mode[1];
1876 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1877 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
// Whole-block MV: decode once and replicate to all 4 sub-slots.
1880 fill_mv(s, b->mv[0], b->mode[0], -1);
1881 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1882 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1883 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
1884 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1885 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1886 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
// Reference recorded in the ref-context arrays below.
1889 vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
// --- Context splatting helpers (fast replicated byte stores) ---------------
1893 #define SPLAT_CTX(var, val, n) \
1895 case 1: var = val; break; \
1896 case 2: AV_WN16A(&var, val * 0x0101); break; \
1897 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1898 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
1900 uint64_t v64 = val * 0x0101010101010101ULL; \
1901 AV_WN64A( &var, v64); \
1902 AV_WN64A(&((uint8_t *) &var)[8], v64); \
// 32-bit variant for targets without fast 64-bit stores.
1907 #define SPLAT_CTX(var, val, n) \
1909 case 1: var = val; break; \
1910 case 2: AV_WN16A(&var, val * 0x0101); break; \
1911 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1913 uint32_t v32 = val * 0x01010101; \
1914 AV_WN32A( &var, v32); \
1915 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1919 uint32_t v32 = val * 0x01010101; \
1920 AV_WN32A( &var, v32); \
1921 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1922 AV_WN32A(&((uint8_t *) &var)[8], v32); \
1923 AV_WN32A(&((uint8_t *) &var)[12], v32); \
// --- Write decoded modes into left/above context arrays --------------------
1929 switch (bwh_tab[1][b->bs][0]) {
1930 #define SET_CTXS(dir, off, n) \
1932 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
1933 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
1934 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
1935 if (!s->keyframe && !s->intraonly) { \
1936 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
1937 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
1938 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
1940 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
1941 if (s->filtermode == FILTER_SWITCHABLE) { \
1942 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
1947 case 1: SET_CTXS(above, col, 1); break;
1948 case 2: SET_CTXS(above, col, 2); break;
1949 case 4: SET_CTXS(above, col, 4); break;
1950 case 8: SET_CTXS(above, col, 8); break;
1952 switch (bwh_tab[1][b->bs][1]) {
1953 case 1: SET_CTXS(left, row7, 1); break;
1954 case 2: SET_CTXS(left, row7, 2); break;
1955 case 4: SET_CTXS(left, row7, 4); break;
1956 case 8: SET_CTXS(left, row7, 8); break;
// --- MV context update (used by find_ref_mvs of later blocks) --------------
1961 if (!s->keyframe && !s->intraonly) {
1962 if (b->bs > BS_8x8) {
1963 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1965 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
1966 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
1967 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
1968 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
1969 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
1970 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
1971 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
1972 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
1974 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1976 for (n = 0; n < w4 * 2; n++) {
1977 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
1978 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
1980 for (n = 0; n < h4 * 2; n++) {
1981 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
1982 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
// --- Per-frame MV/ref storage (for temporal MV prediction next frame) ------
1988 for (y = 0; y < h4; y++) {
1989 int x, o = (row + y) * s->sb_cols * 8 + col;
1990 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
1993 for (x = 0; x < w4; x++) {
1997 } else if (b->comp) {
1998 for (x = 0; x < w4; x++) {
1999 mv[x].ref[0] = b->ref[0];
2000 mv[x].ref[1] = b->ref[1];
2001 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2002 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2005 for (x = 0; x < w4; x++) {
2006 mv[x].ref[0] = b->ref[0];
2008 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
// Decode one block of transform coefficients from the range coder.
// 'nnz' is the initial non-zero context from the above/left neighbours;
// 'scan'/'nb' give the coefficient scan order and its neighbour table;
// 'band_counts' delimits the probability bands; 'qmul' holds the DC/AC
// dequant multipliers (is_tx32x32 halves dequantized values, matching the
// spec's 32x32 downshift). Returns the number of coefficients decoded (i.e.
// the EOB position). 'cnt'/'eob' accumulate counts for backward adaptation.
// NOTE(review): several brace/else lines are elided in this view.
2014 // FIXME merge cnt/eob arguments?
2015 static av_always_inline int
2016 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2017                         int is_tx32x32, unsigned (*cnt)[6][3],
2018                         unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2019                         int nnz, const int16_t *scan, const int16_t (*nb)[2],
2020                         const int16_t *band_counts, const int16_t *qmul)
2022 int i = 0, band = 0, band_left = band_counts[band];
// tp points at the 11-entry token probability set for (band, ctx).
2023 uint8_t *tp = p[0][nnz];
// cache[] remembers each position's token magnitude class so the next
// coefficient's context can be derived from its scan neighbours.
2024 uint8_t cache[1024];
2029 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2030 eob[band][nnz][val]++;
2035 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2036 cnt[band][nnz][0]++;
2038 band_left = band_counts[++band];
// Context for the next position: average of the two scan neighbours.
2040 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2042 if (++i == n_coeffs)
2043 break; //invalid input; blocks should end with EOB
2048 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2049 cnt[band][nnz][1]++;
// Probabilities 3..10 are derived lazily from tp[2] via the Pareto model.
2053 // fill in p[3-10] (model fill) - only once per frame for each pos
2055 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2057 cnt[band][nnz][2]++;
2058 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2059 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2060 cache[rc] = val = 2;
2062 val = 3 + vp56_rac_get_prob(c, tp[5]);
2065 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2067 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
// Extra-bit probabilities (159, 165, ...) are the fixed constants from
// the VP9 token category tables.
2068 val = 5 + vp56_rac_get_prob(c, 159);
2070 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2071 val += vp56_rac_get_prob(c, 145);
2075 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2076 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2077 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2078 val += (vp56_rac_get_prob(c, 148) << 1);
2079 val += vp56_rac_get_prob(c, 140);
2081 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2082 val += (vp56_rac_get_prob(c, 155) << 2);
2083 val += (vp56_rac_get_prob(c, 140) << 1);
2084 val += vp56_rac_get_prob(c, 135);
2086 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2087 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2088 val += (vp56_rac_get_prob(c, 157) << 3);
2089 val += (vp56_rac_get_prob(c, 141) << 2);
2090 val += (vp56_rac_get_prob(c, 134) << 1);
2091 val += vp56_rac_get_prob(c, 130);
// Category 6: 14 extra bits for the largest magnitudes.
2093 val = 67 + (vp56_rac_get_prob(c, 254) << 13);
2094 val += (vp56_rac_get_prob(c, 254) << 12);
2095 val += (vp56_rac_get_prob(c, 254) << 11);
2096 val += (vp56_rac_get_prob(c, 252) << 10);
2097 val += (vp56_rac_get_prob(c, 249) << 9);
2098 val += (vp56_rac_get_prob(c, 243) << 8);
2099 val += (vp56_rac_get_prob(c, 230) << 7);
2100 val += (vp56_rac_get_prob(c, 196) << 6);
2101 val += (vp56_rac_get_prob(c, 177) << 5);
2102 val += (vp56_rac_get_prob(c, 153) << 4);
2103 val += (vp56_rac_get_prob(c, 140) << 3);
2104 val += (vp56_rac_get_prob(c, 133) << 2);
2105 val += (vp56_rac_get_prob(c, 130) << 1);
2106 val += vp56_rac_get_prob(c, 129);
2111 band_left = band_counts[++band];
// Dequantize: qmul[0] for the DC (i==0), qmul[1] for ACs; 32x32 halves.
2113 coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
2115 coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
2116 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2118 } while (++i < n_coeffs);
// Thin wrapper: coefficient decode for non-32x32 transforms (is_tx32x32 = 0,
// so no dequant halving). See decode_coeffs_b_generic().
2123 static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2124                            unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2125                            uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2126                            const int16_t (*nb)[2], const int16_t *band_counts,
2127                            const int16_t *qmul)
2129 return decode_coeffs_b_generic(c, coef, n_coeffs, 0, cnt, eob, p,
2130                                nnz, scan, nb, band_counts, qmul);
// Thin wrapper: coefficient decode for 32x32 transforms (is_tx32x32 = 1,
// which halves dequantized values). See decode_coeffs_b_generic().
2133 static int decode_coeffs_b32(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2134                              unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2135                              uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2136                              const int16_t (*nb)[2], const int16_t *band_counts,
2137                              const int16_t *qmul)
2139 return decode_coeffs_b_generic(c, coef, n_coeffs, 1, cnt, eob, p,
2140                                nnz, scan, nb, band_counts, qmul);
// Decode all residual coefficients for the current block: the luma plane
// first (per-sub-transform scan/txtp selection), then both chroma planes
// (always DCT_DCT). Maintains the above/left non-zero-context arrays (a/l),
// merging them down when the transform is larger than 4x4 and splatting the
//结 -- results back afterwards. EOBs are stored in s->eob / s->uveob for the
// reconstruction pass.
// NOTE(review): switch labels and closing braces are elided in this view;
// the per-tx paths are inferred from the visible MERGE/DECODE macros.
2143 static void decode_coeffs(AVCodecContext *ctx)
2145 VP9Context *s = ctx->priv_data;
2147 int row = s->row, col = s->col;
// Probability/count tables selected by tx size, plane and intra/inter.
2148 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2149 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2150 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
2151 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
// Clip the coefficient area at the frame edge (in 4x4 units).
2152 int end_x = FFMIN(2 * (s->cols - col), w4);
2153 int end_y = FFMIN(2 * (s->rows - row), h4);
2154 int n, pl, x, y, res;
2155 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
// Lossless uses the WHT scan set (offset 4 into vp9_scans).
2156 int tx = 4 * s->lossless + b->tx;
2157 const int16_t * const *yscans = vp9_scans[tx];
2158 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2159 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2160 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2161 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2162 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
// Band boundaries per tx size (last entry: remaining coefficients).
2163 static const int16_t band_counts[4][8] = {
2164 { 1, 2, 3, 4, 3, 16 - 13 },
2165 { 1, 2, 3, 4, 11, 64 - 21 },
2166 { 1, 2, 3, 4, 11, 256 - 21 },
2167 { 1, 2, 3, 4, 11, 1024 - 21 },
2169 const int16_t *y_band_counts = band_counts[b->tx];
2170 const int16_t *uv_band_counts = band_counts[b->uvtx];
// MERGE/MERGE_CTX: collapse per-4x4 nnz flags into one flag per transform.
2172 #define MERGE(la, end, step, rd) \
2173     for (n = 0; n < end; n += step) \
2174         la[n] = !!rd(&la[n])
2175 #define MERGE_CTX(step, rd) \
2177         MERGE(l, end_y, step, rd); \
2178         MERGE(a, end_x, step, rd); \
// Luma decode loop: per sub-transform, pick the intra-mode-dependent
// transform type (txtp) and decode into s->block; record EOB.
2181 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2182     for (n = 0, y = 0; y < end_y; y += step) { \
2183         for (x = 0; x < end_x; x += step, n += step * step) { \
2184             enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2185             res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
2186                                      c, e, p, a[x] + l[y], yscans[txtp], \
2187                                      ynbs[txtp], y_band_counts, qmul[0]); \
2188             a[x] = l[y] = !!res; \
2190             AV_WN16A(&s->eob[n], res); \
// SPLAT: expand the merged nnz flag back to per-4x4 entries.
2197 #define SPLAT(la, end, step, cond) \
2199         for (n = 1; n < end; n += step) \
2200             la[n] = la[n - 1]; \
2201     } else if (step == 4) { \
2203         for (n = 0; n < end; n += step) \
2204             AV_WN32A(&la[n], la[n] * 0x01010101); \
2206         for (n = 0; n < end; n += step) \
2207             memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2209     } else /* step == 8 */ { \
2211         if (HAVE_FAST_64BIT) { \
2212             for (n = 0; n < end; n += step) \
2213                 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2215             for (n = 0; n < end; n += step) { \
2216                 uint32_t v32 = la[n] * 0x01010101; \
2217                 AV_WN32A(&la[n], v32); \
2218                 AV_WN32A(&la[n + 4], v32); \
2222         for (n = 0; n < end; n += step) \
2223             memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2226 #define SPLAT_CTX(step) \
2228         SPLAT(a, end_x, step, end_x == w4); \
2229         SPLAT(l, end_y, step, end_y == h4); \
// Luma: dispatch on b->tx (4x4 .. 32x32; 32x32 uses decode_coeffs_b32).
2235 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2238 MERGE_CTX(2, AV_RN16A);
2239 DECODE_Y_COEF_LOOP(2, 0,);
2243 MERGE_CTX(4, AV_RN32A);
2244 DECODE_Y_COEF_LOOP(4, 0,);
2248 MERGE_CTX(8, AV_RN64A);
2249 DECODE_Y_COEF_LOOP(8, 0, 32);
// Chroma decode loop: fixed DCT_DCT scan, AC dequant set qmul[1].
2254 #define DECODE_UV_COEF_LOOP(step) \
2255     for (n = 0, y = 0; y < end_y; y += step) { \
2256         for (x = 0; x < end_x; x += step, n += step * step) { \
2257             res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n, \
2258                                   16 * step * step, c, e, p, a[x] + l[y], \
2259                                   uvscan, uvnb, uv_band_counts, qmul[1]); \
2260             a[x] = l[y] = !!res; \
2261             s->uveob[pl][n] = res; \
// Switch to the chroma probability/count tables; halve the extents (4:2:0).
2265 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2266 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2267 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2272 for (pl = 0; pl < 2; pl++) {
2273 a = &s->above_uv_nnz_ctx[pl][col];
2274 l = &s->left_uv_nnz_ctx[pl][row & 7];
2277 DECODE_UV_COEF_LOOP(1);
2280 MERGE_CTX(2, AV_RN16A);
2281 DECODE_UV_COEF_LOOP(2);
2285 MERGE_CTX(4, AV_RN32A);
2286 DECODE_UV_COEF_LOOP(4);
2290 MERGE_CTX(8, AV_RN64A);
2291 // a 64x64 (max) uv block can ever only contain 1 tx32x32 block
2292 // so there is no need to loop
2293 res = decode_coeffs_b32(&s->c, s->uvblock[pl],
2294 1024, c, e, p, a[0] + l[0],
2295 uvscan, uvnb, uv_band_counts, qmul[1]);
2296 a[0] = l[0] = !!res;
2297 AV_WN16A(&s->uveob[pl][0], res);
2304 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2305 uint8_t *dst_edge, ptrdiff_t stride_edge,
2306 uint8_t *dst_inner, ptrdiff_t stride_inner,
2307 uint8_t *l, int col, int x, int w,
2308 int row, int y, enum TxfmMode tx,
2311 int have_top = row > 0 || y > 0;
2312 int have_left = col > s->tiling.tile_col_start || x > 0;
2313 int have_right = x < w - 1;
2314 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2315 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2316 { DC_127_PRED, VERT_PRED } },
2317 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2318 { HOR_PRED, HOR_PRED } },
2319 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2320 { LEFT_DC_PRED, DC_PRED } },
2321 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2322 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2323 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2324 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2325 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2326 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2327 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2328 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2329 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2330 { DC_127_PRED, VERT_LEFT_PRED } },
2331 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2332 { HOR_UP_PRED, HOR_UP_PRED } },
2333 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2334 { HOR_PRED, TM_VP8_PRED } },
2336 static const struct {
2337 uint8_t needs_left:1;
2338 uint8_t needs_top:1;
2339 uint8_t needs_topleft:1;
2340 uint8_t needs_topright:1;
2341 } edges[N_INTRA_PRED_MODES] = {
2342 [VERT_PRED] = { .needs_top = 1 },
2343 [HOR_PRED] = { .needs_left = 1 },
2344 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2345 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2346 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2347 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2348 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2349 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2350 [HOR_UP_PRED] = { .needs_left = 1 },
2351 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2352 [LEFT_DC_PRED] = { .needs_left = 1 },
2353 [TOP_DC_PRED] = { .needs_top = 1 },
2354 [DC_128_PRED] = { 0 },
2355 [DC_127_PRED] = { 0 },
2356 [DC_129_PRED] = { 0 }
2359 av_assert2(mode >= 0 && mode < 10);
2360 mode = mode_conv[mode][have_left][have_top];
2361 if (edges[mode].needs_top) {
2362 uint8_t *top, *topleft;
2363 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
2364 int n_px_need_tr = 0;
2366 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2369 // if top of sb64-row, use s->intra_pred_data[] instead of
2370 // dst[-stride] for intra prediction (it contains pre- instead of
2371 // post-loopfilter data)
2373 top = !(row & 7) && !y ?
2374 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2375 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2377 topleft = !(row & 7) && !y ?
2378 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2379 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2380 &dst_inner[-stride_inner];
2384 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2385 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2386 n_px_need + n_px_need_tr <= n_px_have) {
2390 if (n_px_need <= n_px_have) {
2391 memcpy(*a, top, n_px_need);
2393 memcpy(*a, top, n_px_have);
2394 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2395 n_px_need - n_px_have);
2398 memset(*a, 127, n_px_need);
2400 if (edges[mode].needs_topleft) {
2401 if (have_left && have_top) {
2402 (*a)[-1] = topleft[-1];
2404 (*a)[-1] = have_top ? 129 : 127;
2407 if (tx == TX_4X4 && edges[mode].needs_topright) {
2408 if (have_top && have_right &&
2409 n_px_need + n_px_need_tr <= n_px_have) {
2410 memcpy(&(*a)[4], &top[4], 4);
2412 memset(&(*a)[4], (*a)[3], 4);
2417 if (edges[mode].needs_left) {
2419 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
2420 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2421 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2423 if (n_px_need <= n_px_have) {
2424 for (i = 0; i < n_px_need; i++)
2425 l[n_px_need - 1 - i] = dst[i * stride - 1];
2427 for (i = 0; i < n_px_have; i++)
2428 l[n_px_need - 1 - i] = dst[i * stride - 1];
2429 memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
2432 memset(l, 129, 4 << tx);
/*
 * Reconstruct one intra-coded block: for each transform sub-block of the
 * current block (luma first, then both chroma planes), build the prediction
 * edges via check_intra_mode(), run the DSP intra predictor, and add the
 * inverse-transformed residual (unless the block is skipped / eob == 0 —
 * the eob test itself sits on lines not shown in this excerpt).
 *
 * y_off/uv_off are byte offsets of this block into the current frame's
 * luma/chroma planes; dst_r walks the real frame buffer (needed by
 * check_intra_mode for edge pixels) while dst walks s->dst[], which may be
 * a temporary buffer when edge emulation is active (set up in decode_b()).
 */
2439 static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2441 VP9Context *s = ctx->priv_data;
2443 int row = s->row, col = s->col;
// w4/h4: block size in 4px units; step1d: transform size in 4px units;
// step: number of 4x4 units covered by one transform block.
2444 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2445 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
// Clip the loop bounds so we never reconstruct past the visible frame edge.
2446 int end_x = FFMIN(2 * (s->cols - col), w4);
2447 int end_y = FFMIN(2 * (s->rows - row), h4);
// Lossless mode selects the WHT variants (offset 4 in the itxfm table).
2448 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2449 int uvstep1d = 1 << b->uvtx, p;
2450 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
// Aligned scratch buffers for the top (a_buf, with 16 bytes of left margin
// so a[-1] is valid) and left (l) prediction edges.
2451 LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
2452 LOCAL_ALIGNED_16(uint8_t, l, [32]);
2454 for (n = 0, y = 0; y < end_y; y += step1d) {
2455 uint8_t *ptr = dst, *ptr_r = dst_r;
2456 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2457 ptr_r += 4 * step1d, n += step) {
// For sub-8x8 blocks with 4x4 transforms each 4x4 unit has its own mode
// (index expression continues on a line not shown here).
2458 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2460 uint8_t *a = &a_buf[16];
// Luma transform type (DCT/ADST mix) depends on the intra mode.
2461 enum TxfmType txtp = vp9_intra_txfm_type[mode];
2462 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2464 mode = check_intra_mode(s, mode, &a, ptr_r,
2465 s->frames[CUR_FRAME].tf.f->linesize[0],
2466 ptr, s->y_stride, l,
2467 col, x, w4, row, y, b->tx, 0);
2468 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2470 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2471 s->block + 16 * n, eob);
2473 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2474 dst += 4 * step1d * s->y_stride;
// Chroma: same walk at subsampled resolution (end_x/end_y are halved on
// lines not shown here); chroma always uses DCT_DCT.
2482 step = 1 << (b->uvtx * 2);
2483 for (p = 0; p < 2; p++) {
2484 dst = s->dst[1 + p];
2485 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2486 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2487 uint8_t *ptr = dst, *ptr_r = dst_r;
2488 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2489 ptr_r += 4 * uvstep1d, n += step) {
2490 int mode = b->uvmode;
2491 uint8_t *a = &a_buf[16];
2492 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2494 mode = check_intra_mode(s, mode, &a, ptr_r,
2495 s->frames[CUR_FRAME].tf.f->linesize[1],
2496 ptr, s->uv_stride, l,
2497 col, x, w4, row, y, b->uvtx, p + 1);
2498 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2500 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2501 s->uvblock[p] + 16 * n, eob);
2503 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2504 dst += 4 * uvstep1d * s->uv_stride;
/*
 * Motion-compensate one luma block for a single prediction direction.
 *
 * mc points at the [x-subpel][y-subpel] entries of the scaled MC function
 * table for this block width; (y, x) is the block position in luma pixels;
 * mv is in 1/8-pel units (the subpel fraction extraction sits on lines not
 * shown in this excerpt; mx/my hold the fractional parts when used below).
 * For frame-threading, waits until the reference frame has decoded enough
 * sbrows, then falls back to emulated_edge_mc when the 8-tap filter
 * footprint (3 pixels before, 4 after) would read outside the frame.
 */
2509 static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2510 uint8_t *dst, ptrdiff_t dst_stride,
2511 const uint8_t *ref, ptrdiff_t ref_stride,
2512 ThreadFrame *ref_frame,
2513 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2514 int bw, int bh, int w, int h)
2516 int mx = mv->x, my = mv->y, th;
2520 ref += y * ref_stride + x;
2523 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2524 // we use +7 because the last 7 pixels of each sbrow can be changed in
2525 // the longest loopfilter of the next sbrow
// >> 6: convert the bottom-most luma row we will read into a 64px-sbrow
// progress count for ff_thread_await_progress().
2526 th = (y + bh + 4 * !!my + 7) >> 6;
2527 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
// Filter needs 3 pixels before and 4 after the block in each subpel
// dimension; if that overruns the frame, fetch via the edge-emulation buffer.
2528 if (x < !!mx * 3 || y < !!my * 3 ||
2529 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2530 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2531 ref - !!my * 3 * ref_stride - !!mx * 3,
2533 bw + !!mx * 7, bh + !!my * 7,
2534 x - !!mx * 3, y - !!my * 3, w, h);
// 80 is the fixed stride of edge_emu_buffer.
2535 ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
// mx/my << 1: convert luma 1/8-pel fractions to the 1/16-pel scale the
// MC functions share with chroma.
2538 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
/*
 * Motion-compensate both chroma planes (U and V) of one block for a single
 * prediction direction. Same structure as mc_luma_dir(), but (y, x), bw/bh
 * and w/h are in chroma (subsampled) units, and mx/my are already at
 * chroma subpel precision, so they are passed to the MC functions unscaled.
 */
2541 static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2542 uint8_t *dst_u, uint8_t *dst_v,
2543 ptrdiff_t dst_stride,
2544 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2545 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2546 ThreadFrame *ref_frame,
2547 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2548 int bw, int bh, int w, int h)
2550 int mx = mv->x, my = mv->y, th;
2554 ref_u += y * src_stride_u + x;
2555 ref_v += y * src_stride_v + x;
2558 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2559 // we use +7 because the last 7 pixels of each sbrow can be changed in
2560 // the longest loopfilter of the next sbrow
// >> 5: y is in chroma rows, and one 64px sbrow corresponds to 32 chroma
// rows, so this converts the bottom-most row read into a sbrow count.
2561 th = (y + bh + 4 * !!my + 7) >> 5;
2562 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
// Edge emulation as in mc_luma_dir(): 3 pixels before / 4 after the block
// are touched per subpel dimension; U and V are emulated independently.
2563 if (x < !!mx * 3 || y < !!my * 3 ||
2564 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2565 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2566 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2568 bw + !!mx * 7, bh + !!my * 7,
2569 x - !!mx * 3, y - !!my * 3, w, h);
// 80 is the fixed stride of edge_emu_buffer.
2570 ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2571 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2573 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2574 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2576 bw + !!mx * 7, bh + !!my * 7,
2577 x - !!mx * 3, y - !!my * 3, w, h);
2578 ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2579 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
// Fast path: both planes read entirely inside the frame.
2581 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2582 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
/*
 * Reconstruct one inter-coded block: perform luma and chroma motion
 * compensation for the first reference (and, for compound prediction, the
 * second reference — the b->comp checks themselves sit on lines not shown
 * in this excerpt), then add the inverse-transformed residual per
 * transform sub-block.
 *
 * Sub-8x8 block sizes (BS_8x4, BS_4x8, BS_4x4) carry up to four separate
 * motion vectors and are motion-compensated per 4px sub-block; all larger
 * sizes use a single call per reference.
 */
2586 static void inter_recon(AVCodecContext *ctx)
// Index into s->dsp.mc[] (log2 of block width) per block size;
// row [0] is for luma, row [1] for subsampled chroma.
2588 static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
2589 { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
2590 { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
2592 VP9Context *s = ctx->priv_data;
2594 int row = s->row, col = s->col;
2595 ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]], *tref2;
2596 AVFrame *ref1 = tref1->f, *ref2;
2597 int w1 = ref1->width, h1 = ref1->height, w2, h2;
2598 ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
// Second reference setup (guarded by a compound-prediction check on a line
// not shown here).
2601 tref2 = &s->refs[s->refidx[b->ref[1]]];
// ---- luma motion compensation ----
2608 if (b->bs > BS_8x8) {
2609 if (b->bs == BS_8x4) {
// Two 8x4 halves: mv[0] for the top, mv[2] for the bottom.
2610 mc_luma_dir(s, s->dsp.mc[3][b->filter][0], s->dst[0], ls_y,
2611 ref1->data[0], ref1->linesize[0], tref1,
2612 row << 3, col << 3, &b->mv[0][0], 8, 4, w1, h1);
2613 mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
2614 s->dst[0] + 4 * ls_y, ls_y,
2615 ref1->data[0], ref1->linesize[0], tref1,
2616 (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1);
// Same two halves from the second reference (compound prediction).
2619 mc_luma_dir(s, s->dsp.mc[3][b->filter][1], s->dst[0], ls_y,
2620 ref2->data[0], ref2->linesize[0], tref2,
2621 row << 3, col << 3, &b->mv[0][1], 8, 4, w2, h2);
2622 mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
2623 s->dst[0] + 4 * ls_y, ls_y,
2624 ref2->data[0], ref2->linesize[0], tref2,
2625 (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2);
2627 } else if (b->bs == BS_4x8) {
// Two 4x8 halves: mv[0] for the left, mv[1] for the right.
2628 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2629 ref1->data[0], ref1->linesize[0], tref1,
2630 row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1);
2631 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2632 ref1->data[0], ref1->linesize[0], tref1,
2633 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1);
2636 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2637 ref2->data[0], ref2->linesize[0], tref2,
2638 row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2);
2639 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2640 ref2->data[0], ref2->linesize[0], tref2,
2641 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2);
2644 av_assert2(b->bs == BS_4x4);
2646 // FIXME if two horizontally adjacent blocks have the same MV,
2647 // do a w8 instead of a w4 call
// Four 4x4 quadrants with independent MVs (mv[0..3]).
2648 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2649 ref1->data[0], ref1->linesize[0], tref1,
2650 row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1);
2651 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2652 ref1->data[0], ref1->linesize[0], tref1,
2653 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1);
2654 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2655 s->dst[0] + 4 * ls_y, ls_y,
2656 ref1->data[0], ref1->linesize[0], tref1,
2657 (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1);
2658 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2659 s->dst[0] + 4 * ls_y + 4, ls_y,
2660 ref1->data[0], ref1->linesize[0], tref1,
2661 (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1);
2664 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2665 ref2->data[0], ref2->linesize[0], tref2,
2666 row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2);
2667 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2668 ref2->data[0], ref2->linesize[0], tref2,
2669 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2);
2670 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2671 s->dst[0] + 4 * ls_y, ls_y,
2672 ref2->data[0], ref2->linesize[0], tref2,
2673 (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2);
2674 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2675 s->dst[0] + 4 * ls_y + 4, ls_y,
2676 ref2->data[0], ref2->linesize[0], tref2,
2677 (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2);
// Block sizes >= 8x8: one MC call per reference direction.
2681 int bwl = bwlog_tab[0][b->bs];
2682 int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
2684 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], s->dst[0], ls_y,
2685 ref1->data[0], ref1->linesize[0], tref1,
2686 row << 3, col << 3, &b->mv[0][0],bw, bh, w1, h1);
2689 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], s->dst[0], ls_y,
2690 ref2->data[0], ref2->linesize[0], tref2,
2691 row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2);
// ---- chroma motion compensation ----
2696 int bwl = bwlog_tab[1][b->bs];
2697 int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
// For sub-8x8 blocks, chroma uses the rounded average of the four luma MVs
// (mvuv is declared on a line not shown here).
2706 if (b->bs > BS_8x8) {
2707 mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
2708 mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
2713 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
2714 s->dst[1], s->dst[2], ls_uv,
2715 ref1->data[1], ref1->linesize[1],
2716 ref1->data[2], ref1->linesize[2], tref1,
2717 row << 2, col << 2, &mvuv, bw, bh, w1, h1);
2720 if (b->bs > BS_8x8) {
2721 mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
2722 mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
2726 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
2727 s->dst[1], s->dst[2], ls_uv,
2728 ref2->data[1], ref2->linesize[1],
2729 ref2->data[2], ref2->linesize[2], tref2,
2730 row << 2, col << 2, &mvuv, bw, bh, w2, h2);
// ---- residual add (same walk as intra_recon, but always DCT_DCT) ----
2735 /* mostly copied intra_reconn() */
2737 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2738 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2739 int end_x = FFMIN(2 * (s->cols - col), w4);
2740 int end_y = FFMIN(2 * (s->rows - row), h4);
2741 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2742 int uvstep1d = 1 << b->uvtx, p;
2743 uint8_t *dst = s->dst[0];
2746 for (n = 0, y = 0; y < end_y; y += step1d) {
2748 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2749 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2752 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2753 s->block + 16 * n, eob);
2755 dst += 4 * s->y_stride * step1d;
2763 step = 1 << (b->uvtx * 2);
2764 for (p = 0; p < 2; p++) {
2765 dst = s->dst[p + 1];
2766 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2768 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2769 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2772 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2773 s->uvblock[p] + 16 * n, eob);
2775 dst += 4 * uvstep1d * s->uv_stride;
/*
 * Record which 4px edges of the current block need loopfiltering, as bitmasks
 * in lflvl->mask[is_uv][0 = horizontal runs / 1 = vertical runs][row][size],
 * where the last index selects the filter width (0 = 16px, 1 = 8px,
 * 2 = 4px, 3 = inner 4px edges of a skipped-inter block).
 *
 * (row_and_7, col_and_7) is the block position inside the 64x64 superblock
 * in 8px units; w/h is the block size in the same units; col_end/row_end
 * carry the frame-edge clipping for the UV odd-row/col special cases.
 */
2781 static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
2782 int row_and_7, int col_and_7,
2783 int w, int h, int col_end, int row_end,
2784 enum TxfmMode tx, int skip_inter)
2786 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2787 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2788 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2789 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2791 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2792 // edges. This means that for UV, we work on two subsampled blocks at
2793 // a time, and we only use the topleft block's mode information to set
2794 // things like block strength. Thus, for any block size smaller than
2795 // 16x16, ignore the odd portion of the block.
2796 if (tx == TX_4X4 && is_uv) {
// Non-skipped inter block with 4x4 transforms: every interior 4px edge is
// filtered; 8px-wide filters only on 32px-aligned edges.
2811 if (tx == TX_4X4 && !skip_inter) {
// t: bit for this block's first 8px column; m_col: bits for all its columns.
2812 int t = 1 << col_and_7, m_col = (t << w) - t, y;
2813 int m_col_odd = (t << (w - 1)) - t;
2815 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
// UV branch (the is_uv test is on a line not shown here): only bit 0
// of the mask can be a 32px-aligned (8px-wide-filter) edge.
2817 int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2819 for (y = row_and_7; y < h + row_and_7; y++) {
2820 int col_mask_id = 2 - !(y & 7);
2822 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2823 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2824 // for odd lines, if the odd col is not being filtered,
2825 // skip odd row also:
2832 // if a/c are even row/col and b/d are odd, and d is skipped,
2833 // e.g. right edge of size-66x66.webm, then skip b also (bug)
2834 if ((col_end & 1) && (y & 1)) {
2835 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
2837 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
// Luma branch: 32px-aligned columns are bits 0 and 4 (0x11 pattern).
2841 int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2843 for (y = row_and_7; y < h + row_and_7; y++) {
2844 int col_mask_id = 2 - !(y & 3);
2846 lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
2847 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2848 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
// Index 3 = interior 4px edges of the block (both directions).
2849 lflvl->mask[is_uv][0][y][3] |= m_col;
2850 lflvl->mask[is_uv][1][y][3] |= m_col;
// Non-skipped block with tx > 4x4 (the enclosing condition is on a line
// not shown here): edges fall only on transform-block boundaries.
2854 int y, t = 1 << col_and_7, m_col = (t << w) - t;
2857 int mask_id = (tx == TX_8X8);
2858 int l2 = tx + is_uv - 1, step1d = 1 << l2;
// Column-repeat pattern of transform-block left edges per transform size.
2859 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2860 int m_row = m_col & masks[l2];
2862 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
2863 // 8wd loopfilter to prevent going off the visible edge.
2864 if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2865 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2866 int m_row_8 = m_row - m_row_16;
2868 for (y = row_and_7; y < h + row_and_7; y++) {
2869 lflvl->mask[is_uv][0][y][0] |= m_row_16;
2870 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2873 for (y = row_and_7; y < h + row_and_7; y++)
2874 lflvl->mask[is_uv][0][y][mask_id] |= m_row;
2877 if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
// All full transform rows get the wide filter; the final odd UV row is
// demoted to the 8px-wide filter.
2878 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2879 lflvl->mask[is_uv][1][y][0] |= m_col;
2880 if (y - row_and_7 == h - 1)
2881 lflvl->mask[is_uv][1][y][1] |= m_col;
2883 for (y = row_and_7; y < h + row_and_7; y += step1d)
2884 lflvl->mask[is_uv][1][y][mask_id] |= m_col;
2886 } else if (tx != TX_4X4) {
// Skipped inter block, tx > 4x4: only the block's outer edges are filtered.
2889 mask_id = (tx == TX_8X8) || (is_uv && h == 1);
2890 lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
2891 mask_id = (tx == TX_8X8) || (is_uv && w == 1);
2892 for (y = row_and_7; y < h + row_and_7; y++)
2893 lflvl->mask[is_uv][0][y][mask_id] |= t;
// Skipped inter block with 4x4 transforms, UV case: left/top block edges
// only, with the 8px-wide filter at 32px-aligned positions.
2895 int t8 = t & 0x01, t4 = t - t8;
2897 for (y = row_and_7; y < h + row_and_7; y++) {
2898 lflvl->mask[is_uv][0][y][2] |= t4;
2899 lflvl->mask[is_uv][0][y][1] |= t8;
2901 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
// ... and the corresponding luma case (0x11 alignment pattern).
2903 int t8 = t & 0x11, t4 = t - t8;
2905 for (y = row_and_7; y < h + row_and_7; y++) {
2906 lflvl->mask[is_uv][0][y][2] |= t4;
2907 lflvl->mask[is_uv][0][y][1] |= t8;
2909 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
/*
 * Decode and reconstruct one block at (row, col): parse its modes and
 * coefficients (the mode/coef parsing calls sit on lines not shown in this
 * excerpt), reconstruct it (intra or inter), and record loopfilter
 * levels/masks for it. In the two-pass case, the reconstruction pass skips
 * parsing and re-uses the stored per-block data.
 *
 * yoff/uvoff are the byte offsets of this block into the current frame's
 * luma/chroma planes; (bl, bp) identify the partition that produced it.
 */
2914 static void decode_b(AVCodecContext *ctx, int row, int col,
2915 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2916 enum BlockLevel bl, enum BlockPartition bp)
2918 VP9Context *s = ctx->priv_data;
2920 enum BlockSize bs = bl * 3 + bp;
2921 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
2923 AVFrame *f = s->frames[CUR_FRAME].tf.f;
// Clamp motion-vector range so prediction stays within 128px (in 1/8-pel
// units: 64 per 8px block) of the frame.
2929 s->min_mv.x = -(128 + col * 64);
2930 s->min_mv.y = -(128 + row * 64);
2931 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
2932 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
// Chroma tx size is one step smaller than luma tx when the luma tx would
// not fit in the subsampled block.
2938 b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
// For skipped blocks, zero the above/left non-zero-coefficient contexts
// with the widest store that covers the block (1..16 bytes).
2945 #define SPLAT_ZERO_CTX(v, n) \
2947 case 1: v = 0; break; \
2948 case 2: AV_ZERO16(&v); break; \
2949 case 4: AV_ZERO32(&v); break; \
2950 case 8: AV_ZERO64(&v); break; \
2951 case 16: AV_ZERO128(&v); break; \
2953 #define SPLAT_ZERO_YUV(dir, var, off, n) \
2955 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
2956 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
2957 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
2961 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1); break;
2962 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2); break;
2963 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4); break;
2964 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8); break;
2967 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1); break;
2968 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2); break;
2969 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4); break;
2970 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8); break;
// Advance the per-block coefficient/eob pointers past this (skipped) block.
2975 s->block += w4 * h4 * 64;
2976 s->uvblock[0] += w4 * h4 * 16;
2977 s->uvblock[1] += w4 * h4 * 16;
2978 s->eob += 4 * w4 * h4;
2979 s->uveob[0] += w4 * h4;
2980 s->uveob[1] += w4 * h4;
2986 // emulated overhangs if the stride of the target buffer can't hold. This
2987 // allows to support emu-edge and so on even if we have large block
// If the block sticks out past the picture stride or bottom, reconstruct
// into the temporary buffers and copy the visible part back afterwards.
2989 emu[0] = (col + w4) * 8 > f->linesize[0] ||
2990 (row + h4) > s->rows;
2991 emu[1] = (col + w4) * 4 > f->linesize[1] ||
2992 (row + h4) > s->rows;
2994 s->dst[0] = s->tmp_y;
2997 s->dst[0] = f->data[0] + yoff;
2998 s->y_stride = f->linesize[0];
3001 s->dst[1] = s->tmp_uv[0];
3002 s->dst[2] = s->tmp_uv[1];
3005 s->dst[1] = f->data[1] + uvoff;
3006 s->dst[2] = f->data[2] + uvoff;
3007 s->uv_stride = f->linesize[1];
3010 intra_recon(ctx, yoff, uvoff);
// Copy the emulated luma reconstruction back into the frame, in the widest
// power-of-two column chunks available in the mc[] table (64 >> n wide).
3015 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3017 for (n = 0; o < w; n++) {
3022 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3023 s->tmp_y + o, 64, h, 0, 0);
// Same copy-back for both chroma planes (tmp stride 32).
3029 int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
3031 for (n = 1; o < w; n++) {
3036 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3037 s->tmp_uv[0] + o, 32, h, 0, 0);
3038 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3039 s->tmp_uv[1] + o, 32, h, 0, 0);
3045 // pick filter level and find edges to apply filter to
3046 if (s->filter.level &&
3047 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3048 [b->mode[3] != ZEROMV]) > 0) {
3049 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3050 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
// Store the per-8px-block filter level and build the edge masks for luma
// and (with frame-edge clipping info) chroma.
3052 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3053 mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3054 mask_edges(lflvl, 1, row7, col7, x_end, y_end,
3055 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3056 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3057 b->uvtx, skip_inter);
// Lazily fill the limit/mblimit LUTs for this level from the sharpness
// setting (limit derivation partially on lines not shown here).
3059 if (!s->filter.lim_lut[lvl]) {
3060 int sharp = s->filter.sharpness;
3064 limit >>= (sharp + 3) >> 2;
3065 limit = FFMIN(limit, 9 - sharp);
3067 limit = FFMAX(limit, 1);
3069 s->filter.lim_lut[lvl] = limit;
3070 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
// Advance the coefficient/eob pointers past this block (non-skip path;
// the guarding condition is on a line not shown here).
3076 s->block += w4 * h4 * 64;
3077 s->uvblock[0] += w4 * h4 * 16;
3078 s->uvblock[1] += w4 * h4 * 16;
3079 s->eob += 4 * w4 * h4;
3080 s->uveob[0] += w4 * h4;
3081 s->uveob[1] += w4 * h4;
/*
 * Recursively parse the partition tree of one superblock region and decode
 * its blocks. At each level, the partition symbol is read with a context c
 * derived from the above/left partition contexts; near the right/bottom
 * frame edge only the partitions that fit are considered, and the symbol is
 * read with vp56_rac_get_prob_branchy() on the reduced choice set.
 */
3085 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3086 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3088 VP9Context *s = ctx->priv_data;
// Partition probability context from the above/left partition split bits.
3089 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3090 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
3091 const uint8_t *p = s->keyframe ? vp9_default_kf_partition_probs[bl][c] :
3092 s->prob.p.partition[bl][c];
3093 enum BlockPartition bp;
// hbs: half the block size at this level, in 8px units (4, 2, 1, then 0).
3094 ptrdiff_t hbs = 4 >> bl;
3095 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3096 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
// bl == BL_8X8 case (condition on a line not shown here): leaf level,
// partition maps directly onto one decode_b() call.
3099 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3100 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3101 } else if (col + hbs < s->cols) { // FIXME why not <=?
3102 if (row + hbs < s->rows) { // FIXME why not <=?
// Fully inside the frame: all four partition types are possible.
3103 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3105 case PARTITION_NONE:
3106 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3109 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3110 yoff += hbs * 8 * y_stride;
3111 uvoff += hbs * 4 * uv_stride;
3112 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3115 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3118 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3120 case PARTITION_SPLIT:
3121 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3122 decode_sb(ctx, row, col + hbs, lflvl,
3123 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3124 yoff += hbs * 8 * y_stride;
3125 uvoff += hbs * 4 * uv_stride;
3126 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3127 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3128 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
// Bottom edge: only SPLIT or HORZ can fit, one binary decision.
3133 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3134 bp = PARTITION_SPLIT;
3135 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3136 decode_sb(ctx, row, col + hbs, lflvl,
3137 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3140 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3142 } else if (row + hbs < s->rows) { // FIXME why not <=?
// Right edge: only SPLIT or VERT can fit, one binary decision.
3143 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3144 bp = PARTITION_SPLIT;
3145 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3146 yoff += hbs * 8 * y_stride;
3147 uvoff += hbs * 4 * uv_stride;
3148 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3151 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// Bottom-right corner: SPLIT is the only legal partition.
3154 bp = PARTITION_SPLIT;
3155 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
// Count the chosen partition for backward probability adaptation.
3157 s->counts.partition[bl][c][bp]++;
/*
 * Second-pass variant of decode_sb(): instead of parsing partition symbols
 * from the bitstream, replay the partition tree stored per block during the
 * first pass (s->b->bl / s->b->bp) and call decode_b() for reconstruction.
 * The traversal order and edge handling mirror decode_sb() exactly.
 */
3160 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3161 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3163 VP9Context *s = ctx->priv_data;
3165 ptrdiff_t hbs = 4 >> bl;
3166 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3167 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
// Leaf level (bl == BL_8X8; condition on a line not shown here).
3170 av_assert2(b->bl == BL_8X8);
3171 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3172 } else if (s->b->bl == bl) {
// The stored block was coded at exactly this level: NONE, H or V partition.
3173 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3174 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3175 yoff += hbs * 8 * y_stride;
3176 uvoff += hbs * 4 * uv_stride;
3177 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3178 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3181 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// Stored level is deeper: this level was SPLIT — recurse into the
// quadrants that lie inside the frame.
3184 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3185 if (col + hbs < s->cols) { // FIXME why not <=?
3186 if (row + hbs < s->rows) {
3187 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
3188 uvoff + 4 * hbs, bl + 1);
3189 yoff += hbs * 8 * y_stride;
3190 uvoff += hbs * 4 * uv_stride;
3191 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3192 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3193 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3197 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3199 } else if (row + hbs < s->rows) {
3200 yoff += hbs * 8 * y_stride;
3201 uvoff += hbs * 4 * uv_stride;
3202 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
/*
 * Apply the in-loop deblocking filter to one 64x64 superblock, using the
 * per-edge masks and per-8px-block levels built by mask_edges()/decode_b().
 *
 * For every edge: L is the filter level for that 8px block, H = L >> 4 is
 * the high-edge-variance threshold, and E/I are the mblim/lim values looked
 * up from the LUTs filled in decode_b(). Where two adjacent 8px edges share
 * the same filter width, the paired loop_filter_16 / loop_filter_mix2
 * functions process both at once (packing the second edge's E/I in bits
 * 8-15).
 */
3207 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3208 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3210 VP9Context *s = ctx->priv_data;
3211 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3212 uint8_t *dst = f->data[0] + yoff, *lvl = lflvl->level;
3213 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
3216 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3217 // if you think of them as acting on a 8x8 block max, we can interleave
3218 // each v/h within the single x loop, but that only works if we work on
3219 // 8 pixel blocks, and we won't always do that (we want at least 16px
3220 // to use SSE2 optimizations, perhaps 32 for AVX2)
3222 // filter edges between columns, Y plane (e.g. block1 | block2)
// Process two 8px rows per iteration so vertically-adjacent edges can be
// merged into a single 16-wide filter call.
3223 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
3224 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
3225 uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
// hm1/hm2: 16/8/4px edges of the two rows; hm13/hm23: inner 4px edges.
3226 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3227 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3228 unsigned hm = hm1 | hm2 | hm13 | hm23;
3230 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
3232 int L = *l, H = L >> 4;
3233 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3236 if (hmask1[0] & x) {
3237 if (hmask2[0] & x) {
// Both rows carry a 16px edge with the same level: one 16-high call.
3238 av_assert2(l[8] == L);
3239 s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
3241 s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
3243 } else if (hm2 & x) {
// Second row also filtered (level from l[8] on a line not shown here);
// pack its E/I into the high byte for the mix2 call.
3246 E |= s->filter.mblim_lut[L] << 8;
3247 I |= s->filter.lim_lut[L] << 8;
3248 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3250 [0](ptr, ls_y, E, I, H);
3252 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3253 [0](ptr, ls_y, E, I, H);
3256 } else if (hm2 & x) {
// Only the lower of the two rows has an edge here.
3257 int L = l[8], H = L >> 4;
3258 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3261 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3262 [0](ptr + 8 * ls_y, ls_y, E, I, H);
// Inner 4px column edges (offset +4 inside the 8px block).
3266 int L = *l, H = L >> 4;
3267 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3272 E |= s->filter.mblim_lut[L] << 8;
3273 I |= s->filter.lim_lut[L] << 8;
3274 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
3276 s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
3278 } else if (hm23 & x) {
3279 int L = l[8], H = L >> 4;
3280 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3282 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
3288 // filter edges between rows, Y plane (e.g. ------)
3290 dst = f->data[0] + yoff;
// Horizontal edges: step one 8px row at a time; horizontally-adjacent
// edges are merged (x advances two mask bits per iteration).
3292 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
3293 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
3294 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
3296 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
3299 int L = *l, H = L >> 4;
3300 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3303 if (vmask[0] & (x << 1)) {
3304 av_assert2(l[1] == L);
3305 s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
3307 s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
3309 } else if (vm & (x << 1)) {
3312 E |= s->filter.mblim_lut[L] << 8;
3313 I |= s->filter.lim_lut[L] << 8;
3314 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3315 [!!(vmask[1] & (x << 1))]
3316 [1](ptr, ls_y, E, I, H);
3318 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3319 [1](ptr, ls_y, E, I, H);
3321 } else if (vm & (x << 1)) {
3322 int L = l[1], H = L >> 4;
3323 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3325 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
3326 [1](ptr + 8, ls_y, E, I, H);
// Inner 4px row edges (offset +4 rows inside the 8px block).
3330 int L = *l, H = L >> 4;
3331 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3333 if (vm3 & (x << 1)) {
3336 E |= s->filter.mblim_lut[L] << 8;
3337 I |= s->filter.lim_lut[L] << 8;
3338 s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
3340 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
3342 } else if (vm3 & (x << 1)) {
3343 int L = l[1], H = L >> 4;
3344 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3346 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
3351 // same principle but for U/V planes
3352 for (p = 0; p < 2; p++) {
3354 dst = f->data[1 + p] + uvoff;
// Column edges, chroma: 4 mask rows per 16 chroma lines; level pointer
// skips 2 rows at a time (one 8px chroma row == two luma level rows).
3355 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
3356 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
3357 uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
3358 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
3359 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
3361 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
3364 int L = *l, H = L >> 4;
3365 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3367 if (hmask1[0] & x) {
3368 if (hmask2[0] & x) {
3369 av_assert2(l[16] == L);
3370 s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
3372 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
3374 } else if (hm2 & x) {
3377 E |= s->filter.mblim_lut[L] << 8;
3378 I |= s->filter.lim_lut[L] << 8;
3379 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3381 [0](ptr, ls_uv, E, I, H);
3383 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3384 [0](ptr, ls_uv, E, I, H);
3386 } else if (hm2 & x) {
3387 int L = l[16], H = L >> 4;
3388 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3390 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3391 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
// Row edges, chroma.
3399 dst = f->data[1 + p] + uvoff;
3400 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
3401 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
3402 unsigned vm = vmask[0] | vmask[1] | vmask[2];
3404 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
3407 int L = *l, H = L >> 4;
3408 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3411 if (vmask[0] & (x << 2)) {
3412 av_assert2(l[2] == L);
3413 s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
3415 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
3417 } else if (vm & (x << 2)) {
3420 E |= s->filter.mblim_lut[L] << 8;
3421 I |= s->filter.lim_lut[L] << 8;
3422 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3423 [!!(vmask[1] & (x << 2))]
3424 [1](ptr, ls_uv, E, I, H);
3426 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3427 [1](ptr, ls_uv, E, I, H);
3429 } else if (vm & (x << 2)) {
3430 int L = l[2], H = L >> 4;
3431 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3433 s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
3434 [1](ptr + 8, ls_uv, E, I, H);
/**
 * Compute the extent of one tile along one dimension.
 *
 * The frame is split into 1 << log2_n tiles over n superblocks; the
 * resulting superblock range is clamped to n and converted to 8-pel
 * block units (<< 3), which is what row/col iteration uses.
 *
 * @param start  receives the first block (8-pel units) of the tile
 * @param end    receives one past the last block of the tile
 * @param idx    tile index
 * @param log2_n log2 of the tile count in this dimension
 * @param n      dimension size in superblocks
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = (idx * n) >> log2_n;
    int sb_end = ((idx + 1) * n) >> log2_n;

    if (sb_start > n)
        sb_start = n;
    if (sb_end > n)
        sb_end = n;

    *start = sb_start << 3;
    *end = sb_end << 3;
}
3452 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3453 int max_count, int update_factor)
3455 unsigned ct = ct0 + ct1, p2, p1;
3461 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3462 p2 = av_clip(p2, 1, 255);
3463 ct = FFMIN(ct, max_count);
3464 update_factor = FASTDIV(update_factor * ct, max_count);
3466 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3467 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
3470 static void adapt_probs(VP9Context *s)
3473 prob_context *p = &s->prob_ctx[s->framectxid].p;
3474 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
3477 for (i = 0; i < 4; i++)
3478 for (j = 0; j < 2; j++)
3479 for (k = 0; k < 2; k++)
3480 for (l = 0; l < 6; l++)
3481 for (m = 0; m < 6; m++) {
3482 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3483 unsigned *e = s->counts.eob[i][j][k][l][m];
3484 unsigned *c = s->counts.coef[i][j][k][l][m];
3486 if (l == 0 && m >= 3) // dc only has 3 pt
3489 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3490 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3491 adapt_prob(&pp[2], c[1], c[2], 24, uf);
3494 if (s->keyframe || s->intraonly) {
3495 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3496 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3497 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3498 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
3503 for (i = 0; i < 3; i++)
3504 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
3507 for (i = 0; i < 4; i++)
3508 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
3511 if (s->comppredmode == PRED_SWITCHABLE) {
3512 for (i = 0; i < 5; i++)
3513 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
3517 if (s->comppredmode != PRED_SINGLEREF) {
3518 for (i = 0; i < 5; i++)
3519 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3520 s->counts.comp_ref[i][1], 20, 128);
3523 if (s->comppredmode != PRED_COMPREF) {
3524 for (i = 0; i < 5; i++) {
3525 uint8_t *pp = p->single_ref[i];
3526 unsigned (*c)[2] = s->counts.single_ref[i];
3528 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3529 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3533 // block partitioning
3534 for (i = 0; i < 4; i++)
3535 for (j = 0; j < 4; j++) {
3536 uint8_t *pp = p->partition[i][j];
3537 unsigned *c = s->counts.partition[i][j];
3539 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3540 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3541 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3545 if (s->txfmmode == TX_SWITCHABLE) {
3546 for (i = 0; i < 2; i++) {
3547 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3549 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3550 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3551 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3552 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3553 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3554 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3558 // interpolation filter
3559 if (s->filtermode == FILTER_SWITCHABLE) {
3560 for (i = 0; i < 4; i++) {
3561 uint8_t *pp = p->filter[i];
3562 unsigned *c = s->counts.filter[i];
3564 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3565 adapt_prob(&pp[1], c[1], c[2], 20, 128);
3570 for (i = 0; i < 7; i++) {
3571 uint8_t *pp = p->mv_mode[i];
3572 unsigned *c = s->counts.mv_mode[i];
3574 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3575 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3576 adapt_prob(&pp[2], c[1], c[3], 20, 128);
3581 uint8_t *pp = p->mv_joint;
3582 unsigned *c = s->counts.mv_joint;
3584 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3585 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3586 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3590 for (i = 0; i < 2; i++) {
3592 unsigned *c, (*c2)[2], sum;
3594 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3595 s->counts.mv_comp[i].sign[1], 20, 128);
3597 pp = p->mv_comp[i].classes;
3598 c = s->counts.mv_comp[i].classes;
3599 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3600 adapt_prob(&pp[0], c[0], sum, 20, 128);
3602 adapt_prob(&pp[1], c[1], sum, 20, 128);
3604 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3605 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3607 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3608 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3610 adapt_prob(&pp[6], c[6], sum, 20, 128);
3611 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3612 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3613 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3615 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3616 s->counts.mv_comp[i].class0[1], 20, 128);
3617 pp = p->mv_comp[i].bits;
3618 c2 = s->counts.mv_comp[i].bits;
3619 for (j = 0; j < 10; j++)
3620 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3622 for (j = 0; j < 2; j++) {
3623 pp = p->mv_comp[i].class0_fp[j];
3624 c = s->counts.mv_comp[i].class0_fp[j];
3625 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3626 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3627 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3629 pp = p->mv_comp[i].fp;
3630 c = s->counts.mv_comp[i].fp;
3631 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3632 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3633 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3635 if (s->highprecisionmvs) {
3636 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3637 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3638 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3639 s->counts.mv_comp[i].hp[1], 20, 128);
3644 for (i = 0; i < 4; i++) {
3645 uint8_t *pp = p->y_mode[i];
3646 unsigned *c = s->counts.y_mode[i], sum, s2;
3648 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3649 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3650 sum -= c[TM_VP8_PRED];
3651 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3652 sum -= c[VERT_PRED];
3653 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3654 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3656 adapt_prob(&pp[3], s2, sum, 20, 128);
3658 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3659 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3660 sum -= c[DIAG_DOWN_LEFT_PRED];
3661 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3662 sum -= c[VERT_LEFT_PRED];
3663 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3664 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3668 for (i = 0; i < 10; i++) {
3669 uint8_t *pp = p->uv_mode[i];
3670 unsigned *c = s->counts.uv_mode[i], sum, s2;
3672 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3673 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3674 sum -= c[TM_VP8_PRED];
3675 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3676 sum -= c[VERT_PRED];
3677 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3678 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3680 adapt_prob(&pp[3], s2, sum, 20, 128);
3682 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3683 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3684 sum -= c[DIAG_DOWN_LEFT_PRED];
3685 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3686 sum -= c[VERT_LEFT_PRED];
3687 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3688 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3692 static void free_buffers(VP9Context *s)
3694 av_freep(&s->intra_pred_data[0]);
3695 av_freep(&s->b_base);
3696 av_freep(&s->block_base);
3699 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3701 VP9Context *s = ctx->priv_data;
3704 for (i = 0; i < 2; i++) {
3705 if (s->frames[i].tf.f->data[0])
3706 vp9_unref_frame(ctx, &s->frames[i]);
3707 av_frame_free(&s->frames[i].tf.f);
3709 for (i = 0; i < 8; i++) {
3710 if (s->refs[i].f->data[0])
3711 ff_thread_release_buffer(ctx, &s->refs[i]);
3712 av_frame_free(&s->refs[i].f);
3713 if (s->next_refs[i].f->data[0])
3714 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3715 av_frame_free(&s->next_refs[i].f);
/* Decode one VP9 packet into s->frames[CUR_FRAME] and, unless the frame
 * is flagged invisible, return it to the caller. Handles the
 * "show existing reference frame" shortcut, per-tile range decoders,
 * optional two-pass frame-threaded decoding, per-sbrow loop filtering
 * and the final rotation of the 8 reference slots. */
static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
                            int *got_frame, AVPacket *pkt)
    const uint8_t *data = pkt->data;
    int size = pkt->size;
    VP9Context *s = ctx->priv_data;
    int res, tile_row, tile_col, i, ref, row, col;
    ptrdiff_t yoff, uvoff, ls_y, ls_uv;

    // decode_frame_header() yields the header size, 0 for a
    // "show existing frame" packet (ref names the slot), or an error
    if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
    } else if (res == 0) {
        if (!s->refs[ref].f->data[0]) {
            av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)

    // rotate the previous current frame into LAST_FRAME (needed for
    // inter prediction and MV references), then allocate a fresh one
    if (s->frames[LAST_FRAME].tf.f->data[0])
        vp9_unref_frame(ctx, &s->frames[LAST_FRAME]);
    if (!s->keyframe && s->frames[CUR_FRAME].tf.f->data[0] &&
        (res = vp9_ref_frame(ctx, &s->frames[LAST_FRAME], &s->frames[CUR_FRAME])) < 0)
    if (s->frames[CUR_FRAME].tf.f->data[0])
        vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
    if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
    f = s->frames[CUR_FRAME].tf.f;
    f->key_frame = s->keyframe;
    f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    ls_y = f->linesize[0];
    ls_uv =f->linesize[1];

    // ref frame setup: slots flagged in refreshrefmask will point at
    // the frame being decoded, all others keep their current reference
    for (i = 0; i < 8; i++) {
        if (s->next_refs[i].f->data[0])
            ff_thread_release_buffer(ctx, &s->next_refs[i]);
        if (s->refreshrefmask & (1 << i)) {
            res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
            res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);

    // main tile decode loop
    // reset the above-row contexts for the new frame
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->keyframe || s->intraonly) {
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
    memset(s->above_segpred_ctx, 0, s->cols);
    // two-pass decoding (entropy decode first, reconstruct second) is
    // only used with frame threading and backward context updates
    s->pass = s->uses_2pass =
        ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
    if ((res = update_block_buffers(ctx)) < 0) {
        av_log(ctx, AV_LOG_ERROR,
               "Failed to allocate block buffers\n");
    // in parallel mode the frame context is stored up front instead of
    // being backward-adapted after the frame has decoded
    if (s->refreshctx && s->parallelmode) {
        for (i = 0; i < 4; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++)
                            memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
                                   s->prob.coef[i][j][k][l][m], 3);
            // coef tables past the active tx mode are never used
            if (s->txfmmode == i)
        s->prob_ctx[s->framectxid].p = s->prob.p;
        ff_thread_finish_setup(ctx);

    // reset the scratch pointers into the block buffers for this pass
    s->block = s->block_base;
    s->uvblock[0] = s->uvblock_base[0];
    s->uvblock[1] = s->uvblock_base[1];
    s->eob = s->eob_base;
    s->uveob[0] = s->uveob_base[0];
    s->uveob[1] = s->uveob_base[1];

    for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
        set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
                        tile_row, s->tiling.log2_tile_rows, s->sb_rows);

        // slice the packet into tiles, giving each tile column its own
        // range decoder; the last tile spans whatever data remains
        for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {

            if (tile_col == s->tiling.tile_cols - 1 &&
                tile_row == s->tiling.tile_rows - 1) {
                tile_size = AV_RB32(data);
            if (tile_size > size) {
                ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
                return AVERROR_INVALIDDATA;
            ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
            if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
                ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
                return AVERROR_INVALIDDATA;

        // decode one superblock row across all tile columns, so loop
        // filtering and progress reporting can follow row by row
        for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
             row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
            struct VP9Filter *lflvl_ptr = s->lflvl;
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

            for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
                set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
                                tile_col, s->tiling.log2_tile_cols, s->sb_cols);

                // left contexts reset at every tile boundary
                memset(s->left_partition_ctx, 0, 8);
                memset(s->left_skip_ctx, 0, 8);
                if (s->keyframe || s->intraonly) {
                    memset(s->left_mode_ctx, DC_PRED, 16);
                    memset(s->left_mode_ctx, NEARESTMV, 8);
                memset(s->left_y_nnz_ctx, 0, 16);
                memset(s->left_uv_nnz_ctx, 0, 16);
                memset(s->left_segpred_ctx, 0, 8);

                // resume this tile's saved range decoder state
                memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));

                for (col = s->tiling.tile_col_start;
                     col < s->tiling.tile_col_end;
                     col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
                    // FIXME integrate with lf code (i.e. zero after each
                    // use, similar to invtxfm coefficients, or similar)
                    memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));

                        decode_sb_mem(ctx, row, col, lflvl_ptr,
                                      yoff2, uvoff2, BL_64X64);
                        decode_sb(ctx, row, col, lflvl_ptr,
                                  yoff2, uvoff2, BL_64X64);

                // park the range decoder state until the next sbrow
                memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));

            // backup pre-loopfilter reconstruction data for intra
            // prediction of next row of sb64s
            if (row + 8 < s->rows) {
                memcpy(s->intra_pred_data[0],
                       f->data[0] + yoff + 63 * ls_y,
                memcpy(s->intra_pred_data[1],
                       f->data[1] + uvoff + 31 * ls_uv,
                memcpy(s->intra_pred_data[2],
                       f->data[2] + uvoff + 31 * ls_uv,

            // loopfilter one row
            if (s->filter.level) {
                lflvl_ptr = s->lflvl;
                for (col = 0; col < s->cols;
                     col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
                    loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);

            // FIXME maybe we can make this more finegrained by running the
            // loopfilter per-block instead of after each sbrow
            // In fact that would also make intra pred left preparation easier?
            ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);

    // backward-adapt probabilities between the two passes
    if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
        ff_thread_finish_setup(ctx);
    } while (s->pass++ == 1);
    ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);

    // ref frame setup: promote next_refs into the active refs slots
    for (i = 0; i < 8; i++) {
        if (s->refs[i].f->data[0])
            ff_thread_release_buffer(ctx, &s->refs[i]);
        ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);

    // invisible (alt-ref) frames update state but are not output
    if (!s->invisible) {
        if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
3958 static void vp9_decode_flush(AVCodecContext *ctx)
3960 VP9Context *s = ctx->priv_data;
3963 for (i = 0; i < 2; i++)
3964 vp9_unref_frame(ctx, &s->frames[i]);
3965 for (i = 0; i < 8; i++)
3966 ff_thread_release_buffer(ctx, &s->refs[i]);
3969 static int init_frames(AVCodecContext *ctx)
3971 VP9Context *s = ctx->priv_data;
3974 for (i = 0; i < 2; i++) {
3975 s->frames[i].tf.f = av_frame_alloc();
3976 if (!s->frames[i].tf.f) {
3977 vp9_decode_free(ctx);
3978 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
3979 return AVERROR(ENOMEM);
3982 for (i = 0; i < 8; i++) {
3983 s->refs[i].f = av_frame_alloc();
3984 s->next_refs[i].f = av_frame_alloc();
3985 if (!s->refs[i].f || !s->next_refs[i].f) {
3986 vp9_decode_free(ctx);
3987 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
3988 return AVERROR(ENOMEM);
3995 static av_cold int vp9_decode_init(AVCodecContext *ctx)
3997 VP9Context *s = ctx->priv_data;
3999 ctx->internal->allocate_progress = 1;
4000 ctx->pix_fmt = AV_PIX_FMT_YUV420P;
4001 ff_vp9dsp_init(&s->dsp);
4002 ff_videodsp_init(&s->vdsp, 8);
4003 s->filter.sharpness = -1;
4005 return init_frames(ctx);
4008 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4010 return init_frames(avctx);
4013 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4016 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4018 // detect size changes in other threads
4019 if (s->intra_pred_data[0] &&
4020 (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
4024 for (i = 0; i < 2; i++) {
4025 if (s->frames[i].tf.f->data[0])
4026 vp9_unref_frame(dst, &s->frames[i]);
4027 if (ssrc->frames[i].tf.f->data[0]) {
4028 if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
4032 for (i = 0; i < 8; i++) {
4033 if (s->refs[i].f->data[0])
4034 ff_thread_release_buffer(dst, &s->refs[i]);
4035 if (ssrc->next_refs[i].f->data[0]) {
4036 if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
4041 s->invisible = ssrc->invisible;
4042 s->keyframe = ssrc->keyframe;
4043 s->uses_2pass = ssrc->uses_2pass;
4044 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4045 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
4046 if (ssrc->segmentation.enabled) {
4047 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4048 sizeof(s->segmentation.feat));
4054 AVCodec ff_vp9_decoder = {
4056 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4057 .type = AVMEDIA_TYPE_VIDEO,
4058 .id = AV_CODEC_ID_VP9,
4059 .priv_data_size = sizeof(VP9Context),
4060 .init = vp9_decode_init,
4061 .close = vp9_decode_free,
4062 .decode = vp9_decode_frame,
4063 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4064 .flush = vp9_decode_flush,
4065 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4066 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),