2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
35 #define VP9_SYNCCODE 0x498342
/* Per-frame state kept alongside a decoded picture. Only part of the
 * declaration is visible in this excerpt. */
72 typedef struct VP9Frame {
// Reference-counted buffer; vp9_alloc_frame() points both members below into its data.
74 AVBufferRef *extradata;
// Starts at extradata->data (see vp9_alloc_frame()).
75 uint8_t *segmentation_map;
// Starts sz bytes into extradata->data, directly after the segmentation map.
76 struct VP9mvrefPair *mv;
/* Bitmask array indexed [plane][direction][row][size class]; each bit stands
 * for one column, per the inline annotations.
 * NOTE(review): the enclosing declaration is not visible in this excerpt —
 * presumably struct VP9Filter; confirm. */
81 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
82 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
/* Parameters of one block. Only part of the declaration is visible in this
 * excerpt (find_ref_mvs() below also reads a `bs` member that is not shown). */
85 typedef struct VP9Block {
86 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
87 enum FilterMode filter;
// Motion vectors, indexed [sub-block index][reference slot] per the annotations.
88 VP56mv mv[4 /* b_idx */][2 /* ref */];
// Transform modes; presumably tx is for luma and uvtx for chroma — confirm.
90 enum TxfmMode tx, uvtx;
92 enum BlockPartition bp;
/* Decoder-wide state (obtained from AVCodecContext.priv_data by the functions
 * below).
 * NOTE(review): this excerpt omits many lines of the declaration. Several
 * members shown here (for example the three differently typed `coef` arrays)
 * cannot live in the same scope, so they must belong to nested struct members
 * whose opening/closing lines are not visible. */
95 typedef struct VP9Context {
// b_base is allocated in update_block_buffers(); b is presumably a cursor into it — confirm.
102 VP9Block *b_base, *b;
103 int pass, uses_2pass, last_uses_2pass;
104 int row, row7, col, col7;
106 ptrdiff_t y_stride, uv_stride;
// The fields below are filled from the bitstream in decode_frame_header().
110 uint8_t keyframe, last_keyframe;
112 uint8_t use_last_frame_mvs;
118 uint8_t refreshrefmask;
119 uint8_t highprecisionmvs;
120 enum FilterMode filtermode;
121 uint8_t allowcompinter;
124 uint8_t parallelmode;
128 uint8_t varcompref[2];
129 ThreadFrame refs[8], next_refs[8];
138 uint8_t mblim_lut[64];
// Signed quantizer index deltas read with get_sbits_inv() in decode_frame_header().
146 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
151 uint8_t absolute_vals;
157 uint8_t skip_enabled;
// Tiling parameters (accessed as s->tiling.* in decode_frame_header()).
166 unsigned log2_tile_cols, log2_tile_rows;
167 unsigned tile_cols, tile_rows;
168 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
// Frame dimensions set by update_size(): sb_* in 64-pixel units, rows/cols in 8-pixel units.
170 unsigned sb_cols, sb_rows, rows, cols;
// NOTE(review): two coefficient probability tables with different last dimensions;
// their enclosing members are not visible here.
173 uint8_t coef[4][2][2][6][6][3];
177 uint8_t coef[4][2][2][6][6][11];
// Unsigned counters; presumably the members of s->counts (zeroed in
// decode_frame_header(), incremented in read_mv_component()) — confirm.
182 unsigned y_mode[4][10];
183 unsigned uv_mode[10][10];
184 unsigned filter[4][3];
185 unsigned mv_mode[7][4];
186 unsigned intra[4][2];
188 unsigned single_ref[5][2][2];
189 unsigned comp_ref[5][2];
190 unsigned tx32p[2][4];
191 unsigned tx16p[2][3];
194 unsigned mv_joint[4];
197 unsigned classes[11];
199 unsigned bits[10][2];
200 unsigned class0_fp[2][4];
202 unsigned class0_hp[2];
205 unsigned partition[4][4][4];
206 unsigned coef[4][2][2][6][6][3];
207 unsigned eob[4][2][2][6][6][2];
209 enum TxfmMode txfmmode;
210 enum CompPredMode comppredmode;
212 // contextual (left/above) cache
// left_* are fixed-size arrays inside the context; above_* are pointers that
// update_size() carves out of a single allocation scaled by sb_cols.
213 DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
214 DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
215 DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
216 DECLARE_ALIGNED(8, uint8_t, left_uv_nnz_ctx)[2][8];
217 DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
218 DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
219 DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
220 DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
221 DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
222 DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
223 DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
224 DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
225 uint8_t *above_partition_ctx;
226 uint8_t *above_mode_ctx;
227 // FIXME maybe merge some of the below in a flags field?
228 uint8_t *above_y_nnz_ctx;
229 uint8_t *above_uv_nnz_ctx[2];
230 uint8_t *above_skip_ctx; // 1bit
231 uint8_t *above_txfm_ctx; // 2bit
232 uint8_t *above_segpred_ctx; // 1bit
233 uint8_t *above_intra_ctx; // 1bit
234 uint8_t *above_comp_ctx; // 1bit
235 uint8_t *above_ref_ctx; // 2bit
236 uint8_t *above_filter_ctx;
237 VP56mv (*above_mv_ctx)[2];
// intra_pred_data[0] is the base pointer of the allocation made in update_size();
// it is the pointer passed to av_freep() there.
240 uint8_t *intra_pred_data[3];
241 struct VP9Filter *lflvl;
242 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71*80];
244 // block reconstruction intermediates
// Records the uses_2pass value the block buffers were last allocated for.
245 int block_alloc_using_2pass;
246 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
247 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
// Clamping bounds applied by clamp_mv().
248 struct { int x, y; } min_mv, max_mv;
249 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
250 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
/* Block dimension lookup, indexed [table][block size][dimension].
 * Each entry of the second table is the matching entry of the first halved,
 * with a minimum of 1.
 * NOTE(review): presumably [..][0] is width and [..][1] is height, with the
 * first table in 4-pixel units and the second in 8-pixel units — confirm. */
253 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
255 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
256 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
258 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
259 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
/* Allocates the picture buffer and the per-frame side data for f.
 *
 * The side data is one zero-initialised AVBufferRef holding sz bytes of
 * segmentation map followed by sz VP9mvrefPair entries, where
 * sz = 64 * sb_cols * sb_rows.
 *
 * Returns AVERROR(ENOMEM) if the side data cannot be allocated (the picture
 * buffer is released again in that case).
 * NOTE(review): the handling of a failed ff_thread_get_buffer() and the
 * success return are not visible in this excerpt. */
263 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
265 VP9Context *s = ctx->priv_data;
268 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
// sb_cols/sb_rows are in 64-pixel units (see update_size()); 64 entries per superblock.
270 sz = 64 * s->sb_cols * s->sb_rows;
271 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
272 ff_thread_release_buffer(ctx, &f->tf);
273 return AVERROR(ENOMEM);
// Both views alias the single extradata allocation.
276 f->segmentation_map = f->extradata->data;
277 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
279 // retain segmentation map if it doesn't update
280 if (s->segmentation.enabled && !s->segmentation.update_map &&
281 !s->intraonly && !s->keyframe && !s->errorres) {
282 memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, sz);
/* Releases what f holds: its picture buffer and its reference to the
 * extradata buffer.
 * NOTE(review): segmentation_map and mv point into extradata (see
 * vp9_alloc_frame()) and are not reset in the lines visible here. */
288 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
290 ff_thread_release_buffer(ctx, &f->tf);
291 av_buffer_unref(&f->extradata);
/* Makes dst a new reference to src: references the picture via
 * ff_thread_ref_frame(), takes a new reference on src->extradata and copies
 * the pointer(s) into it.
 *
 * Returns AVERROR(ENOMEM) if av_buffer_ref() fails (dst is unreferenced again
 * first).
 * NOTE(review): the failure path of ff_thread_ref_frame() and the success
 * return are not visible in this excerpt. */
294 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
298 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
300 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
// Undo the picture reference taken above before reporting the failure.
301 vp9_unref_frame(ctx, dst);
302 return AVERROR(ENOMEM);
// Same pointer value as in src; it points into the shared extradata buffer.
305 dst->segmentation_map = src->segmentation_map;
/* Re-derives the size-dependent state for a w x h frame.
 *
 * Computes the frame size in 64-pixel superblocks (sb_cols/sb_rows) and
 * 8-pixel units (cols/rows), then makes one allocation that is split, via the
 * assign() macro, into the intra prediction rows, the above_* context arrays
 * and the lflvl array. Each part is sized as sb_cols * n elements.
 *
 * w and h must be positive (enforced with av_assert0).
 * Returns AVERROR(ENOMEM) on allocation failure.
 * NOTE(review): the body of the early-out taken when the size is unchanged,
 * the NULL check on p and the success return are not visible in this
 * excerpt. */
311 static int update_size(AVCodecContext *ctx, int w, int h)
313 VP9Context *s = ctx->priv_data;
316 av_assert0(w > 0 && h > 0);
// Nothing to do when buffers already exist and the size did not change.
318 if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height)
// Round up to whole 64x64 superblocks and whole 8x8 units.
323 s->sb_cols = (w + 63) >> 6;
324 s->sb_rows = (h + 63) >> 6;
325 s->cols = (w + 7) >> 3;
326 s->rows = (h + 7) >> 3;
// assign(): point var at the cursor p, then advance p by sb_cols * n elements of *var.
328 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
// intra_pred_data[0] is the base of the previous allocation (first assign() below).
329 av_freep(&s->intra_pred_data[0]);
// 240 = sum of the n values of all single-byte arrays assigned below (64 + 32 + 32 + 16 + 16 + 10 * 8).
330 p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
332 return AVERROR(ENOMEM);
333 assign(s->intra_pred_data[0], uint8_t *, 64);
334 assign(s->intra_pred_data[1], uint8_t *, 32);
335 assign(s->intra_pred_data[2], uint8_t *, 32);
336 assign(s->above_y_nnz_ctx, uint8_t *, 16);
337 assign(s->above_mode_ctx, uint8_t *, 16);
338 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
339 assign(s->above_partition_ctx, uint8_t *, 8);
340 assign(s->above_skip_ctx, uint8_t *, 8);
341 assign(s->above_txfm_ctx, uint8_t *, 8);
342 assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
343 assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
344 assign(s->above_segpred_ctx, uint8_t *, 8);
345 assign(s->above_intra_ctx, uint8_t *, 8);
346 assign(s->above_comp_ctx, uint8_t *, 8);
347 assign(s->above_ref_ctx, uint8_t *, 8);
348 assign(s->above_filter_ctx, uint8_t *, 8);
349 assign(s->lflvl, struct VP9Filter *, 1);
352 // these will be re-allocated a little later
// Freeing them makes update_block_buffers() take its allocation path again.
353 av_freep(&s->b_base);
354 av_freep(&s->block_base);
/* (Re)allocates the VP9Block array and the coefficient/eob scratch buffer.
 *
 * Two layouts are visible: one sized for every block/superblock of the frame
 * (cols * rows blocks, sbs superblocks) and one sized for a single block and
 * a single superblock. In both, block_base is one zeroed allocation that is
 * split into the Y coefficients, the two chroma coefficient planes, and then
 * the Y and chroma eob byte arrays.
 *
 * Returns AVERROR(ENOMEM) on allocation failure.
 * NOTE(review): the condition choosing between the two layouts (presumably
 * s->uses_2pass), the early-out body and the success return are not visible
 * in this excerpt. */
359 static int update_block_buffers(AVCodecContext *ctx)
361 VP9Context *s = ctx->priv_data;
// Keep the existing buffers if they were allocated for the current pass mode.
363 if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->uses_2pass)
367 av_free(s->block_base);
369 int sbs = s->sb_cols * s->sb_rows;
// Whole-frame layout: one VP9Block per 8x8 unit, coefficient storage per superblock.
371 s->b_base = av_malloc(sizeof(VP9Block) * s->cols * s->rows);
372 s->block_base = av_mallocz((64 * 64 + 128) * sbs * 3);
373 if (!s->b_base || !s->block_base)
374 return AVERROR(ENOMEM);
// Pointer arithmetic below is in int16_t elements until the cast to uint8_t *.
375 s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
376 s->uvblock_base[1] = s->uvblock_base[0] + sbs * 32 * 32;
377 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * 32 * 32);
378 s->uveob_base[0] = s->eob_base + 256 * sbs;
379 s->uveob_base[1] = s->uveob_base[0] + 64 * sbs;
// Single-block layout: same partitioning as above with sbs == 1.
381 s->b_base = av_malloc(sizeof(VP9Block));
382 s->block_base = av_mallocz((64 * 64 + 128) * 3);
383 if (!s->b_base || !s->block_base)
384 return AVERROR(ENOMEM);
385 s->uvblock_base[0] = s->block_base + 64 * 64;
386 s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
387 s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
388 s->uveob_base[0] = s->eob_base + 256;
389 s->uveob_base[1] = s->uveob_base[0] + 64;
// Remember which layout is in place for the check at the top.
391 s->block_alloc_using_2pass = s->uses_2pass;
/* Reads a sign-magnitude value: an n-bit magnitude followed by one sign bit.
 * Returns the negated magnitude when the sign bit is set, otherwise the
 * magnitude itself. Consumes n + 1 bits from gb. */
396 // for some reason the sign bit is at the end, not the start, of a bit sequence
397 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
399 int v = get_bits(gb, n);
400 return get_bits1(gb) ? -v : v;
/* Maps a non-negative code v back to a value around the centre m.
 * - v > 2 * m: v is returned unchanged.
 * - v odd:     m - (v + 1) / 2  (steps below m).
 * - v even:    m + v / 2        (steps above m, v == 0 yields m). */
403 static av_always_inline int inv_recenter_nonneg(int v, int m)
405 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
/* Decodes one differential probability update from range coder c and returns
 * the new probability derived from the current one, p.
 *
 * A variable-length code selects an index d; inv_map_table[d] is then
 * re-centred around p with inv_recenter_nonneg(). The re-centring is mirrored
 * for p > 128 (computed from 255 - p and subtracted from 255).
 *
 * NOTE(review): no bounds check on d is visible before it indexes
 * inv_map_table[254]; parts of the VLC decoding and the end of the table are
 * missing from this excerpt. Confirm the largest decodable d is below 254. */
408 // differential forward probability updates
409 static int update_prob(VP56RangeCoder *c, int p)
411 static const int inv_map_table[254] = {
412 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
413 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
414 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
415 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
416 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
417 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
418 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
419 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
420 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
421 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
422 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
423 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
424 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
425 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
426 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
427 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
428 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
429 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
434 /* This code is trying to do a differential probability update. For a
435 * current probability A in the range [1, 255], the difference to a new
436 * probability of any value can be expressed differentially as 1-A,255-A
437 * where some part of this (absolute range) exists both in positive as
438 * well as the negative part, whereas another part only exists in one
439 * half. We're trying to code this shared part differentially, i.e.
440 * times two where the value of the lowest bit specifies the sign, and
441 * the single part is then coded on top of this. This absolute difference
442 * then again has a value of [0,254], but a bigger value in this range
443 * indicates that we're further away from the original value A, so we
444 * can code this as a VLC code, since higher values are increasingly
445 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
446 * updates vs. the 'fine, exact' updates further down the range, which
447 * adds one extra dimension to this differential update model. */
// VLC prefix: each further 1-bit selects a longer suffix and a higher base (0, 16, 32, ...).
449 if (!vp8_rac_get(c)) {
450 d = vp8_rac_get_uint(c, 4) + 0;
451 } else if (!vp8_rac_get(c)) {
452 d = vp8_rac_get_uint(c, 4) + 16;
453 } else if (!vp8_rac_get(c)) {
454 d = vp8_rac_get_uint(c, 5) + 32;
456 d = vp8_rac_get_uint(c, 7);
// Extends the 7-bit value with one extra bit (the guarding condition is not visible here).
458 d = (d << 1) - 65 + vp8_rac_get(c);
// Re-centre around p; the p > 128 case mirrors the computation around 255.
462 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
463 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
/* Parses the frame header found in the size bytes at data and updates the
 * decoder context accordingly.
 *
 * The header has two parts. The first is bit-packed and read through s->gb:
 * frame type flags, frame size, reference selection, loop filter,
 * quantizer, segmentation and tiling parameters. It ends with a 16-bit
 * length for the second part, which is range-coded, read through s->c, and
 * carries the forward probability updates.
 *
 * *ref receives a 3-bit index when the corresponding header bit is set.
 * NOTE(review): what is returned in that case is not visible in this excerpt.
 *
 * Returns (data2 - data) + size2, i.e. the offset of the range-coded part
 * plus its length, or a negative AVERROR code (AVERROR_INVALIDDATA for
 * malformed input, AVERROR(ENOMEM) on allocation failure).
 *
 * NOTE(review): many lines (braces, else branches, short statements) are
 * missing from this excerpt, so the nesting of the statements below cannot be
 * read off directly. */
466 static int decode_frame_header(AVCodecContext *ctx,
467 const uint8_t *data, int size, int *ref)
469 VP9Context *s = ctx->priv_data;
470 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
472 const uint8_t *data2;
475 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
476 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
479 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
480 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
481 return AVERROR_INVALIDDATA;
483 s->profile = get_bits1(&s->gb);
484 if (get_bits1(&s->gb)) { // reserved bit
485 av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
486 return AVERROR_INVALIDDATA;
// Optional 3-bit index stored through the ref output parameter.
488 if (get_bits1(&s->gb)) {
489 *ref = get_bits(&s->gb, 3);
// Remember the previous frame's state before overwriting it with this frame's flags.
492 s->last_uses_2pass = s->uses_2pass;
493 s->last_keyframe = s->keyframe;
// Both flags are stored inverted relative to the coded bit.
494 s->keyframe = !get_bits1(&s->gb);
495 last_invisible = s->invisible;
496 s->invisible = !get_bits1(&s->gb);
497 s->errorres = get_bits1(&s->gb);
// Previous-frame MVs are only usable without error resilience and if the previous frame was shown.
498 s->use_last_frame_mvs = !s->errorres && !last_invisible;
500 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
501 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
502 return AVERROR_INVALIDDATA;
504 s->colorspace = get_bits(&s->gb, 3);
505 if (s->colorspace == 7) { // RGB = profile 1
506 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
507 return AVERROR_INVALIDDATA;
509 s->fullrange = get_bits1(&s->gb);
510 // for profile 1, here follows the subsampling bits
// This path refreshes all eight reference slots.
511 s->refreshrefmask = 0xff;
// Dimensions are coded minus one in 16 bits each.
512 w = get_bits(&s->gb, 16) + 1;
513 h = get_bits(&s->gb, 16) + 1;
514 if (get_bits1(&s->gb)) // display size
515 skip_bits(&s->gb, 32);
517 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
518 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
520 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
521 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
522 return AVERROR_INVALIDDATA;
524 s->refreshrefmask = get_bits(&s->gb, 8);
525 w = get_bits(&s->gb, 16) + 1;
526 h = get_bits(&s->gb, 16) + 1;
527 if (get_bits1(&s->gb)) // display size
528 skip_bits(&s->gb, 32);
// Path that uses references: refresh mask, then three (slot index, sign bias) pairs.
530 s->refreshrefmask = get_bits(&s->gb, 8);
531 s->refidx[0] = get_bits(&s->gb, 3);
532 s->signbias[0] = get_bits1(&s->gb);
533 s->refidx[1] = get_bits(&s->gb, 3);
534 s->signbias[1] = get_bits1(&s->gb);
535 s->refidx[2] = get_bits(&s->gb, 3);
536 s->signbias[2] = get_bits1(&s->gb);
// Reject the frame if any selected reference slot holds no picture data.
537 if (!s->refs[s->refidx[0]].f->data[0] ||
538 !s->refs[s->refidx[1]].f->data[0] ||
539 !s->refs[s->refidx[2]].f->data[0]) {
540 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
541 return AVERROR_INVALIDDATA;
// Frame size: taken from the first reference whose flag bit is set, otherwise coded explicitly.
543 if (get_bits1(&s->gb)) {
544 w = s->refs[s->refidx[0]].f->width;
545 h = s->refs[s->refidx[0]].f->height;
546 } else if (get_bits1(&s->gb)) {
547 w = s->refs[s->refidx[1]].f->width;
548 h = s->refs[s->refidx[1]].f->height;
549 } else if (get_bits1(&s->gb)) {
550 w = s->refs[s->refidx[2]].f->width;
551 h = s->refs[s->refidx[2]].f->height;
553 w = get_bits(&s->gb, 16) + 1;
554 h = get_bits(&s->gb, 16) + 1;
556 // Note that in this code, "CUR_FRAME" is actually before we
557 // have formally allocated a frame, and thus actually represents
// Previous-frame MVs are only kept when the frame size is unchanged.
559 s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
560 s->frames[CUR_FRAME].tf.f->height == h;
561 if (get_bits1(&s->gb)) // display size
562 skip_bits(&s->gb, 32);
563 s->highprecisionmvs = get_bits1(&s->gb);
564 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
// Compound prediction is allowed only if the three sign biases are not all equal.
566 s->allowcompinter = s->signbias[0] != s->signbias[1] ||
567 s->signbias[0] != s->signbias[2];
568 if (s->allowcompinter) {
// varcompref[] gets the two references sharing a sign bias; the third one is
// presumably stored in a line not visible here — confirm.
569 if (s->signbias[0] == s->signbias[1]) {
571 s->varcompref[0] = 0;
572 s->varcompref[1] = 1;
573 } else if (s->signbias[0] == s->signbias[2]) {
575 s->varcompref[0] = 0;
576 s->varcompref[1] = 2;
579 s->varcompref[0] = 1;
580 s->varcompref[1] = 2;
// Error-resilient frames force refreshctx = 0 and parallelmode = 1 without reading bits.
585 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
586 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
// c selects which of the four probability contexts (s->prob_ctx[c]) this frame uses.
587 s->framectxid = c = get_bits(&s->gb, 2);
589 /* loopfilter header data */
590 s->filter.level = get_bits(&s->gb, 6);
591 sharp = get_bits(&s->gb, 3);
592 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
593 // the old cache values since they are still valid
594 if (s->filter.sharpness != sharp)
595 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
596 s->filter.sharpness = sharp;
// Optional per-reference / per-mode loop filter deltas, each coded only if its flag bit is set.
597 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
598 if (get_bits1(&s->gb)) {
599 for (i = 0; i < 4; i++)
600 if (get_bits1(&s->gb))
601 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
602 for (i = 0; i < 2; i++)
603 if (get_bits1(&s->gb))
604 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
607 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
610 /* quantization header data */
611 s->yac_qi = get_bits(&s->gb, 8);
612 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
613 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
614 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
// Lossless mode is signalled implicitly by an all-zero quantizer configuration.
615 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
616 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
618 /* segmentation header info */
619 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
620 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
// Probabilities default to 255 when not explicitly coded.
621 for (i = 0; i < 7; i++)
622 s->prob.seg[i] = get_bits1(&s->gb) ?
623 get_bits(&s->gb, 8) : 255;
624 if ((s->segmentation.temporal = get_bits1(&s->gb))) {
625 for (i = 0; i < 3; i++)
626 s->prob.segpred[i] = get_bits1(&s->gb) ?
627 get_bits(&s->gb, 8) : 255;
// Reusing or predicting from the previous segmentation map requires an unchanged frame size.
630 if ((!s->segmentation.update_map || s->segmentation.temporal) &&
631 (w != s->frames[CUR_FRAME].tf.f->width ||
632 h != s->frames[CUR_FRAME].tf.f->height)) {
633 av_log(ctx, AV_LOG_ERROR,
634 "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
635 s->segmentation.temporal, s->segmentation.update_map);
636 return AVERROR_INVALIDDATA;
// Optional per-segment feature data for all eight segments.
639 if (get_bits1(&s->gb)) {
640 s->segmentation.absolute_vals = get_bits1(&s->gb);
641 for (i = 0; i < 8; i++) {
642 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
643 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
644 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
645 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
646 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
647 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
648 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
// Clears all features of segment 0 (the branch this belongs to is not visible here).
652 s->segmentation.feat[0].q_enabled = 0;
653 s->segmentation.feat[0].lf_enabled = 0;
654 s->segmentation.feat[0].skip_enabled = 0;
655 s->segmentation.feat[0].ref_enabled = 0;
658 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
// Only entry 0 is computed when segmentation is disabled.
659 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
660 int qyac, qydc, quvac, quvdc, lflvl, sh;
662 if (s->segmentation.feat[i].q_enabled) {
663 if (s->segmentation.absolute_vals)
664 qyac = s->segmentation.feat[i].q_val;
666 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
// All quantizer indices are clipped to 8 bits before the table lookups.
670 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
671 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
672 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
673 qyac = av_clip_uintp2(qyac, 8);
// qmul[plane][0] comes from the DC table, qmul[plane][1] from the AC table.
675 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
676 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
677 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
678 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];
// Deltas are doubled (shifted by one) for filter levels of 32 and above.
680 sh = s->filter.level >= 32;
681 if (s->segmentation.feat[i].lf_enabled) {
682 if (s->segmentation.absolute_vals)
683 lflvl = s->segmentation.feat[i].lf_val;
685 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
687 lflvl = s->filter.level;
// Entry [0] uses only the ref delta; entries [1..3] add the mode delta as well. Results are clipped to 6 bits.
689 s->segmentation.feat[i].lflvl[0][0] =
690 s->segmentation.feat[i].lflvl[0][1] =
691 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
692 for (j = 1; j < 4; j++) {
693 s->segmentation.feat[i].lflvl[j][0] =
694 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
695 s->lf_delta.mode[0]) << sh), 6);
696 s->segmentation.feat[i].lflvl[j][1] =
697 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
698 s->lf_delta.mode[1]) << sh), 6);
// Apply the frame size now: the tiling limits below depend on s->sb_cols.
703 if ((res = update_size(ctx, w, h)) < 0) {
704 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
// Minimum log2 tile columns: smallest shift that leaves at most 64 superblock columns per tile.
707 for (s->tiling.log2_tile_cols = 0;
708 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
709 s->tiling.log2_tile_cols++) ;
// Maximum log2 tile columns, derived from the smallest shift that leaves fewer than 4 superblock columns.
710 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
711 max = FFMAX(0, max - 1);
// Each set bit raises log2_tile_cols by one, up to max.
712 while (max > s->tiling.log2_tile_cols) {
713 if (get_bits1(&s->gb))
714 s->tiling.log2_tile_cols++;
718 s->tiling.log2_tile_rows = decode012(&s->gb);
719 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
// Resize the per-tile-column range coder array only when the column count changes.
720 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
721 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
// NOTE(review): the result overwrites s->c_b directly; if av_fast_realloc() keeps
// the old buffer alive on failure, that buffer would be leaked here — confirm.
722 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
723 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
725 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
726 return AVERROR(ENOMEM);
// Key, error-resilient and intra-only frames reset all four probability contexts to the defaults.
730 if (s->keyframe || s->errorres || s->intraonly) {
731 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
732 s->prob_ctx[3].p = vp9_default_probs;
733 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
734 sizeof(vp9_default_coef_probs));
735 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
736 sizeof(vp9_default_coef_probs));
737 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
738 sizeof(vp9_default_coef_probs));
739 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
740 sizeof(vp9_default_coef_probs));
743 // next 16 bits is size of the rest of the header (arith-coded)
744 size2 = get_bits(&s->gb, 16);
745 data2 = align_get_bits(&s->gb);
// The range-coded part must fit in what is left of the input.
746 if (size2 > size - (data2 - data)) {
747 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
748 return AVERROR_INVALIDDATA;
750 ff_vp56_init_range_decoder(&s->c, data2, size2);
751 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
752 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
753 return AVERROR_INVALIDDATA;
// NOTE(review): the first memset clears counts.coef plus sizeof(counts.eob) more bytes,
// which assumes eob directly follows coef in the counts struct — confirm.
756 if (s->keyframe || s->intraonly) {
757 memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
759 memset(&s->counts, 0, sizeof(s->counts));
761 // FIXME is it faster to not copy here, but do it down in the fw updates
762 // as explicit copies if the fw update is missing (and skip the copy upon
// Start from the selected saved context; the updates below modify this working copy.
764 s->prob.p = s->prob_ctx[c].p;
// Transform mode: fixed to TX_4X4 on one path (presumably lossless — confirm), otherwise 2 bits plus an extension bit.
768 s->txfmmode = TX_4X4;
770 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
771 if (s->txfmmode == 3)
772 s->txfmmode += vp8_rac_get(&s->c);
// Every probability update below is gated by a flag coded with probability 252.
774 if (s->txfmmode == TX_SWITCHABLE) {
775 for (i = 0; i < 2; i++)
776 if (vp56_rac_get_prob_branchy(&s->c, 252))
777 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
778 for (i = 0; i < 2; i++)
779 for (j = 0; j < 2; j++)
780 if (vp56_rac_get_prob_branchy(&s->c, 252))
781 s->prob.p.tx16p[i][j] =
782 update_prob(&s->c, s->prob.p.tx16p[i][j]);
783 for (i = 0; i < 2; i++)
784 for (j = 0; j < 3; j++)
785 if (vp56_rac_get_prob_branchy(&s->c, 252))
786 s->prob.p.tx32p[i][j] =
787 update_prob(&s->c, s->prob.p.tx32p[i][j]);
// Coefficient probabilities, one pass per transform size i; ref is the saved context being updated from.
792 for (i = 0; i < 4; i++) {
793 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
// A set bit means updates are coded for this transform size.
794 if (vp8_rac_get(&s->c)) {
795 for (j = 0; j < 2; j++)
796 for (k = 0; k < 2; k++)
797 for (l = 0; l < 6; l++)
798 for (m = 0; m < 6; m++) {
799 uint8_t *p = s->prob.coef[i][j][k][l][m];
800 uint8_t *r = ref[j][k][l][m];
801 if (m >= 3 && l == 0) // dc only has 3 pt
803 for (n = 0; n < 3; n++) {
804 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
805 p[n] = update_prob(&s->c, r[n]);
// Second traversal of the same index space (the statements executed per entry are not visible here).
// NOTE(review): this loop tests m > 3 where the one above tests m >= 3 — confirm this is intended.
813 for (j = 0; j < 2; j++)
814 for (k = 0; k < 2; k++)
815 for (l = 0; l < 6; l++)
816 for (m = 0; m < 6; m++) {
817 uint8_t *p = s->prob.coef[i][j][k][l][m];
818 uint8_t *r = ref[j][k][l][m];
819 if (m > 3 && l == 0) // dc only has 3 pt
825 if (s->txfmmode == i)
830 for (i = 0; i < 3; i++)
831 if (vp56_rac_get_prob_branchy(&s->c, 252))
832 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
// The remaining updates are only present for inter frames.
833 if (!s->keyframe && !s->intraonly) {
834 for (i = 0; i < 7; i++)
835 for (j = 0; j < 3; j++)
836 if (vp56_rac_get_prob_branchy(&s->c, 252))
837 s->prob.p.mv_mode[i][j] =
838 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
840 if (s->filtermode == FILTER_SWITCHABLE)
841 for (i = 0; i < 4; i++)
842 for (j = 0; j < 2; j++)
843 if (vp56_rac_get_prob_branchy(&s->c, 252))
844 s->prob.p.filter[i][j] =
845 update_prob(&s->c, s->prob.p.filter[i][j]);
847 for (i = 0; i < 4; i++)
848 if (vp56_rac_get_prob_branchy(&s->c, 252))
849 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
// Compound prediction mode is coded in up to two bits when compound prediction is allowed.
851 if (s->allowcompinter) {
852 s->comppredmode = vp8_rac_get(&s->c);
854 s->comppredmode += vp8_rac_get(&s->c);
855 if (s->comppredmode == PRED_SWITCHABLE)
856 for (i = 0; i < 5; i++)
857 if (vp56_rac_get_prob_branchy(&s->c, 252))
859 update_prob(&s->c, s->prob.p.comp[i]);
861 s->comppredmode = PRED_SINGLEREF;
864 if (s->comppredmode != PRED_COMPREF) {
865 for (i = 0; i < 5; i++) {
866 if (vp56_rac_get_prob_branchy(&s->c, 252))
867 s->prob.p.single_ref[i][0] =
868 update_prob(&s->c, s->prob.p.single_ref[i][0]);
869 if (vp56_rac_get_prob_branchy(&s->c, 252))
870 s->prob.p.single_ref[i][1] =
871 update_prob(&s->c, s->prob.p.single_ref[i][1]);
875 if (s->comppredmode != PRED_SINGLEREF) {
876 for (i = 0; i < 5; i++)
877 if (vp56_rac_get_prob_branchy(&s->c, 252))
878 s->prob.p.comp_ref[i] =
879 update_prob(&s->c, s->prob.p.comp_ref[i]);
882 for (i = 0; i < 4; i++)
883 for (j = 0; j < 9; j++)
884 if (vp56_rac_get_prob_branchy(&s->c, 252))
885 s->prob.p.y_mode[i][j] =
886 update_prob(&s->c, s->prob.p.y_mode[i][j]);
// Partition probabilities are stored with the first index reversed (3 - i).
888 for (i = 0; i < 4; i++)
889 for (j = 0; j < 4; j++)
890 for (k = 0; k < 3; k++)
891 if (vp56_rac_get_prob_branchy(&s->c, 252))
892 s->prob.p.partition[3 - i][j][k] =
893 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
895 // mv fields don't use the update_prob subexp model for some reason
// Instead each is a 7-bit value mapped to an odd 8-bit probability: (v << 1) | 1.
896 for (i = 0; i < 3; i++)
897 if (vp56_rac_get_prob_branchy(&s->c, 252))
898 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
900 for (i = 0; i < 2; i++) {
901 if (vp56_rac_get_prob_branchy(&s->c, 252))
902 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
904 for (j = 0; j < 10; j++)
905 if (vp56_rac_get_prob_branchy(&s->c, 252))
906 s->prob.p.mv_comp[i].classes[j] =
907 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
909 if (vp56_rac_get_prob_branchy(&s->c, 252))
910 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
912 for (j = 0; j < 10; j++)
913 if (vp56_rac_get_prob_branchy(&s->c, 252))
914 s->prob.p.mv_comp[i].bits[j] =
915 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
918 for (i = 0; i < 2; i++) {
919 for (j = 0; j < 2; j++)
920 for (k = 0; k < 3; k++)
921 if (vp56_rac_get_prob_branchy(&s->c, 252))
922 s->prob.p.mv_comp[i].class0_fp[j][k] =
923 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
925 for (j = 0; j < 3; j++)
926 if (vp56_rac_get_prob_branchy(&s->c, 252))
927 s->prob.p.mv_comp[i].fp[j] =
928 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// High-precision probabilities are only coded when the frame enables high-precision MVs.
931 if (s->highprecisionmvs) {
932 for (i = 0; i < 2; i++) {
933 if (vp56_rac_get_prob_branchy(&s->c, 252))
934 s->prob.p.mv_comp[i].class0_hp =
935 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
937 if (vp56_rac_get_prob_branchy(&s->c, 252))
938 s->prob.p.mv_comp[i].hp =
939 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// Offset of the range-coded part plus its length.
944 return (data2 - data) + size2;
/* Writes to *dst the motion vector *src with each component clipped to the
 * [s->min_mv, s->max_mv] range of the context.
 * NOTE(review): the remaining parameter line is not visible in this excerpt;
 * the body uses a context pointer named s. */
947 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
950 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
951 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
/* Searches the candidate motion vectors for reference frame ref and stores
 * the selected one in *pmv.
 *
 * Candidate sources appear in this order in the visible code:
 *   1. MVs of already decoded sub-blocks of the current block (sb > 0),
 *   2. the above and left context MVs,
 *   3. neighbouring positions listed in mv_ref_blk_off for the block size,
 *   4. the co-located MV of the previous frame (if use_last_frame_mvs),
 *   5. the same neighbours and the previous frame again, this time accepting
 *      MVs that point to a different reference frame.
 *
 * z selects the reference slot read from b->mv[..][z]; sb is the sub-block
 * index.
 * NOTE(review): the bodies of the RETURN_* macros are mostly missing from
 * this excerpt, so how idx and the `mem` candidate memory decide which
 * candidate is returned cannot be confirmed here. The declarations of b and i
 * are also not visible. */
954 static void find_ref_mvs(VP9Context *s,
955 VP56mv *pmv, int ref, int z, int idx, int sb)
// Neighbour offsets as { column, row } pairs relative to the current block,
// in 8-pixel units (they are added to col/row below), per block size.
957 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
958 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
959 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
960 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
961 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
962 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
963 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
964 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
965 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
966 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
967 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
968 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
969 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
970 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
971 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
972 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
973 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
974 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
975 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
976 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
977 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
978 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
979 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
980 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
981 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
982 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
983 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
986 int row = s->row, col = s->col, row7 = s->row7;
987 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
// Sentinel meaning "no candidate remembered yet"; MVs are compared as packed 32-bit words.
988 #define INVALID_MV 0x80008000U
989 uint32_t mem = INVALID_MV;
992 #define RETURN_DIRECT_MV(mv) \
994 uint32_t m = AV_RN32A(&mv); \
998 } else if (mem == INVALID_MV) { \
1000 } else if (m != mem) { \
1007 if (sb == 2 || sb == 1) {
1008 RETURN_DIRECT_MV(b->mv[0][z]);
1009 } else if (sb == 3) {
1010 RETURN_DIRECT_MV(b->mv[2][z]);
1011 RETURN_DIRECT_MV(b->mv[1][z]);
1012 RETURN_DIRECT_MV(b->mv[0][z]);
// Two RETURN_MV variants are visible below: one clamps into a temporary before
// comparing, the other compares first and clamps straight into *pmv.
1015 #define RETURN_MV(mv) \
1020 clamp_mv(&tmp, &mv, s); \
1021 m = AV_RN32A(&tmp); \
1025 } else if (mem == INVALID_MV) { \
1027 } else if (m != mem) { \
1032 uint32_t m = AV_RN32A(&mv); \
1034 clamp_mv(pmv, &mv, s); \
1036 } else if (mem == INVALID_MV) { \
1038 } else if (m != mem) { \
1039 clamp_mv(pmv, &mv, s); \
1046 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
// The per-frame MV array has sb_cols * 8 entries per row; this entry is the one above.
1047 if (mv->ref[0] == ref) {
1048 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1049 } else if (mv->ref[1] == ref) {
1050 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
// The left neighbour is only used if it lies inside the current tile.
1053 if (col > s->tiling.tile_col_start) {
1054 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1055 if (mv->ref[0] == ref) {
1056 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1057 } else if (mv->ref[1] == ref) {
1058 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1066 // previously coded MVs in this neighbourhood, using same reference frame
1067 for (; i < 8; i++) {
1068 int c = p[i][0] + col, r = p[i][1] + row;
// Skip neighbours left of the tile or outside the frame.
1070 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1071 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1073 if (mv->ref[0] == ref) {
1074 RETURN_MV(mv->mv[0]);
1075 } else if (mv->ref[1] == ref) {
1076 RETURN_MV(mv->mv[1]);
1081 // MV at this position in previous frame, using same reference frame
1082 if (s->use_last_frame_mvs) {
1083 struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
// Wait until the previous frame's decoder has reported progress for this superblock row.
1085 if (!s->last_uses_2pass)
1086 ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
1087 if (mv->ref[0] == ref) {
1088 RETURN_MV(mv->mv[0]);
1089 } else if (mv->ref[1] == ref) {
1090 RETURN_MV(mv->mv[1]);
// RETURN_SCALE_MV: visible part negates the MV before handing it to RETURN_MV;
// presumably only when scale is non-zero — confirm.
1094 #define RETURN_SCALE_MV(mv, scale) \
1097 VP56mv mv_temp = { -mv.x, -mv.y }; \
1098 RETURN_MV(mv_temp); \
1104 // previously coded MVs in this neighbourhood, using different reference frame
1105 for (i = 0; i < 8; i++) {
1106 int c = p[i][0] + col, r = p[i][1] + row;
1108 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1109 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
// The scale argument is true when the candidate's reference has the opposite sign bias.
1111 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1112 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1114 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1115 // BUG - libvpx has this condition regardless of whether
1116 // we used the first ref MV and pre-scaling
1117 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1118 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1123 // MV at this position in previous frame, using different reference frame
1124 if (s->use_last_frame_mvs) {
1125 struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
1127 // no need to await_progress, because we already did that above
1128 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1129 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1131 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1132 // BUG - libvpx has this condition regardless of whether
1133 // we used the first ref MV and pre-scaling
1134 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1135 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1142 #undef RETURN_SCALE_MV
/**
 * Read one motion vector difference component from the range coder s->c.
 *
 * Every symbol that is decoded also increments the matching counter in
 * s->counts.mv_comp[idx].
 *
 * @param s   decoder context (range coder, probabilities, symbol counters)
 * @param idx selects the s->prob.p.mv_comp[] / s->counts.mv_comp[] entry;
 *            fill_mv() passes 0 when updating .y and 1 when updating .x
 * @param hp  high-precision flag computed by fill_mv()
 *            NOTE(review): the statements testing hp are not visible in this
 *            excerpt - confirm how it gates the hp / class0_hp reads
 * @return -(n + 1) if the decoded sign bit is set, (n + 1) otherwise, where
 *         n is the magnitude assembled from the symbols read below
 */
1145 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
// sign bit first, then the magnitude class (tree-coded with vp9_mv_class_tree)
1147 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1148 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1149 s->prob.p.mv_comp[idx].classes);
// record both symbols in the counters
1151 s->counts.mv_comp[idx].sign[sign]++;
1152 s->counts.mv_comp[idx].classes[c]++;
// one integer bit is read and counted for every m below the class value c
1156 for (n = 0, m = 0; m < c; m++) {
1157 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1159 s->counts.mv_comp[idx].bits[m][bit]++;
// fractional part, tree-coded with vp9_mv_fp_tree
1162 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1164 s->counts.mv_comp[idx].fp[bit]++;
// high-precision bit read from the bitstream and counted
1166 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1167 s->counts.mv_comp[idx].hp[bit]++;
1171 // bug in libvpx - we count for bw entropy purposes even if the
// NOTE(review): the counter below uses the constant index 1, i.e. it is
// incremented without a bit having been read on this path
1173 s->counts.mv_comp[idx].hp[1]++;
// variant that uses the class0 / class0_fp / class0_hp probability sets
// (presumably the c == 0 case; the branch line is not visible here)
1177 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1178 s->counts.mv_comp[idx].class0[n]++;
1179 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1180 s->prob.p.mv_comp[idx].class0_fp[n]);
1181 s->counts.mv_comp[idx].class0_fp[n][bit]++;
// integer bit moves to bit 3, the fraction occupies bits 1-2
1182 n = (n << 3) | (bit << 1);
1184 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1185 s->counts.mv_comp[idx].class0_hp[bit]++;
1189 // bug in libvpx - we count for bw entropy purposes even if the
// NOTE(review): as above, class0_hp[1] is incremented without a bit read
1191 s->counts.mv_comp[idx].class0_hp[1]++;
// the magnitude is stored minus one; apply the sign last
1195 return sign ? -(n + 1) : (n + 1);
/**
 * Fill in the motion vector(s) of the current block or of one sub-block.
 *
 * For the non-ZEROMV path a prediction is obtained from find_ref_mvs(); for
 * NEWMV a joint type and per-component differences are additionally read
 * from the bitstream (read_mv_component()) and added to that prediction.
 *
 * @param s    decoder context
 * @param mv   output pair: mv[0] is predicted from b->ref[0], mv[1] from
 *             b->ref[1]
 *             NOTE(review): the condition guarding the mv[1] half is not
 *             visible in this excerpt (presumably compound prediction)
 * @param mode inter prediction mode (compared against ZEROMV, NEARMV, NEWMV)
 * @param sb   sub-block index; decode_mode() passes 0..3 for sub-blocks and
 *             -1 for a block that has a single motion vector
 */
1198 static void fill_mv(VP9Context *s,
1199 VP56mv *mv, int mode, int sb)
// ZEROMV is handled separately and needs no prediction lookup
1203 if (mode == ZEROMV) {
1208 // FIXME cache this value and reuse for other subblocks
// prediction for the first reference; NEWMV always passes -1 instead of sb
1209 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1210 mode == NEWMV ? -1 : sb);
1211 // FIXME maybe move this code into find_ref_mvs()
// hp is assigned inside the condition: it is non-zero only when
// high-precision MVs are enabled and both predicted components have a
// magnitude below 64; the guarded body runs when hp ends up zero
1212 if ((mode == NEWMV || sb == -1) &&
1213 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
// NEWMV: read the joint type, then add decoded differences to the prediction
1227 if (mode == NEWMV) {
1228 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1229 s->prob.p.mv_joint);
1231 s->counts.mv_joint[j]++;
// y is updated for joints >= MV_JOINT_V (the guard for x is not visible here)
1232 if (j >= MV_JOINT_V)
1233 mv[0].y += read_mv_component(s, 0, hp);
1235 mv[0].x += read_mv_component(s, 1, hp);
1239 // FIXME cache this value and reuse for other subblocks
// same procedure for the second reference, written to mv[1]
1240 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1241 mode == NEWMV ? -1 : sb);
1242 if ((mode == NEWMV || sb == -1) &&
1243 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1257 if (mode == NEWMV) {
1258 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1259 s->prob.p.mv_joint);
1261 s->counts.mv_joint[j]++;
1262 if (j >= MV_JOINT_V)
1263 mv[1].y += read_mv_component(s, 0, hp);
1265 mv[1].x += read_mv_component(s, 1, hp);
/**
 * Helper that decode_mode() calls to write the segment id of a block into
 * the segmentation map of the current frame.
 *
 * NOTE(review): most of the body is not visible in this excerpt. The visible
 * statements replicate the byte value v into 16-, 32- and 64-bit words
 * (v * 0x0101...) and store such a word with AV_WN32A, so the function
 * presumably fills a w x h byte area with v - confirm against the full file.
 *
 * @param ptr    first byte of the area to write
 * @param w      width of the area (decode_mode() passes its w4)
 * @param h      height of the area (decode_mode() passes its h4)
 * @param stride distance between rows (decode_mode() passes 8 * s->sb_cols)
 * @param v      value to store (decode_mode() passes b->seg_id)
 */
1271 static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
1272 ptrdiff_t stride, int v)
// v repeated in both bytes of a 16-bit word
1282 int v16 = v * 0x0101;
// v repeated in all four bytes of a 32-bit word
1290 uint32_t v32 = v * 0x01010101;
// v repeated in all eight bytes of a 64-bit word
1299 uint64_t v64 = v * 0x0101010101010101ULL;
// 32-bit variant of an eight byte store: the second half goes to ptr + 4
1305 uint32_t v32 = v * 0x01010101;
1308 AV_WN32A(ptr + 4, v32);
/**
 * Decode the mode information of the current block b: segment id, skip flag,
 * intra/inter decision, transform size, then either the intra prediction
 * modes or the reference frame(s), inter mode(s), interpolation filter and
 * motion vectors.
 *
 * Afterwards the above/left context arrays and the motion vector buffer of
 * CUR_FRAME are updated so that later blocks can use this block as a
 * neighbour.
 *
 * @param ctx codec context; ctx->priv_data is the VP9Context
 */
1317 static void decode_mode(AVCodecContext *ctx)
// value stored in the left partition context for each block size
// (used as dir##_ctx[b->bs] by SET_CTXS below)
1319 static const uint8_t left_ctx[N_BS_SIZES] = {
1320 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
// value stored in the above partition context for each block size
1322 static const uint8_t above_ctx[N_BS_SIZES] = {
1323 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
// largest transform size that may be used for each block size
1325 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1326 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1327 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1329 VP9Context *s = ctx->priv_data;
1331 int row = s->row, col = s->col, row7 = s->row7;
1332 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
// block width/height from bwh_tab[1], clipped against the frame edge
1333 int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
1334 int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
// neighbour availability: above needs row > 0, left needs a column to the
// right of the first column of the current tile
1335 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1336 int vref, filter_id;
// ---- segment id ----
1338 if (!s->segmentation.enabled) {
1340 } else if (s->keyframe || s->intraonly) {
1341 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
1342 } else if (!s->segmentation.update_map ||
1343 (s->segmentation.temporal &&
1344 vp56_rac_get_prob_branchy(&s->c,
1345 s->prob.segpred[s->above_segpred_ctx[col] +
1346 s->left_segpred_ctx[row7]]))) {
// predicted segment id: minimum of the LAST_FRAME map over the block area
1349 uint8_t *refsegmap = s->frames[LAST_FRAME].segmentation_map;
// unless the last frame used 2-pass decoding, wait until its decoding
// thread has reported progress up to row >> 3 before reading its map
1351 if (!s->last_uses_2pass)
1352 ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
1353 for (y = 0; y < h4; y++)
1354 for (x = 0; x < w4; x++)
1355 pred = FFMIN(pred, refsegmap[(y + row) * 8 * s->sb_cols + x + col]);
1356 av_assert1(pred < 8);
// remember for the neighbours that the segment id was predicted
1362 memset(&s->above_segpred_ctx[col], 1, w4);
1363 memset(&s->left_segpred_ctx[row7], 1, h4);
// explicitly coded segment id; the predicted flag is cleared for neighbours
1365 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1368 memset(&s->above_segpred_ctx[col], 0, w4);
1369 memset(&s->left_segpred_ctx[row7], 0, h4);
// write the segment id into the CUR_FRAME map whenever the map is refreshed
1371 if (s->segmentation.enabled &&
1372 (s->segmentation.update_map || s->keyframe || s->intraonly)) {
1373 setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
1374 w4, h4, 8 * s->sb_cols, b->seg_id);
// ---- skip flag ----
// taken from the segment feature first; the bitstream read below uses the
// sum of the left and above skip flags as context (the condition guarding
// that read is not visible in this excerpt)
1377 b->skip = s->segmentation.enabled &&
1378 s->segmentation.feat[b->seg_id].skip_enabled;
1380 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1381 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1382 s->counts.skip[c][b->skip]++;
// ---- intra/inter decision ----
1385 if (s->keyframe || s->intraonly) {
1387 } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1388 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
// context from the intra flags of the available neighbours
1392 if (have_a && have_l) {
1393 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1396 c = have_a ? 2 * s->above_intra_ctx[col] :
1397 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1399 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1400 s->counts.intra[c][bit]++;
// ---- transform size ----
// coded per block only when the frame uses TX_SWITCHABLE and the block is
// intra or not skipped; a skipped neighbour counts as max_tx in the context
1404 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1408 c = (s->above_skip_ctx[col] ? max_tx :
1409 s->above_txfm_ctx[col]) +
1410 (s->left_skip_ctx[row7] ? max_tx :
1411 s->left_txfm_ctx[row7]) > max_tx;
1413 c = s->above_skip_ctx[col] ? 1 :
1414 (s->above_txfm_ctx[col] * 2 > max_tx);
1416 } else if (have_l) {
1417 c = s->left_skip_ctx[row7] ? 1 :
1418 (s->left_txfm_ctx[row7] * 2 > max_tx);
// the size is built by adding up to three bits (tx32p), two (tx16p) or
// one (tx8p), depending on the probability set used
1424 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1426 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1428 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1430 s->counts.tx32p[c][b->tx]++;
1433 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1435 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1436 s->counts.tx16p[c][b->tx]++;
1439 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1440 s->counts.tx8p[c][b->tx]++;
// not coded: frame-level transform mode limited to what the block allows
1447 b->tx = FFMIN(max_tx, s->txfmmode);
// ---- intra modes on keyframes / intra-only frames ----
// fixed probabilities vp9_default_kf_ymode_probs, indexed by the above (a)
// and left (l) neighbour modes; a and l are updated as sub-blocks are read
1450 if (s->keyframe || s->intraonly) {
1451 uint8_t *a = &s->above_mode_ctx[col * 2];
1452 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
// block sizes after BS_8x8 in the enum carry up to four sub-block modes
1455 if (b->bs > BS_8x8) {
1456 // FIXME the memory storage intermediates here aren't really
1457 // necessary, they're just there to make the code slightly
1459 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1460 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1461 if (b->bs != BS_8x4) {
1462 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1463 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1464 l[0] = a[1] = b->mode[1];
1466 l[0] = a[1] = b->mode[1] = b->mode[0];
1468 if (b->bs != BS_4x8) {
1469 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1470 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1471 if (b->bs != BS_8x4) {
1472 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1473 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1474 l[1] = a[1] = b->mode[3];
1476 l[1] = a[1] = b->mode[3] = b->mode[2];
1479 b->mode[2] = b->mode[0];
1480 l[1] = a[1] = b->mode[3] = b->mode[1];
// one mode for the whole block, copied into all four slots and contexts
1483 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1484 vp9_default_kf_ymode_probs[*a][*l]);
1485 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1486 // FIXME this can probably be optimized
1487 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1488 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
// the chroma mode is conditioned on the last luma mode
1490 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1491 vp9_default_kf_uvmode_probs[b->mode[3]]);
// ---- intra modes in inter frames ----
// uses the s->prob.p.y_mode / uv_mode probabilities and updates the counters
1492 } else if (b->intra) {
1494 if (b->bs > BS_8x8) {
1495 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1496 s->prob.p.y_mode[0]);
1497 s->counts.y_mode[0][b->mode[0]]++;
1498 if (b->bs != BS_8x4) {
1499 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1500 s->prob.p.y_mode[0]);
1501 s->counts.y_mode[0][b->mode[1]]++;
1503 b->mode[1] = b->mode[0];
1505 if (b->bs != BS_4x8) {
1506 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1507 s->prob.p.y_mode[0]);
1508 s->counts.y_mode[0][b->mode[2]]++;
1509 if (b->bs != BS_8x4) {
1510 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1511 s->prob.p.y_mode[0]);
1512 s->counts.y_mode[0][b->mode[3]]++;
1514 b->mode[3] = b->mode[2];
1517 b->mode[2] = b->mode[0];
1518 b->mode[3] = b->mode[1];
// maps the block size to the y_mode probability set to use
1521 static const uint8_t size_group[10] = {
1522 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1524 int sz = size_group[b->bs];
1526 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1527 s->prob.p.y_mode[sz]);
1528 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1529 s->counts.y_mode[sz][b->mode[3]]++;
1531 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1532 s->prob.p.uv_mode[b->mode[3]]);
1533 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
// ---- inter block ----
// inter mode context, indexed by [above neighbour mode][left neighbour mode]
1535 static const uint8_t inter_mode_ctx_lut[14][14] = {
1536 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1537 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1538 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1539 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1540 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1541 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1542 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1543 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1544 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1545 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1546 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1547 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1548 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1549 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
// reference fixed by the segment feature: ref_val is stored plus one
1552 if (s->segmentation.feat[b->seg_id].ref_enabled) {
1553 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1555 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1557 // read comp_pred flag
1558 if (s->comppredmode != PRED_SWITCHABLE) {
1559 b->comp = s->comppredmode == PRED_COMPREF;
1563 // FIXME add intra as ref=0xff (or -1) to make these easier?
1566 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1568 } else if (s->above_comp_ctx[col]) {
1569 c = 2 + (s->left_intra_ctx[row7] ||
1570 s->left_ref_ctx[row7] == s->fixcompref);
1571 } else if (s->left_comp_ctx[row7]) {
1572 c = 2 + (s->above_intra_ctx[col] ||
1573 s->above_ref_ctx[col] == s->fixcompref);
// NOTE(review): this expression indexes left_ref_ctx with row & 7 while the
// rest of the function uses row7 (= s->row7) - presumably the same value
1575 c = (!s->above_intra_ctx[col] &&
1576 s->above_ref_ctx[col] == s->fixcompref) ^
1577 (!s->left_intra_ctx[row7] &&
1578 s->left_ref_ctx[row & 7] == s->fixcompref);
1581 c = s->above_comp_ctx[col] ? 3 :
1582 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1584 } else if (have_l) {
1585 c = s->left_comp_ctx[row7] ? 3 :
1586 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1590 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1591 s->counts.comp[c][b->comp]++;
1594 // read actual references
1595 // FIXME probably cache a few variables here to prevent repetitive
1596 // memory accesses below
1597 if (b->comp) /* two references */ {
// the fixed reference goes into the slot given by its sign bias, the
// variable reference (chosen by one coded bit) into the other slot
1598 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1600 b->ref[fix_idx] = s->fixcompref;
1601 // FIXME can this codeblob be replaced by some sort of LUT?
1604 if (s->above_intra_ctx[col]) {
1605 if (s->left_intra_ctx[row7]) {
1608 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1610 } else if (s->left_intra_ctx[row7]) {
1611 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1613 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1615 if (refl == refa && refa == s->varcompref[1]) {
1617 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1618 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1619 (refl == s->fixcompref && refa == s->varcompref[0])) {
1622 c = (refa == refl) ? 3 : 1;
1624 } else if (!s->left_comp_ctx[row7]) {
1625 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1628 c = (refl == s->varcompref[1] &&
1629 refa != s->varcompref[1]) ? 2 : 4;
1631 } else if (!s->above_comp_ctx[col]) {
1632 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1635 c = (refa == s->varcompref[1] &&
1636 refl != s->varcompref[1]) ? 2 : 4;
1639 c = (refl == refa) ? 4 : 2;
1643 if (s->above_intra_ctx[col]) {
1645 } else if (s->above_comp_ctx[col]) {
1646 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1648 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1651 } else if (have_l) {
1652 if (s->left_intra_ctx[row7]) {
1654 } else if (s->left_comp_ctx[row7]) {
1655 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1657 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1662 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1663 b->ref[var_idx] = s->varcompref[bit];
1664 s->counts.comp_ref[c][bit]++;
1665 } else /* single reference */ {
// context for the first single_ref bit, from the neighbours' references
1668 if (have_a && !s->above_intra_ctx[col]) {
1669 if (have_l && !s->left_intra_ctx[row7]) {
1670 if (s->left_comp_ctx[row7]) {
1671 if (s->above_comp_ctx[col]) {
1672 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1673 !s->above_ref_ctx[col]);
1675 c = (3 * !s->above_ref_ctx[col]) +
1676 (!s->fixcompref || !s->left_ref_ctx[row7]);
1678 } else if (s->above_comp_ctx[col]) {
1679 c = (3 * !s->left_ref_ctx[row7]) +
1680 (!s->fixcompref || !s->above_ref_ctx[col]);
1682 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
// NOTE(review): as visible here, this arm tests above_intra_ctx inside a
// branch already guarded by !above_intra_ctx, so it looks unreachable
1684 } else if (s->above_intra_ctx[col]) {
1686 } else if (s->above_comp_ctx[col]) {
1687 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1689 c = 4 * (!s->above_ref_ctx[col]);
1691 } else if (have_l && !s->left_intra_ctx[row7]) {
// NOTE(review): likewise, left_intra_ctx is known to be zero at this point
1692 if (s->left_intra_ctx[row7]) {
1694 } else if (s->left_comp_ctx[row7]) {
1695 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1697 c = 4 * (!s->left_ref_ctx[row7]);
1702 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1703 s->counts.single_ref[c][0][bit]++;
1707 // FIXME can this codeblob be replaced by some sort of LUT?
// context for the second single_ref bit
1710 if (s->left_intra_ctx[row7]) {
1711 if (s->above_intra_ctx[col]) {
1713 } else if (s->above_comp_ctx[col]) {
1714 c = 1 + 2 * (s->fixcompref == 1 ||
1715 s->above_ref_ctx[col] == 1);
1716 } else if (!s->above_ref_ctx[col]) {
1719 c = 4 * (s->above_ref_ctx[col] == 1);
1721 } else if (s->above_intra_ctx[col]) {
1722 if (s->left_intra_ctx[row7]) {
1724 } else if (s->left_comp_ctx[row7]) {
1725 c = 1 + 2 * (s->fixcompref == 1 ||
1726 s->left_ref_ctx[row7] == 1);
1727 } else if (!s->left_ref_ctx[row7]) {
1730 c = 4 * (s->left_ref_ctx[row7] == 1);
1732 } else if (s->above_comp_ctx[col]) {
1733 if (s->left_comp_ctx[row7]) {
1734 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1735 c = 3 * (s->fixcompref == 1 ||
1736 s->left_ref_ctx[row7] == 1);
1740 } else if (!s->left_ref_ctx[row7]) {
1741 c = 1 + 2 * (s->fixcompref == 1 ||
1742 s->above_ref_ctx[col] == 1);
1744 c = 3 * (s->left_ref_ctx[row7] == 1) +
1745 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1747 } else if (s->left_comp_ctx[row7]) {
1748 if (!s->above_ref_ctx[col]) {
1749 c = 1 + 2 * (s->fixcompref == 1 ||
1750 s->left_ref_ctx[row7] == 1);
1752 c = 3 * (s->above_ref_ctx[col] == 1) +
1753 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1755 } else if (!s->above_ref_ctx[col]) {
1756 if (!s->left_ref_ctx[row7]) {
1759 c = 4 * (s->left_ref_ctx[row7] == 1);
1761 } else if (!s->left_ref_ctx[row7]) {
1762 c = 4 * (s->above_ref_ctx[col] == 1);
1764 c = 2 * (s->left_ref_ctx[row7] == 1) +
1765 2 * (s->above_ref_ctx[col] == 1);
1768 if (s->above_intra_ctx[col] ||
1769 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1771 } else if (s->above_comp_ctx[col]) {
1772 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1774 c = 4 * (s->above_ref_ctx[col] == 1);
1777 } else if (have_l) {
1778 if (s->left_intra_ctx[row7] ||
1779 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1781 } else if (s->left_comp_ctx[row7]) {
1782 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1784 c = 4 * (s->left_ref_ctx[row7] == 1);
// the second bit selects between reference 1 and reference 2
1789 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1790 s->counts.single_ref[c][1][bit]++;
1791 b->ref[0] = 1 + bit;
// inter mode for blocks up to BS_8x8 in the enum: forced to ZEROMV by the
// segment skip feature, otherwise tree-coded with a neighbour-mode context
1796 if (b->bs <= BS_8x8) {
1797 if (s->segmentation.feat[b->seg_id].skip_enabled) {
1798 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
// per-block-size offset into the above/left mode context arrays
1800 static const uint8_t off[10] = {
1801 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1804 // FIXME this needs to use the LUT tables from find_ref_mvs
1805 // because not all are -1,0/0,-1
1806 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1807 [s->left_mode_ctx[row7 + off[b->bs]]];
1809 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1810 s->prob.p.mv_mode[c]);
1811 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
// the mv_mode counters are indexed by the mode value minus 10
1812 s->counts.mv_mode[c][b->mode[0] - 10]++;
// ---- interpolation filter ----
// coded per block only for FILTER_SWITCHABLE; the context comes from the
// filters of neighbours whose stored mode is >= NEARESTMV
1816 if (s->filtermode == FILTER_SWITCHABLE) {
1819 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1820 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1821 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1822 s->left_filter_ctx[row7] : 3;
1824 c = s->above_filter_ctx[col];
1826 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1827 c = s->left_filter_ctx[row7];
1832 filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1833 s->prob.p.filter[c]);
1834 s->counts.filter[c][filter_id]++;
1835 b->filter = vp9_filter_lut[filter_id];
1837 b->filter = s->filtermode;
// ---- motion vectors ----
// block sizes after BS_8x8: mode and MV per sub-block; sub-blocks that do
// not exist for the given shape copy mode and MVs from their neighbour
1840 if (b->bs > BS_8x8) {
1841 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1843 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1844 s->prob.p.mv_mode[c]);
1845 s->counts.mv_mode[c][b->mode[0] - 10]++;
1846 fill_mv(s, b->mv[0], b->mode[0], 0);
1848 if (b->bs != BS_8x4) {
1849 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1850 s->prob.p.mv_mode[c]);
1851 s->counts.mv_mode[c][b->mode[1] - 10]++;
1852 fill_mv(s, b->mv[1], b->mode[1], 1);
1854 b->mode[1] = b->mode[0];
1855 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1856 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1859 if (b->bs != BS_4x8) {
1860 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1861 s->prob.p.mv_mode[c]);
1862 s->counts.mv_mode[c][b->mode[2] - 10]++;
1863 fill_mv(s, b->mv[2], b->mode[2], 2);
1865 if (b->bs != BS_8x4) {
1866 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1867 s->prob.p.mv_mode[c]);
1868 s->counts.mv_mode[c][b->mode[3] - 10]++;
1869 fill_mv(s, b->mv[3], b->mode[3], 3);
1871 b->mode[3] = b->mode[2];
1872 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1873 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1876 b->mode[2] = b->mode[0];
1877 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1878 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1879 b->mode[3] = b->mode[1];
1880 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1881 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
// one MV (pair) for the whole block, replicated into all four slots
1884 fill_mv(s, b->mv[0], b->mode[0], -1);
1885 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1886 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1887 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
1888 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1889 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1890 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
// reference stored in the ref context: for compound blocks the slot is
// chosen by the sign bias of varcompref[0], otherwise it is ref[0]
1893 vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
// ---- context update ----
// SPLAT_CTX(var, val, n) stores val into n consecutive context bytes
// starting at var. Two definitions are visible, one using 64-bit stores and
// one using 32-bit stores only (presumably selected by a preprocessor
// condition that is not visible in this excerpt). SET_CTXS(dir, off, n)
// applies it to every above_* or left_* context array of this block.
1897 #define SPLAT_CTX(var, val, n) \
1899 case 1: var = val; break; \
1900 case 2: AV_WN16A(&var, val * 0x0101); break; \
1901 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1902 case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
1904 uint64_t v64 = val * 0x0101010101010101ULL; \
1905 AV_WN64A( &var, v64); \
1906 AV_WN64A(&((uint8_t *) &var)[8], v64); \
1911 #define SPLAT_CTX(var, val, n) \
1913 case 1: var = val; break; \
1914 case 2: AV_WN16A(&var, val * 0x0101); break; \
1915 case 4: AV_WN32A(&var, val * 0x01010101); break; \
1917 uint32_t v32 = val * 0x01010101; \
1918 AV_WN32A( &var, v32); \
1919 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1923 uint32_t v32 = val * 0x01010101; \
1924 AV_WN32A( &var, v32); \
1925 AV_WN32A(&((uint8_t *) &var)[4], v32); \
1926 AV_WN32A(&((uint8_t *) &var)[8], v32); \
1927 AV_WN32A(&((uint8_t *) &var)[12], v32); \
1933 switch (bwh_tab[1][b->bs][0]) {
1934 #define SET_CTXS(dir, off, n) \
1936 SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
1937 SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
1938 SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
1939 if (!s->keyframe && !s->intraonly) { \
1940 SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
1941 SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
1942 SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
1944 SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
1945 if (s->filtermode == FILTER_SWITCHABLE) { \
1946 SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
1951 case 1: SET_CTXS(above, col, 1); break;
1952 case 2: SET_CTXS(above, col, 2); break;
1953 case 4: SET_CTXS(above, col, 4); break;
1954 case 8: SET_CTXS(above, col, 8); break;
// same update for the left contexts, sized by the block height
1956 switch (bwh_tab[1][b->bs][1]) {
1957 case 1: SET_CTXS(left, row7, 1); break;
1958 case 2: SET_CTXS(left, row7, 2); break;
1959 case 4: SET_CTXS(left, row7, 4); break;
1960 case 8: SET_CTXS(left, row7, 8); break;
// ---- MV contexts for the neighbours (inter frames only) ----
1965 if (!s->keyframe && !s->intraonly) {
1966 if (b->bs > BS_8x8) {
// sub-block case: the left context receives the MVs of sub-blocks 1 and 3,
// the above context those of sub-blocks 2 and 3
1967 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1969 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
1970 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
1971 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
1972 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
1973 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
1974 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
1975 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
1976 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
// whole-block case: the same MV pair fills 2 * w4 above and 2 * h4 left entries
1978 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1980 for (n = 0; n < w4 * 2; n++) {
1981 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
1982 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
1984 for (n = 0; n < h4 * 2; n++) {
1985 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
1986 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
// ---- per-frame MV buffer ----
// store the references and the MVs of slot 3 for every position the block
// covers in the CUR_FRAME mv buffer (row pitch is s->sb_cols * 8 entries)
1992 for (y = 0; y < h4; y++) {
1993 int x, o = (row + y) * s->sb_cols * 8 + col;
1994 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
1997 for (x = 0; x < w4; x++) {
2001 } else if (b->comp) {
2002 for (x = 0; x < w4; x++) {
2003 mv[x].ref[0] = b->ref[0];
2004 mv[x].ref[1] = b->ref[1];
2005 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2006 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
2009 for (x = 0; x < w4; x++) {
2010 mv[x].ref[0] = b->ref[0];
2012 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2018 // FIXME merge cnt/eob arguments?
/**
 * Decode the coefficient tokens of one transform block.
 *
 * @param c           range coder to read from
 * @param coef        output coefficients, written at scan position rc
 * @param n_coeffs    upper bound on the number of coefficients to decode
 * @param is_tx32x32  constant flag passed by the decode_coeffs_b (0) and
 *                    decode_coeffs_b32 (1) wrappers; NOTE(review): the
 *                    statement testing it is not visible here - presumably
 *                    it selects the dequantisation variant that halves the
 *                    product
 * @param cnt         token counters, updated as cnt[band][nnz][0..2]
 * @param eob         end-of-block counters, updated as eob[band][nnz][flag]
 * @param p           token probabilities; decoding starts with p[0][nnz]
 * @param nnz         initial context
 * @param scan        scan order table
 * @param nb          for each position, the two neighbour positions whose
 *                    cache[] entries form the next context
 * @param band_counts number of coefficients in each band
 * @param qmul        dequantisation factors: qmul[0] for the first
 *                    coefficient (i == 0), qmul[1] for all others
 * @return NOTE(review): the return statement is not visible in this excerpt;
 *         decode_coeffs() stores the result as the block's eob value and
 *         uses !!result as the non-zero context
 */
2019 static av_always_inline int
2020 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2021 int is_tx32x32, unsigned (*cnt)[6][3],
2022 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2023 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2024 const int16_t *band_counts, const int16_t *qmul)
// band_left counts down the coefficients remaining in the current band
2026 int i = 0, band = 0, band_left = band_counts[band];
2027 uint8_t *tp = p[0][nnz];
// per-position token class, read back through nb[] to derive later contexts
2028 uint8_t cache[1024];
// end-of-block flag, counted per band and context
2033 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2034 eob[band][nnz][val]++;
2039 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2040 cnt[band][nnz][0]++;
// advance to the next band once the current one is used up
2042 band_left = band_counts[++band];
// next context: rounded average of the two neighbours' cached classes
2044 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2046 if (++i == n_coeffs)
2047 break; //invalid input; blocks should end with EOB
2052 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2053 cnt[band][nnz][1]++;
2057 // fill in p[3-10] (model fill) - only once per frame for each pos
2059 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
// everything larger than one is counted in the same bucket
2061 cnt[band][nnz][2]++;
2062 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2063 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2064 cache[rc] = val = 2;
2066 val = 3 + vp56_rac_get_prob(c, tp[5]);
2069 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
// categories: a base value plus extra bits read with fixed probabilities
2071 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2072 val = 5 + vp56_rac_get_prob(c, 159);
2074 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2075 val += vp56_rac_get_prob(c, 145);
2079 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2080 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2081 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2082 val += (vp56_rac_get_prob(c, 148) << 1);
2083 val += vp56_rac_get_prob(c, 140);
2085 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2086 val += (vp56_rac_get_prob(c, 155) << 2);
2087 val += (vp56_rac_get_prob(c, 140) << 1);
2088 val += vp56_rac_get_prob(c, 135);
2090 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2091 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2092 val += (vp56_rac_get_prob(c, 157) << 3);
2093 val += (vp56_rac_get_prob(c, 141) << 2);
2094 val += (vp56_rac_get_prob(c, 134) << 1);
2095 val += vp56_rac_get_prob(c, 130);
// largest category: base 67 plus 14 extra bits
2097 val = 67 + (vp56_rac_get_prob(c, 254) << 13);
2098 val += (vp56_rac_get_prob(c, 254) << 12);
2099 val += (vp56_rac_get_prob(c, 254) << 11);
2100 val += (vp56_rac_get_prob(c, 252) << 10);
2101 val += (vp56_rac_get_prob(c, 249) << 9);
2102 val += (vp56_rac_get_prob(c, 243) << 8);
2103 val += (vp56_rac_get_prob(c, 230) << 7);
2104 val += (vp56_rac_get_prob(c, 196) << 6);
2105 val += (vp56_rac_get_prob(c, 177) << 5);
2106 val += (vp56_rac_get_prob(c, 153) << 4);
2107 val += (vp56_rac_get_prob(c, 140) << 3);
2108 val += (vp56_rac_get_prob(c, 133) << 2);
2109 val += (vp56_rac_get_prob(c, 130) << 1);
2110 val += vp56_rac_get_prob(c, 129);
2115 band_left = band_counts[++band];
// read the sign bit, apply it and dequantise; the first variant
// additionally halves the product
2117 coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
2119 coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
2120 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2122 } while (++i < n_coeffs);
/**
 * Coefficient decoder wrapper that calls decode_coeffs_b_generic() with
 * is_tx32x32 = 0; all other arguments are forwarded unchanged and the
 * callee's result is returned.
 */
2127 static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2128 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2129 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2130 const int16_t (*nb)[2], const int16_t *band_counts,
2131 const int16_t *qmul)
2133 return decode_coeffs_b_generic(c, coef, n_coeffs, 0, cnt, eob, p,
2134 nnz, scan, nb, band_counts, qmul);
/**
 * Coefficient decoder wrapper that calls decode_coeffs_b_generic() with
 * is_tx32x32 = 1; all other arguments are forwarded unchanged and the
 * callee's result is returned. decode_coeffs() uses it for the 32x32 luma
 * loop and for the single 32x32 chroma block.
 */
2137 static int decode_coeffs_b32(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2138 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2139 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2140 const int16_t (*nb)[2], const int16_t *band_counts,
2141 const int16_t *qmul)
2143 return decode_coeffs_b_generic(c, coef, n_coeffs, 1, cnt, eob, p,
2144 nnz, scan, nb, band_counts, qmul);
/**
 * Decode all luma and chroma coefficients of the current block b.
 *
 * Luma coefficients go to s->block with end-of-block values in s->eob;
 * chroma coefficients go to s->uvblock[pl] with s->uveob[pl] for both
 * chroma planes. The above/left non-zero context arrays are read as
 * decoding contexts and updated with the result of every transform block.
 *
 * NOTE(review): the switch/case lines that pick a transform size and the
 * SPLAT_CTX invocations are not visible in this excerpt.
 *
 * @param ctx codec context; ctx->priv_data is the VP9Context
 */
2147 static void decode_coeffs(AVCodecContext *ctx)
2149 VP9Context *s = ctx->priv_data;
2151 int row = s->row, col = s->col;
// luma probability and counter sets for this transform size; the last
// index distinguishes intra (0) from inter (1) blocks
2152 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
2153 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
2154 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
// block size and frame-edge limits, at twice the resolution of bwh_tab[1]
2155 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2156 int end_x = FFMIN(2 * (s->cols - col), w4);
2157 int end_y = FFMIN(2 * (s->rows - row), h4);
2158 int n, pl, x, y, res;
// per-segment dequantisation factors: row 0 is used for luma, row 1 for chroma
2159 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
// lossless mode selects a separate group of scan tables (index 4 + b->tx)
2160 int tx = 4 * s->lossless + b->tx;
2161 const int16_t * const *yscans = vp9_scans[tx];
2162 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
// chroma always uses the DCT_DCT scan of its transform size
2163 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2164 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
// luma non-zero contexts above and to the left of the block
2165 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2166 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
// coefficients per band for each transform size; the last entry is the
// remainder of the block (16, 64, 256 or 1024 coefficients in total)
2167 static const int16_t band_counts[4][8] = {
2168 { 1, 2, 3, 4, 3, 16 - 13 },
2169 { 1, 2, 3, 4, 11, 64 - 21 },
2170 { 1, 2, 3, 4, 11, 256 - 21 },
2171 { 1, 2, 3, 4, 11, 1024 - 21 },
2173 const int16_t *y_band_counts = band_counts[b->tx];
2174 const int16_t *uv_band_counts = band_counts[b->uvtx];
// Helper macros:
//  - MERGE / MERGE_CTX collapse every group of `step` context bytes into a
//    single 0/1 flag stored in the first byte of the group.
//  - DECODE_Y_COEF_LOOP decodes each luma transform block of the given step,
//    using a[x] + l[y] as context and writing !!res back to both.
//  - SPLAT spreads the first byte of each group back over the whole group,
//    with memset-based variants limited by `end`.
2176 #define MERGE(la, end, step, rd) \
2177 for (n = 0; n < end; n += step) \
2178 la[n] = !!rd(&la[n])
2179 #define MERGE_CTX(step, rd) \
2181 MERGE(l, end_y, step, rd); \
2182 MERGE(a, end_x, step, rd); \
2185 #define DECODE_Y_COEF_LOOP(step, mode_index, v) \
2186 for (n = 0, y = 0; y < end_y; y += step) { \
2187 for (x = 0; x < end_x; x += step, n += step * step) { \
2188 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
2189 res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
2190 c, e, p, a[x] + l[y], yscans[txtp], \
2191 ynbs[txtp], y_band_counts, qmul[0]); \
2192 a[x] = l[y] = !!res; \
2194 AV_WN16A(&s->eob[n], res); \
2201 #define SPLAT(la, end, step, cond) \
2203 for (n = 1; n < end; n += step) \
2204 la[n] = la[n - 1]; \
2205 } else if (step == 4) { \
2207 for (n = 0; n < end; n += step) \
2208 AV_WN32A(&la[n], la[n] * 0x01010101); \
2210 for (n = 0; n < end; n += step) \
2211 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
2213 } else /* step == 8 */ { \
2215 if (HAVE_FAST_64BIT) { \
2216 for (n = 0; n < end; n += step) \
2217 AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
2219 for (n = 0; n < end; n += step) { \
2220 uint32_t v32 = la[n] * 0x01010101; \
2221 AV_WN32A(&la[n], v32); \
2222 AV_WN32A(&la[n + 4], v32); \
2226 for (n = 0; n < end; n += step) \
2227 memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
2230 #define SPLAT_CTX(step) \
2232 SPLAT(a, end_x, step, end_x == w4); \
2233 SPLAT(l, end_y, step, end_y == h4); \
2239 DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
2242 MERGE_CTX(2, AV_RN16A);
2243 DECODE_Y_COEF_LOOP(2, 0,);
2247 MERGE_CTX(4, AV_RN32A);
2248 DECODE_Y_COEF_LOOP(4, 0,);
2252 MERGE_CTX(8, AV_RN64A);
2253 DECODE_Y_COEF_LOOP(8, 0, 32);
2258 #define DECODE_UV_COEF_LOOP(step) \
2259 for (n = 0, y = 0; y < end_y; y += step) { \
2260 for (x = 0; x < end_x; x += step, n += step * step) { \
2261 res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n, \
2262 16 * step * step, c, e, p, a[x] + l[y], \
2263 uvscan, uvnb, uv_band_counts, qmul[1]); \
2264 a[x] = l[y] = !!res; \
2266 AV_WN16A(&s->uveob[pl][n], res); \
2268 s->uveob[pl][n] = res; \
2273 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2274 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2275 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
// p, c and e now point at the chroma sets; both chroma planes are decoded
// with their own above/left non-zero contexts
2280 for (pl = 0; pl < 2; pl++) {
2281 a = &s->above_uv_nnz_ctx[pl][col];
2282 l = &s->left_uv_nnz_ctx[pl][row & 7];
2285 DECODE_UV_COEF_LOOP(1);
2288 MERGE_CTX(2, AV_RN16A);
2289 DECODE_UV_COEF_LOOP(2);
2293 MERGE_CTX(4, AV_RN32A);
2294 DECODE_UV_COEF_LOOP(4);
2298 MERGE_CTX(8, AV_RN64A);
2299 // a 64x64 (max) uv block can ever only contain 1 tx32x32 block
2300 // so there is no need to loop
2301 res = decode_coeffs_b32(&s->c, s->uvblock[pl],
2302 1024, c, e, p, a[0] + l[0],
2303 uvscan, uvnb, uv_band_counts, qmul[1]);
2304 a[0] = l[0] = !!res;
2305 AV_WN16A(&s->uveob[pl][0], res);
/*
 * Select the intra predictor variant that matches the available neighbours
 * and gather the edge pixels it will read.
 *
 * The requested mode is remapped through mode_conv[] depending on whether a
 * left and/or top neighbour exists; the edges[] table then tells which of
 * the top, top-left, top-right and left pixels the remapped mode needs.
 *
 *   a             in/out pointer to the top-edge scratch buffer; (*a)[-1]
 *                 receives the top-left pixel when the mode needs it
 *   dst_edge,     pixel pointer/stride used for neighbours when the transform
 *   stride_edge   block sits on the outer edge of the block (x == 0 / y == 0)
 *   dst_inner,    pixel pointer/stride used for neighbours inside the block
 *   stride_inner
 *   l             left-edge output, filled so that picture row i ends up at
 *                 l[n_px_need - 1 - i]
 *   col, row      block position; x, y, w are in 4-pixel units inside the
 *                 block (x * 4 is used as a pixel offset below)
 *   tx            transform size; the edge length is 4 << tx pixels
 *   p             plane selector (callers in this file pass 0 for luma and
 *                 1 or 2 for chroma)
 *
 * Callers in this file use the return value as the index into
 * s->dsp.intra_pred[tx][].
 */
2312 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2313 uint8_t *dst_edge, ptrdiff_t stride_edge,
2314 uint8_t *dst_inner, ptrdiff_t stride_inner,
2315 uint8_t *l, int col, int x, int w,
2316 int row, int y, enum TxfmMode tx,
// a top neighbour exists unless both row and y are 0
2319 int have_top = row > 0 || y > 0;
// a left neighbour must lie at or after the current tile's first column
2320 int have_left = col > s->tiling.tile_col_start || x > 0;
2321 int have_right = x < w - 1;
// replacement mode, indexed by [requested mode][have_left][have_top]
2322 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2323 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2324 { DC_127_PRED, VERT_PRED } },
2325 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2326 { HOR_PRED, HOR_PRED } },
2327 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2328 { LEFT_DC_PRED, DC_PRED } },
2329 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2330 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2331 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2332 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2333 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2334 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2335 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2336 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2337 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2338 { DC_127_PRED, VERT_LEFT_PRED } },
2339 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2340 { HOR_UP_PRED, HOR_UP_PRED } },
2341 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2342 { HOR_PRED, TM_VP8_PRED } },
// which neighbouring pixels each (already remapped) mode reads
2344 static const struct {
2345 uint8_t needs_left:1;
2346 uint8_t needs_top:1;
2347 uint8_t needs_topleft:1;
2348 uint8_t needs_topright:1;
2349 } edges[N_INTRA_PRED_MODES] = {
2350 [VERT_PRED] = { .needs_top = 1 },
2351 [HOR_PRED] = { .needs_left = 1 },
2352 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2353 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2354 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2355 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2356 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2357 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2358 [HOR_UP_PRED] = { .needs_left = 1 },
2359 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2360 [LEFT_DC_PRED] = { .needs_left = 1 },
2361 [TOP_DC_PRED] = { .needs_top = 1 },
2362 [DC_128_PRED] = { 0 },
2363 [DC_127_PRED] = { 0 },
2364 [DC_129_PRED] = { 0 }
// only the 10 bitstream modes are valid inputs; the DC_12x/one-sided DC
// variants are produced by mode_conv[] only
2367 av_assert2(mode >= 0 && mode < 10);
2368 mode = mode_conv[mode][have_left][have_top];
2369 if (edges[mode].needs_top) {
2370 uint8_t *top, *topleft;
// n_px_need: edge length of this transform block;
// n_px_have: pixels between this position and the s->cols boundary
2371 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
2372 int n_px_need_tr = 0;
// top-right pixels are only considered for 4x4 transforms
2374 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2377 // if top of sb64-row, use s->intra_pred_data[] instead of
2378 // dst[-stride] for intra prediction (it contains pre- instead of
2379 // post-loopfilter data)
2381 top = !(row & 7) && !y ?
2382 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2383 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
// topleft differs from top only when x == 0 and y != 0: it then comes
// from the edge buffer while top comes from the inner buffer
2385 topleft = !(row & 7) && !y ?
2386 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2387 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2388 &dst_inner[-stride_inner];
2392 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2393 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2394 n_px_need + n_px_need_tr <= n_px_have) {
// copy the top row into the scratch buffer; when fewer pixels exist than
// are needed, replicate the last available one
2398 if (n_px_need <= n_px_have) {
2399 memcpy(*a, top, n_px_need);
2401 memcpy(*a, top, n_px_have);
2402 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2403 n_px_need - n_px_have);
// substitute the constant 127 for the whole top row
2406 memset(*a, 127, n_px_need);
2408 if (edges[mode].needs_topleft) {
2409 if (have_left && have_top) {
2410 (*a)[-1] = topleft[-1];
// missing top-left: 129 if only the left is missing, 127 otherwise
2412 (*a)[-1] = have_top ? 129 : 127;
2415 if (tx == TX_4X4 && edges[mode].needs_topright) {
2416 if (have_top && have_right &&
2417 n_px_need + n_px_need_tr <= n_px_have) {
2418 memcpy(&(*a)[4], &top[4], 4);
// no usable top-right: extend the last top pixel
2420 memset(&(*a)[4], (*a)[3], 4);
2425 if (edges[mode].needs_left) {
// n_px_have: pixels between this position and the s->rows boundary
2427 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
2428 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2429 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
// the left column is stored reversed: picture row i goes to l[n_px_need - 1 - i]
2431 if (n_px_need <= n_px_have) {
2432 for (i = 0; i < n_px_need; i++)
2433 l[n_px_need - 1 - i] = dst[i * stride - 1];
2435 for (i = 0; i < n_px_have; i++)
2436 l[n_px_need - 1 - i] = dst[i * stride - 1];
// rows past the boundary repeat the last available left pixel
2437 memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
// substitute the constant 129 for the whole left column
2440 memset(l, 129, 4 << tx);
/*
 * Reconstruct the current intra-coded block (s->b at s->row / s->col).
 *
 * For every transform block, first in the luma plane and then in both chroma
 * planes, the edges are prepared with check_intra_mode(), the intra
 * predictor is run into s->dst[], and the residual is added through
 * s->dsp.itxfm_add[].
 *
 *   y_off   byte offset of the block inside the current frame's data[0]
 *   uv_off  byte offset of the block inside the current frame's data[1]/[2]
 */
2447 static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2449 VP9Context *s = ctx->priv_data;
2451 int row = s->row, col = s->col;
// w4/h4: block size in 4-pixel units; step1d: transform size in 4-pixel
// units; step: number of 4x4 units covered by one transform block
2452 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2453 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
// do not iterate past the s->cols / s->rows boundary
2454 int end_x = FFMIN(2 * (s->cols - col), w4);
2455 int end_y = FFMIN(2 * (s->rows - row), h4);
// lossless mode selects the itxfm_add entries at index + 4
2456 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2457 int uvstep1d = 1 << b->uvtx, p;
// dst: where pixels are written (s->dst[]); dst_r: the same position in the
// frame buffer, handed to check_intra_mode() as the "edge" source
2458 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2459 LOCAL_ALIGNED_32(uint8_t, a_buf, [64]);
2460 LOCAL_ALIGNED_32(uint8_t, l, [32]);
// luma plane
2462 for (n = 0, y = 0; y < end_y; y += step1d) {
2463 uint8_t *ptr = dst, *ptr_r = dst_r;
2464 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2465 ptr_r += 4 * step1d, n += step) {
2466 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
// start inside a_buf so that a[-1] (the top-left pixel) is addressable
2468 uint8_t *a = &a_buf[32];
2469 enum TxfmType txtp = vp9_intra_txfm_type[mode];
// eob is read as a 16-bit value for transforms larger than 8x8
2470 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2472 mode = check_intra_mode(s, mode, &a, ptr_r,
2473 s->frames[CUR_FRAME].tf.f->linesize[0],
2474 ptr, s->y_stride, l,
2475 col, x, w4, row, y, b->tx, 0);
2476 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2478 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2479 s->block + 16 * n, eob);
2481 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2482 dst += 4 * step1d * s->y_stride;
// chroma planes: same scheme with the chroma transform size, always DCT_DCT
2489 step = 1 << (b->uvtx * 2);
2490 for (p = 0; p < 2; p++) {
2491 dst = s->dst[1 + p];
2492 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2493 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2494 uint8_t *ptr = dst, *ptr_r = dst_r;
2495 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2496 ptr_r += 4 * uvstep1d, n += step) {
2497 int mode = b->uvmode;
2498 uint8_t *a = &a_buf[16];
2499 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2501 mode = check_intra_mode(s, mode, &a, ptr_r,
2502 s->frames[CUR_FRAME].tf.f->linesize[1],
2503 ptr, s->uv_stride, l,
2504 col, x, w4, row, y, b->uvtx, p + 1);
2505 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2507 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2508 s->uvblock[p] + 16 * n, eob);
2510 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2511 dst += 4 * uvstep1d * s->uv_stride;
/*
 * Motion-compensate one luma block of bw x bh pixels from a reference frame
 * into dst.
 *
 *   mc          table of prediction functions, indexed [!!mx][!!my]
 *   dst,        destination pointer and stride
 *   dst_stride
 *   ref,        reference plane base pointer and stride
 *   ref_stride
 *   ref_frame   reference frame, used to wait for decoding progress
 *   y, x        position of the block in the reference plane, in pixels
 *   mv          motion vector for this block
 *   bw, bh      block size in pixels
 *   w, h        dimensions of the reference plane in pixels
 *
 * If the filter would read outside the w x h area, the source is first
 * copied into s->edge_emu_buffer (row stride 80) by emulated_edge_mc().
 */
2516 static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2517 uint8_t *dst, ptrdiff_t dst_stride,
2518 const uint8_t *ref, ptrdiff_t ref_stride,
2519 ThreadFrame *ref_frame,
2520 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2521 int bw, int bh, int w, int h)
2523 int mx = mv->x, my = mv->y, th;
2527 ref += y * ref_stride + x;
2530 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2531 // we use +7 because the last 7 pixels of each sbrow can be changed in
2532 // the longest loopfilter of the next sbrow
// th: last 64-pixel row group of the reference that must be finished
2533 th = (y + bh + 4 * !!my + 7) >> 6;
2534 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
// a filtered direction reads 3 extra pixels before and 4 after the block
2535 if (x < !!mx * 3 || y < !!my * 3 ||
2536 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2537 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2538 ref - !!my * 3 * ref_stride - !!mx * 3,
2540 bw + !!mx * 7, bh + !!my * 7,
2541 x - !!mx * 3, y - !!my * 3, w, h);
// skip the 3-pixel lead-in that was copied in front of the block
2542 ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2545 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
/*
 * Motion-compensate one chroma block of bw x bh pixels for both the U and V
 * planes, using the same motion vector for each.
 *
 *   mc              table of prediction functions, indexed [!!mx][!!my]
 *   dst_u, dst_v,   destination pointers and their common stride
 *   dst_stride
 *   ref_u, ref_v,   reference plane base pointers and their strides
 *   src_stride_u/v
 *   ref_frame       reference frame, used to wait for decoding progress
 *   y, x            position of the block in the reference planes, in pixels
 *   mv              motion vector for this block
 *   bw, bh          block size in pixels
 *   w, h            dimensions passed to emulated_edge_mc() as the plane size
 *
 * When the filter would read outside the w x h area, each plane is in turn
 * copied into s->edge_emu_buffer (row stride 80) and predicted from there;
 * the buffer is reused, so U is fully processed before V is copied.
 */
2548 static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2549 uint8_t *dst_u, uint8_t *dst_v,
2550 ptrdiff_t dst_stride,
2551 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2552 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2553 ThreadFrame *ref_frame,
2554 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2555 int bw, int bh, int w, int h)
2557 int mx = mv->x, my = mv->y, th;
2561 ref_u += y * src_stride_u + x;
2562 ref_v += y * src_stride_v + x;
2565 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2566 // we use +7 because the last 7 pixels of each sbrow can be changed in
2567 // the longest loopfilter of the next sbrow
// th: last 32-row group of the reference that must be finished
// (the luma variant shifts by 6 where this one shifts by 5)
2568 th = (y + bh + 4 * !!my + 7) >> 5;
2569 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
// a filtered direction reads 3 extra pixels before and 4 after the block
2570 if (x < !!mx * 3 || y < !!my * 3 ||
2571 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2572 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2573 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2575 bw + !!mx * 7, bh + !!my * 7,
2576 x - !!mx * 3, y - !!my * 3, w, h);
2577 ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2578 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2580 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2581 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2583 bw + !!mx * 7, bh + !!my * 7,
2584 x - !!mx * 3, y - !!my * 3, w, h);
2585 ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2586 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
// no edge emulation needed: predict straight from the reference planes
2588 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2589 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
/*
 * Reconstruct the current inter-coded block (s->b at s->row / s->col).
 *
 * Luma prediction: block sizes above BS_8x8 in the enum (8x4, 4x8, 4x4) are
 * predicted per sub-block, each with its own motion vector b->mv[i][];
 * other sizes use a single call with b->mv[0][]. Chroma prediction uses one
 * call per reference; for the sub-8x8 sizes the chroma vector is the rounded
 * average of the four luma vectors. Calls that read the second reference
 * (ref2 / tref2) select index [1] of the mc table instead of [0].
 * Afterwards the residual is added with itxfm_add[..][DCT_DCT] for luma and
 * both chroma planes.
 */
2593 static void inter_recon(AVCodecContext *ctx)
// log2-style index into s->dsp.mc[] per block size; row 0 is used for the
// luma call below, row 1 for the chroma call
2595 static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
2596 { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
2597 { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
2599 VP9Context *s = ctx->priv_data;
2601 int row = s->row, col = s->col;
2602 ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]], *tref2;
2603 AVFrame *ref1 = tref1->f, *ref2;
2604 int w1 = ref1->width, h1 = ref1->height, w2, h2;
2605 ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
2608 tref2 = &s->refs[s->refidx[b->ref[1]]];
// luma, sub-8x8 sizes: one call per sub-block
2615 if (b->bs > BS_8x8) {
// two 8x4 halves stacked vertically, using mv[0] and mv[2]
2616 if (b->bs == BS_8x4) {
2617 mc_luma_dir(s, s->dsp.mc[3][b->filter][0], s->dst[0], ls_y,
2618 ref1->data[0], ref1->linesize[0], tref1,
2619 row << 3, col << 3, &b->mv[0][0], 8, 4, w1, h1);
2620 mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
2621 s->dst[0] + 4 * ls_y, ls_y,
2622 ref1->data[0], ref1->linesize[0], tref1,
2623 (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1);
2626 mc_luma_dir(s, s->dsp.mc[3][b->filter][1], s->dst[0], ls_y,
2627 ref2->data[0], ref2->linesize[0], tref2,
2628 row << 3, col << 3, &b->mv[0][1], 8, 4, w2, h2);
2629 mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
2630 s->dst[0] + 4 * ls_y, ls_y,
2631 ref2->data[0], ref2->linesize[0], tref2,
2632 (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2);
// two 4x8 halves side by side, using mv[0] and mv[1]
2634 } else if (b->bs == BS_4x8) {
2635 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2636 ref1->data[0], ref1->linesize[0], tref1,
2637 row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1);
2638 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2639 ref1->data[0], ref1->linesize[0], tref1,
2640 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1);
2643 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2644 ref2->data[0], ref2->linesize[0], tref2,
2645 row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2);
2646 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2647 ref2->data[0], ref2->linesize[0], tref2,
2648 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2);
// four 4x4 quarters, using mv[0..3] in raster order
2651 av_assert2(b->bs == BS_4x4);
2653 // FIXME if two horizontally adjacent blocks have the same MV,
2654 // do a w8 instead of a w4 call
2655 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2656 ref1->data[0], ref1->linesize[0], tref1,
2657 row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1);
2658 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2659 ref1->data[0], ref1->linesize[0], tref1,
2660 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1);
2661 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2662 s->dst[0] + 4 * ls_y, ls_y,
2663 ref1->data[0], ref1->linesize[0], tref1,
2664 (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1);
2665 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2666 s->dst[0] + 4 * ls_y + 4, ls_y,
2667 ref1->data[0], ref1->linesize[0], tref1,
2668 (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1);
2671 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2672 ref2->data[0], ref2->linesize[0], tref2,
2673 row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2);
2674 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2675 ref2->data[0], ref2->linesize[0], tref2,
2676 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2);
2677 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2678 s->dst[0] + 4 * ls_y, ls_y,
2679 ref2->data[0], ref2->linesize[0], tref2,
2680 (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2);
2681 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2682 s->dst[0] + 4 * ls_y + 4, ls_y,
2683 ref2->data[0], ref2->linesize[0], tref2,
2684 (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2);
// luma, 8x8 and larger: one call covering the whole block
2688 int bwl = bwlog_tab[0][b->bs];
2689 int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
2691 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], s->dst[0], ls_y,
2692 ref1->data[0], ref1->linesize[0], tref1,
2693 row << 3, col << 3, &b->mv[0][0],bw, bh, w1, h1);
2696 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], s->dst[0], ls_y,
2697 ref2->data[0], ref2->linesize[0], tref2,
2698 row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2);
// chroma: one call per reference, covering both U and V
2703 int bwl = bwlog_tab[1][b->bs];
2704 int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
// sub-8x8: chroma vector is the rounded mean of the four luma vectors
2713 if (b->bs > BS_8x8) {
2714 mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
2715 mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
2720 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
2721 s->dst[1], s->dst[2], ls_uv,
2722 ref1->data[1], ref1->linesize[1],
2723 ref1->data[2], ref1->linesize[2], tref1,
2724 row << 2, col << 2, &mvuv, bw, bh, w1, h1);
2727 if (b->bs > BS_8x8) {
2728 mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
2729 mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
2733 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
2734 s->dst[1], s->dst[2], ls_uv,
2735 ref2->data[1], ref2->linesize[1],
2736 ref2->data[2], ref2->linesize[2], tref2,
2737 row << 2, col << 2, &mvuv, bw, bh, w2, h2);
2742 /* residual add; mostly copied from intra_recon() */
2744 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2745 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
// do not iterate past the s->cols / s->rows boundary
2746 int end_x = FFMIN(2 * (s->cols - col), w4);
2747 int end_y = FFMIN(2 * (s->rows - row), h4);
// lossless mode selects the itxfm_add entries at index + 4
2748 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2749 int uvstep1d = 1 << b->uvtx, p;
2750 uint8_t *dst = s->dst[0];
// luma residual
2753 for (n = 0, y = 0; y < end_y; y += step1d) {
2755 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
// eob is read as a 16-bit value for transforms larger than 8x8
2756 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2759 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2760 s->block + 16 * n, eob);
2762 dst += 4 * s->y_stride * step1d;
// chroma residual, both planes
2768 step = 1 << (b->uvtx * 2);
2769 for (p = 0; p < 2; p++) {
2770 dst = s->dst[p + 1];
2771 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2773 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2774 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2777 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2778 s->uvblock[p] + 16 * n, eob);
2780 dst += 4 * uvstep1d * s->uv_stride;
/*
 * Mark in lflvl->mask[] the edges of one block that the loop filter has to
 * process. The mask is indexed [is_uv][0=col, 1=row][row inside the
 * superblock][filter size class], with one bit per column; the size classes
 * are 0=16, 1=8, 2=4 and 3=inner4.
 *
 *   is_uv        0 for the luma masks, 1 for the chroma masks
 *   row_and_7,   position of the block inside its superblock
 *   col_and_7
 *   w, h         number of columns / rows of the mask covered by the block
 *   col_end,     values describing an odd frame edge; only the parity of
 *   row_end      col_end is tested in the lines shown here
 *   tx           transform size of the block in this plane
 *   skip_inter   nonzero for an inter block without residual; such blocks
 *                only get their outer edges marked
 */
2786 static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
2787 int row_and_7, int col_and_7,
2788 int w, int h, int col_end, int row_end,
2789 enum TxfmMode tx, int skip_inter)
2791 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2792 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2793 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2794 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2796 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2797 // edges. This means that for UV, we work on two subsampled blocks at
2798 // a time, and we only use the topleft block's mode information to set
2799 // things like block strength. Thus, for any block size smaller than
2800 // 16x16, ignore the odd portion of the block.
2801 if (tx == TX_4X4 && is_uv) {
// 4x4 transform with residual (or intra): inner edges are filtered too
2816 if (tx == TX_4X4 && !skip_inter) {
// t: bit of the block's first column; m_col: bits of all w columns;
// m_col_odd: the same without the last column
2817 int t = 1 << col_and_7, m_col = (t << w) - t, y;
2818 int m_col_odd = (t << (w - 1)) - t;
2820 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
2822 int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2824 for (y = row_and_7; y < h + row_and_7; y++) {
2825 int col_mask_id = 2 - !(y & 7);
2827 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2828 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2829 // for odd lines, if the odd col is not being filtered,
2830 // skip odd row also:
2837 // if a/c are even row/col and b/d are odd, and d is skipped,
2838 // e.g. right edge of size-66x66.webm, then skip b also (bug)
2839 if ((col_end & 1) && (y & 1)) {
2840 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
2842 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
2846 int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2848 for (y = row_and_7; y < h + row_and_7; y++) {
2849 int col_mask_id = 2 - !(y & 3);
2851 lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
2852 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2853 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
// size class 3: edges in the interior of each 8x8
2854 lflvl->mask[is_uv][0][y][3] |= m_col;
2855 lflvl->mask[is_uv][1][y][3] |= m_col;
2859 int y, t = 1 << col_and_7, m_col = (t << w) - t;
// size class 1 for 8x8 transforms, 0 for larger ones
2862 int mask_id = (tx == TX_8X8);
// step1d: distance between transform edges, in mask rows/columns;
// masks[l2] keeps every step1d-th column bit
2863 int l2 = tx + is_uv - 1, step1d = 1 << l2;
2864 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2865 int m_row = m_col & masks[l2];
2867 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
2868 // 8wd loopfilter to prevent going off the visible edge.
2869 if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2870 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2871 int m_row_8 = m_row - m_row_16;
2873 for (y = row_and_7; y < h + row_and_7; y++) {
2874 lflvl->mask[is_uv][0][y][0] |= m_row_16;
2875 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2878 for (y = row_and_7; y < h + row_and_7; y++)
2879 lflvl->mask[is_uv][0][y][mask_id] |= m_row;
2882 if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2883 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2884 lflvl->mask[is_uv][1][y][0] |= m_col;
2885 if (y - row_and_7 == h - 1)
2886 lflvl->mask[is_uv][1][y][1] |= m_col;
2888 for (y = row_and_7; y < h + row_and_7; y += step1d)
2889 lflvl->mask[is_uv][1][y][mask_id] |= m_col;
// remaining cases: only the first column (bit t) and the first row
// (row_and_7) of the block are marked
2891 } else if (tx != TX_4X4) {
2894 mask_id = (tx == TX_8X8) || (is_uv && h == 1);
2895 lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
2896 mask_id = (tx == TX_8X8) || (is_uv && w == 1);
2897 for (y = row_and_7; y < h + row_and_7; y++)
2898 lflvl->mask[is_uv][0][y][mask_id] |= t;
2900 int t8 = t & 0x01, t4 = t - t8;
2902 for (y = row_and_7; y < h + row_and_7; y++) {
2903 lflvl->mask[is_uv][0][y][2] |= t4;
2904 lflvl->mask[is_uv][0][y][1] |= t8;
2906 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
2908 int t8 = t & 0x11, t4 = t - t8;
2910 for (y = row_and_7; y < h + row_and_7; y++) {
2911 lflvl->mask[is_uv][0][y][2] |= t4;
2912 lflvl->mask[is_uv][0][y][1] |= t8;
2914 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
/*
 * Process one block at (row, col) whose size is given by the partition level
 * bl and partition type bp.
 *
 * Steps visible in this function: set the motion vector limits for the
 * block position, derive the chroma transform size, clear the non-zero
 * contexts and advance the coefficient/eob pointers, choose the
 * reconstruction destination (frame buffer or the s->tmp_y / s->tmp_uv
 * scratch buffers), reconstruct, copy scratch data back into the frame, and
 * finally record loop filter level and edge masks in lflvl.
 *
 *   lflvl   loop filter state of the superblock containing this block
 *   yoff    byte offset of the block inside the frame's luma plane
 *   uvoff   byte offset of the block inside the frame's chroma planes
 */
2919 static void decode_b(AVCodecContext *ctx, int row, int col,
2920 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2921 enum BlockLevel bl, enum BlockPartition bp)
2923 VP9Context *s = ctx->priv_data;
// block sizes are enumerated as three partition shapes per level
2925 enum BlockSize bs = bl * 3 + bp;
2926 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
2928 AVFrame *f = s->frames[CUR_FRAME].tf.f;
// motion vector limits: 64 units per block row/column plus a margin of 128
// (presumably 1/8-pel units - TODO confirm)
2934 s->min_mv.x = -(128 + col * 64);
2935 s->min_mv.y = -(128 + row * 64);
2936 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
2937 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
// chroma uses the next smaller transform when the luma transform is as wide
// or as tall as the whole block
2943 b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
// helpers that zero n context bytes with a single store of matching width
2950 #define SPLAT_ZERO_CTX(v, n) \
2952 case 1: v = 0; break; \
2953 case 2: AV_ZERO16(&v); break; \
2954 case 4: AV_ZERO32(&v); break; \
2955 case 8: AV_ZERO64(&v); break; \
2956 case 16: AV_ZERO128(&v); break; \
2958 #define SPLAT_ZERO_YUV(dir, var, off, n) \
2960 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
2961 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
2962 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
2966 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1); break;
2967 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2); break;
2968 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4); break;
2969 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8); break;
2972 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1); break;
2973 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2); break;
2974 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4); break;
2975 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8); break;
// step the coefficient and eob pointers past this block
2980 s->block += w4 * h4 * 64;
2981 s->uvblock[0] += w4 * h4 * 16;
2982 s->uvblock[1] += w4 * h4 * 16;
2983 s->eob += 4 * w4 * h4;
2984 s->uveob[0] += w4 * h4;
2985 s->uveob[1] += w4 * h4;
2991 // emulated overhangs if the stride of the target buffer can't hold. This
2992 // allows to support emu-edge and so on even if we have large block
// emu[0] / emu[1]: luma / chroma block does not fit into the frame buffer
2994 emu[0] = (col + w4) * 8 > f->linesize[0] ||
2995 (row + h4) > s->rows;
2996 emu[1] = (col + w4) * 4 > f->linesize[1] ||
2997 (row + h4) > s->rows;
2999 s->dst[0] = s->tmp_y;
3002 s->dst[0] = f->data[0] + yoff;
3003 s->y_stride = f->linesize[0];
3006 s->dst[1] = s->tmp_uv[0];
3007 s->dst[2] = s->tmp_uv[1];
3010 s->dst[1] = f->data[1] + uvoff;
3011 s->dst[2] = f->data[2] + uvoff;
3012 s->uv_stride = f->linesize[1];
3015 intra_recon(ctx, yoff, uvoff);
// copy the visible part of the luma scratch buffer (stride 64) into the
// frame, using mc functions with a zero motion vector
3020 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3022 for (n = 0; o < w; n++) {
3027 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3028 s->tmp_y + o, 64, h, 0, 0);
// same for both chroma scratch buffers (stride 32)
3034 int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
3036 for (n = 1; o < w; n++) {
3041 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3042 s->tmp_uv[0] + o, 32, h, 0, 0);
3043 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3044 s->tmp_uv[1] + o, 32, h, 0, 0);
3050 // pick filter level and find edges to apply filter to
// the level is looked up per segment, per reference (index 0 for intra) and
// per "mode is not ZEROMV" flag
3051 if (s->filter.level &&
3052 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3053 [b->mode[3] != ZEROMV]) > 0) {
3054 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3055 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
// store the level for every 8x8 covered by the block (8 entries per row)
3057 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3058 mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3059 mask_edges(lflvl, 1, row7, col7, x_end, y_end,
3060 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3061 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3062 b->uvtx, skip_inter);
// fill the limit tables for this level the first time it is used
3064 if (!s->filter.lim_lut[lvl]) {
3065 int sharp = s->filter.sharpness;
3069 limit >>= (sharp + 3) >> 2;
3070 limit = FFMIN(limit, 9 - sharp);
3072 limit = FFMAX(limit, 1);
3074 s->filter.lim_lut[lvl] = limit;
3075 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
// step the coefficient and eob pointers past this block
3081 s->block += w4 * h4 * 64;
3082 s->uvblock[0] += w4 * h4 * 16;
3083 s->uvblock[1] += w4 * h4 * 16;
3084 s->eob += 4 * w4 * h4;
3085 s->uveob[0] += w4 * h4;
3086 s->uveob[1] += w4 * h4;
/*
 * Read the partitioning of the area at (row, col) on level bl from the
 * bitstream and process its parts, recursing into decode_sb() for splits
 * and calling decode_b() for leaf blocks.
 *
 * How much is read depends on whether the second half of the area still
 * lies inside the frame: with both halves inside, the full partition tree
 * is read; with only one direction inside, a single probability (p[1] or
 * p[2]) decides whether the area is split; with neither inside, a split is
 * taken without reading anything. The chosen partition is counted in
 * s->counts.partition.
 *
 *   lflvl   loop filter state of the enclosing superblock
 *   yoff    byte offset of the area inside the frame's luma plane
 *   uvoff   byte offset of the area inside the frame's chroma planes
 */
3090 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3091 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3093 VP9Context *s = ctx->priv_data;
// probability context: one bit from the above and one from the left
// partition context at this level
3094 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3095 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
3096 const uint8_t *p = s->keyframe ? vp9_default_kf_partition_probs[bl][c] :
3097 s->prob.p.partition[bl][c];
3098 enum BlockPartition bp;
// half the size of this level, in units of 8 luma pixels
3099 ptrdiff_t hbs = 4 >> bl;
3100 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3101 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3104 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3105 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3106 } else if (col + hbs < s->cols) { // FIXME why not <=?
3107 if (row + hbs < s->rows) { // FIXME why not <=?
// both halves are inside the frame: read the full partition tree
3108 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3110 case PARTITION_NONE:
3111 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// two blocks, the second one hbs rows further down
3114 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3115 yoff += hbs * 8 * y_stride;
3116 uvoff += hbs * 4 * uv_stride;
3117 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
// two blocks, the second one hbs columns further right
3120 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3123 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3125 case PARTITION_SPLIT:
// four quarters in raster order, each decoded on the next level
3126 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3127 decode_sb(ctx, row, col + hbs, lflvl,
3128 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3129 yoff += hbs * 8 * y_stride;
3130 uvoff += hbs * 4 * uv_stride;
3131 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3132 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3133 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
// lower half is outside the frame: only the two upper quarters exist
3138 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3139 bp = PARTITION_SPLIT;
3140 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3141 decode_sb(ctx, row, col + hbs, lflvl,
3142 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3145 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// right half is outside the frame: only the two left quarters exist
3147 } else if (row + hbs < s->rows) { // FIXME why not <=?
3148 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3149 bp = PARTITION_SPLIT;
3150 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3151 yoff += hbs * 8 * y_stride;
3152 uvoff += hbs * 4 * uv_stride;
3153 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3156 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// only the top-left quarter is inside the frame: split without reading
3159 bp = PARTITION_SPLIT;
3160 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3162 s->counts.partition[bl][c][bp]++;
/*
 * Counterpart of decode_sb() that takes the partitioning from block data
 * already stored in memory (b->bl and b->bp) instead of reading it from the
 * bitstream: no range coder call appears in this function.
 *
 * If the stored block lives on the current level it is processed with
 * decode_b(), including the second half of a horizontal or vertical
 * partition when that half is inside the frame. Otherwise the function
 * recurses into the quarters that lie inside the frame.
 *
 *   lflvl   loop filter state of the enclosing superblock
 *   yoff    byte offset of the area inside the frame's luma plane
 *   uvoff   byte offset of the area inside the frame's chroma planes
 */
3165 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3166 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3168 VP9Context *s = ctx->priv_data;
// half the size of this level, in units of 8 luma pixels
3170 ptrdiff_t hbs = 4 >> bl;
3171 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3172 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3175 av_assert2(b->bl == BL_8X8);
3176 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
// the stored block belongs to this level: process it here
3177 } else if (s->b->bl == bl) {
3178 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
// second half of a two-part partition, if it is inside the frame
3179 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3180 yoff += hbs * 8 * y_stride;
3181 uvoff += hbs * 4 * uv_stride;
3182 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3183 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3186 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// the stored block is on a deeper level: descend into the quarters that are
// inside the frame
3189 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3190 if (col + hbs < s->cols) { // FIXME why not <=?
3191 if (row + hbs < s->rows) {
3192 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
3193 uvoff + 4 * hbs, bl + 1);
3194 yoff += hbs * 8 * y_stride;
3195 uvoff += hbs * 4 * uv_stride;
3196 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3197 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3198 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3202 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3204 } else if (row + hbs < s->rows) {
3205 yoff += hbs * 8 * y_stride;
3206 uvoff += hbs * 4 * uv_stride;
3207 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
/*
 * Run the loop filter over one superblock of the current frame, using the
 * edge masks (lflvl->mask) and per-8x8 filter levels (lflvl->level).
 *
 * Order of processing: luma column edges, luma row edges, then for each
 * chroma plane its column edges followed by its row edges.
 *
 * For every mask bit that is set, the filter level L of the corresponding
 * 8x8 is read and the thresholds are derived from it: E from
 * s->filter.mblim_lut[L], I from s->filter.lim_lut[L] and H = L >> 4. Where
 * two neighbouring 8-pixel edges both need filtering they are handled by one
 * call: loop_filter_16 when both are in size class 0, otherwise
 * loop_filter_mix2 with the second edge's thresholds placed in bits 8..15
 * of E and I.
 *
 *   yoff    byte offset of the superblock inside the frame's luma plane
 *   uvoff   byte offset of the superblock inside the frame's chroma planes
 */
3212 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3213 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3215 VP9Context *s = ctx->priv_data;
3216 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3217 uint8_t *dst = f->data[0] + yoff, *lvl = lflvl->level;
3218 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
3221 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3222 // if you think of them as acting on a 8x8 block max, we can interleave
3223 // each v/h within the single x loop, but that only works if we work on
3224 // 8 pixel blocks, and we won't always do that (we want at least 16px
3225 // to use SSE2 optimizations, perhaps 32 for AVX2)
3227 // filter edges between columns, Y plane (e.g. block1 | block2)
// two mask rows (16 pixel rows, 16 level entries) per iteration
3228 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
3229 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
3230 uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
// hm1 / hm2: any block-edge filter in the upper / lower row;
// hm13 / hm23: inner 4-pixel edges of the upper / lower row
3231 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3232 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3233 unsigned hm = hm1 | hm2 | hm13 | hm23;
// x is the bit of the current column; stop once no higher bit is set
3235 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
3237 int L = *l, H = L >> 4;
3238 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3241 if (hmask1[0] & x) {
3242 if (hmask2[0] & x) {
3243 av_assert2(l[8] == L);
3244 s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
3246 s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
3248 } else if (hm2 & x) {
3251 E |= s->filter.mblim_lut[L] << 8;
3252 I |= s->filter.lim_lut[L] << 8;
3253 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3255 [0](ptr, ls_y, E, I, H);
3257 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3258 [0](ptr, ls_y, E, I, H);
// only the lower row needs filtering; its level is 8 entries further on
3261 } else if (hm2 & x) {
3262 int L = l[8], H = L >> 4;
3263 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3266 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3267 [0](ptr + 8 * ls_y, ls_y, E, I, H);
// inner 4-pixel edges, located 4 pixels to the right of the block edge
3271 int L = *l, H = L >> 4;
3272 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3277 E |= s->filter.mblim_lut[L] << 8;
3278 I |= s->filter.lim_lut[L] << 8;
3279 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
3281 s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
3283 } else if (hm23 & x) {
3284 int L = l[8], H = L >> 4;
3285 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3287 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
3293 // filter edges between rows, Y plane (e.g. ------)
3295 dst = f->data[0] + yoff;
// one mask row per iteration, two columns (bits x and x << 1) per step
3297 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
3298 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
3299 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
3301 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
3304 int L = *l, H = L >> 4;
3305 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3308 if (vmask[0] & (x << 1)) {
3309 av_assert2(l[1] == L);
3310 s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
3312 s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
3314 } else if (vm & (x << 1)) {
3317 E |= s->filter.mblim_lut[L] << 8;
3318 I |= s->filter.lim_lut[L] << 8;
3319 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3320 [!!(vmask[1] & (x << 1))]
3321 [1](ptr, ls_y, E, I, H);
3323 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3324 [1](ptr, ls_y, E, I, H);
// only the right-hand column of the pair needs filtering
3326 } else if (vm & (x << 1)) {
3327 int L = l[1], H = L >> 4;
3328 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3330 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
3331 [1](ptr + 8, ls_y, E, I, H);
// inner 4-pixel edges, located 4 rows below the block edge
3335 int L = *l, H = L >> 4;
3336 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3338 if (vm3 & (x << 1)) {
3341 E |= s->filter.mblim_lut[L] << 8;
3342 I |= s->filter.lim_lut[L] << 8;
3343 s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
3345 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
3347 } else if (vm3 & (x << 1)) {
3348 int L = l[1], H = L >> 4;
3349 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3351 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
3356 // same principle but for U/V planes
3357 for (p = 0; p < 2; p++) {
3359 dst = f->data[1 + p] + uvoff;
// column edges: mask rows y and y + 2 are paired, covering 16 chroma rows
3360 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
3361 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
3362 uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
3363 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
3364 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
3366 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
3369 int L = *l, H = L >> 4;
3370 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3372 if (hmask1[0] & x) {
3373 if (hmask2[0] & x) {
3374 av_assert2(l[16] == L);
3375 s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
3377 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
3379 } else if (hm2 & x) {
3382 E |= s->filter.mblim_lut[L] << 8;
3383 I |= s->filter.lim_lut[L] << 8;
3384 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3386 [0](ptr, ls_uv, E, I, H);
3388 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3389 [0](ptr, ls_uv, E, I, H);
// only the lower row of the pair needs filtering; its level is 16 entries on
3391 } else if (hm2 & x) {
3392 int L = l[16], H = L >> 4;
3393 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3395 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3396 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
// row edges: columns x and x << 2 are paired, covering 16 chroma pixels
3404 dst = f->data[1 + p] + uvoff;
3405 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
3406 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
3407 unsigned vm = vmask[0] | vmask[1] | vmask[2];
3409 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
3412 int L = *l, H = L >> 4;
3413 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3416 if (vmask[0] & (x << 2)) {
3417 av_assert2(l[2] == L);
3418 s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
3420 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
3422 } else if (vm & (x << 2)) {
3425 E |= s->filter.mblim_lut[L] << 8;
3426 I |= s->filter.lim_lut[L] << 8;
3427 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3428 [!!(vmask[1] & (x << 2))]
3429 [1](ptr, ls_uv, E, I, H);
3431 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3432 [1](ptr, ls_uv, E, I, H);
// only the right-hand column of the pair needs filtering
3434 } else if (vm & (x << 2)) {
3435 int L = l[2], H = L >> 4;
3436 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3438 s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
3439 [1](ptr + 8, ls_uv, E, I, H);
/**
 * Compute the range covered by one tile along a single axis.
 *
 * The n superblocks of the axis are shared between (1 << log2_n) tiles:
 * tile idx spans superblocks [(idx * n) >> log2_n, ((idx + 1) * n) >> log2_n),
 * with both bounds capped at n. The bounds are then multiplied by 8, which
 * matches the row/col counters of the decode loop (they advance by 8 per
 * superblock).
 *
 * @param start  receives the first position belonging to the tile
 * @param end    receives the position one past the end of the tile
 * @param idx    index of the tile along this axis
 * @param log2_n log2 of the number of tiles along this axis
 * @param n      number of superblocks along this axis
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first_sb = (idx * n) >> log2_n;
    int last_sb  = ((idx + 1) * n) >> log2_n;

    // never let a tile extend past the last superblock
    if (first_sb > n)
        first_sb = n;
    if (last_sb > n)
        last_sb = n;

    *start = first_sb << 3;
    *end   = last_sb << 3;
}
3457 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3458 int max_count, int update_factor)
3460 unsigned ct = ct0 + ct1, p2, p1;
3466 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3467 p2 = av_clip(p2, 1, 255);
3468 ct = FFMIN(ct, max_count);
3469 update_factor = FASTDIV(update_factor * ct, max_count);
3471 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3472 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
3475 static void adapt_probs(VP9Context *s)
3478 prob_context *p = &s->prob_ctx[s->framectxid].p;
3479 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
3482 for (i = 0; i < 4; i++)
3483 for (j = 0; j < 2; j++)
3484 for (k = 0; k < 2; k++)
3485 for (l = 0; l < 6; l++)
3486 for (m = 0; m < 6; m++) {
3487 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3488 unsigned *e = s->counts.eob[i][j][k][l][m];
3489 unsigned *c = s->counts.coef[i][j][k][l][m];
3491 if (l == 0 && m >= 3) // dc only has 3 pt
3494 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3495 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3496 adapt_prob(&pp[2], c[1], c[2], 24, uf);
3499 if (s->keyframe || s->intraonly) {
3500 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3501 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3502 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3503 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
3508 for (i = 0; i < 3; i++)
3509 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
3512 for (i = 0; i < 4; i++)
3513 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
3516 if (s->comppredmode == PRED_SWITCHABLE) {
3517 for (i = 0; i < 5; i++)
3518 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
3522 if (s->comppredmode != PRED_SINGLEREF) {
3523 for (i = 0; i < 5; i++)
3524 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3525 s->counts.comp_ref[i][1], 20, 128);
3528 if (s->comppredmode != PRED_COMPREF) {
3529 for (i = 0; i < 5; i++) {
3530 uint8_t *pp = p->single_ref[i];
3531 unsigned (*c)[2] = s->counts.single_ref[i];
3533 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3534 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3538 // block partitioning
3539 for (i = 0; i < 4; i++)
3540 for (j = 0; j < 4; j++) {
3541 uint8_t *pp = p->partition[i][j];
3542 unsigned *c = s->counts.partition[i][j];
3544 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3545 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3546 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3550 if (s->txfmmode == TX_SWITCHABLE) {
3551 for (i = 0; i < 2; i++) {
3552 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3554 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3555 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3556 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3557 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3558 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3559 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3563 // interpolation filter
3564 if (s->filtermode == FILTER_SWITCHABLE) {
3565 for (i = 0; i < 4; i++) {
3566 uint8_t *pp = p->filter[i];
3567 unsigned *c = s->counts.filter[i];
3569 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3570 adapt_prob(&pp[1], c[1], c[2], 20, 128);
3575 for (i = 0; i < 7; i++) {
3576 uint8_t *pp = p->mv_mode[i];
3577 unsigned *c = s->counts.mv_mode[i];
3579 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3580 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3581 adapt_prob(&pp[2], c[1], c[3], 20, 128);
3586 uint8_t *pp = p->mv_joint;
3587 unsigned *c = s->counts.mv_joint;
3589 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3590 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3591 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3595 for (i = 0; i < 2; i++) {
3597 unsigned *c, (*c2)[2], sum;
3599 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3600 s->counts.mv_comp[i].sign[1], 20, 128);
3602 pp = p->mv_comp[i].classes;
3603 c = s->counts.mv_comp[i].classes;
3604 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3605 adapt_prob(&pp[0], c[0], sum, 20, 128);
3607 adapt_prob(&pp[1], c[1], sum, 20, 128);
3609 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3610 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3612 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3613 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3615 adapt_prob(&pp[6], c[6], sum, 20, 128);
3616 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3617 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3618 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3620 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3621 s->counts.mv_comp[i].class0[1], 20, 128);
3622 pp = p->mv_comp[i].bits;
3623 c2 = s->counts.mv_comp[i].bits;
3624 for (j = 0; j < 10; j++)
3625 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3627 for (j = 0; j < 2; j++) {
3628 pp = p->mv_comp[i].class0_fp[j];
3629 c = s->counts.mv_comp[i].class0_fp[j];
3630 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3631 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3632 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3634 pp = p->mv_comp[i].fp;
3635 c = s->counts.mv_comp[i].fp;
3636 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3637 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3638 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3640 if (s->highprecisionmvs) {
3641 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3642 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3643 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3644 s->counts.mv_comp[i].hp[1], 20, 128);
3649 for (i = 0; i < 4; i++) {
3650 uint8_t *pp = p->y_mode[i];
3651 unsigned *c = s->counts.y_mode[i], sum, s2;
3653 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3654 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3655 sum -= c[TM_VP8_PRED];
3656 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3657 sum -= c[VERT_PRED];
3658 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3659 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3661 adapt_prob(&pp[3], s2, sum, 20, 128);
3663 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3664 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3665 sum -= c[DIAG_DOWN_LEFT_PRED];
3666 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3667 sum -= c[VERT_LEFT_PRED];
3668 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3669 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3673 for (i = 0; i < 10; i++) {
3674 uint8_t *pp = p->uv_mode[i];
3675 unsigned *c = s->counts.uv_mode[i], sum, s2;
3677 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3678 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3679 sum -= c[TM_VP8_PRED];
3680 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3681 sum -= c[VERT_PRED];
3682 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3683 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3685 adapt_prob(&pp[3], s2, sum, 20, 128);
3687 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3688 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3689 sum -= c[DIAG_DOWN_LEFT_PRED];
3690 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3691 sum -= c[VERT_LEFT_PRED];
3692 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3693 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3697 static void free_buffers(VP9Context *s)
3699 av_freep(&s->intra_pred_data[0]);
3700 av_freep(&s->b_base);
3701 av_freep(&s->block_base);
3704 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3706 VP9Context *s = ctx->priv_data;
3709 for (i = 0; i < 2; i++) {
3710 if (s->frames[i].tf.f->data[0])
3711 vp9_unref_frame(ctx, &s->frames[i]);
3712 av_frame_free(&s->frames[i].tf.f);
3714 for (i = 0; i < 8; i++) {
3715 if (s->refs[i].f->data[0])
3716 ff_thread_release_buffer(ctx, &s->refs[i]);
3717 av_frame_free(&s->refs[i].f);
3718 if (s->next_refs[i].f->data[0])
3719 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3720 av_frame_free(&s->next_refs[i].f);
// Decode callback: consume one packet and output at most one picture.
//
// Steps visible in this function:
//  - decode_frame_header() parses the header; a negative result is an error,
//    a result of 0 selects the "output an existing reference" path;
//  - the previous current frame becomes LAST_FRAME (not on keyframes) and a
//    new CUR_FRAME is allocated;
//  - next_refs[] is built from refreshrefmask;
//  - the above-row contexts are reset and the tiles are decoded, one
//    superblock row at a time, followed by the loop filter for that row;
//  - progress is reported to other frame threads after every superblock row;
//  - refs[] is replaced by next_refs[] and, unless the frame is invisible,
//    the picture is returned through "frame".
//
// ctx:       codec context, priv_data is the VP9Context
// frame:     output AVFrame (passed straight to av_frame_ref())
// got_frame: output flag (its assignments are not in the visible text)
// pkt:       input packet
3730 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3731 int *got_frame, AVPacket *pkt)
3733 const uint8_t *data = pkt->data;
3734 int size = pkt->size;
3735 VP9Context *s = ctx->priv_data;
3736 int res, tile_row, tile_col, i, ref, row, col;
3737 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
3740 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
// res == 0: no new picture is decoded; reference slot "ref" is handed out
// as the output picture instead, provided it holds data.
3742 } else if (res == 0) {
3743 if (!s->refs[ref].f->data[0]) {
3744 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3745 return AVERROR_INVALIDDATA;
3747 if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
// Rotate the frame slots: drop the old LAST_FRAME, keep a reference to the
// outgoing CUR_FRAME as LAST_FRAME (skipped on keyframes), then allocate a
// fresh CUR_FRAME.
3755 if (s->frames[LAST_FRAME].tf.f->data[0])
3756 vp9_unref_frame(ctx, &s->frames[LAST_FRAME]);
3757 if (!s->keyframe && s->frames[CUR_FRAME].tf.f->data[0] &&
3758 (res = vp9_ref_frame(ctx, &s->frames[LAST_FRAME], &s->frames[CUR_FRAME])) < 0)
3760 if (s->frames[CUR_FRAME].tf.f->data[0])
3761 vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
3762 if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
3764 f = s->frames[CUR_FRAME].tf.f;
3765 f->key_frame = s->keyframe;
3766 f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3767 ls_y = f->linesize[0];
3768 ls_uv =f->linesize[1];
// Build the reference set for the following frame: slots selected by
// refreshrefmask take the current frame, all others carry over from refs[].
3771 for (i = 0; i < 8; i++) {
3772 if (s->next_refs[i].f->data[0])
3773 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3774 if (s->refreshrefmask & (1 << i)) {
3775 res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
3777 res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
3783 // main tile decode loop
// Reset the "above" contexts for the top edge of the frame. The mode context
// is filled with DC_PRED on key/intra-only frames and NEARESTMV otherwise.
3784 memset(s->above_partition_ctx, 0, s->cols);
3785 memset(s->above_skip_ctx, 0, s->cols);
3786 if (s->keyframe || s->intraonly) {
3787 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
3789 memset(s->above_mode_ctx, NEARESTMV, s->cols);
3791 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
3792 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
3793 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
3794 memset(s->above_segpred_ctx, 0, s->cols);
// Two-pass decoding is enabled only with frame threading, and only when the
// frame refreshes its context and is not in parallel mode; pass then starts
// at 1 instead of 0.
3795 s->pass = s->uses_2pass =
3796 ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
3797 if ((res = update_block_buffers(ctx)) < 0) {
3798 av_log(ctx, AV_LOG_ERROR,
3799 "Failed to allocate block buffers\n");
// Parallel mode: the saved context is filled from the probabilities of this
// frame (s->prob) right away, with no count-based adaptation, and the next
// frame thread is released via ff_thread_finish_setup().
3802 if (s->refreshctx && s->parallelmode) {
3805 for (i = 0; i < 4; i++) {
3806 for (j = 0; j < 2; j++)
3807 for (k = 0; k < 2; k++)
3808 for (l = 0; l < 6; l++)
3809 for (m = 0; m < 6; m++)
3810 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
3811 s->prob.coef[i][j][k][l][m], 3);
3812 if (s->txfmmode == i)
3815 s->prob_ctx[s->framectxid].p = s->prob.p;
3816 ff_thread_finish_setup(ctx);
// Rewind the coefficient and eob write pointers to the start of their
// buffers.
3822 s->block = s->block_base;
3823 s->uvblock[0] = s->uvblock_base[0];
3824 s->uvblock[1] = s->uvblock_base[1];
3825 s->eob = s->eob_base;
3826 s->uveob[0] = s->uveob_base[0];
3827 s->uveob[1] = s->uveob_base[1];
3829 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
3830 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
3831 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
// Set up one range decoder per tile column of this tile row. The tile size
// is read as a 32-bit big-endian value, except for the very last tile of the
// frame (that branch body is not in the visible text; presumably it uses the
// remaining bytes - confirm).
// NOTE(review): tile_size appears to be unsigned while size is int, so the
// comparison below is done in unsigned arithmetic - confirm size cannot have
// gone negative by this point.
3833 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3836 if (tile_col == s->tiling.tile_cols - 1 &&
3837 tile_row == s->tiling.tile_rows - 1) {
3840 tile_size = AV_RB32(data);
3844 if (tile_size > size) {
3845 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3846 return AVERROR_INVALIDDATA;
3848 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
3849 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
3850 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3851 return AVERROR_INVALIDDATA;
// One iteration per superblock row: row advances by 8, the luma offset by 64
// lines and the chroma offset by 32 lines.
3858 for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
3859 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
3860 struct VP9Filter *lflvl_ptr = s->lflvl;
3861 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
3863 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3864 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
3865 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
// Reset the "left" contexts at the left edge of the tile.
3868 memset(s->left_partition_ctx, 0, 8);
3869 memset(s->left_skip_ctx, 0, 8);
3870 if (s->keyframe || s->intraonly) {
3871 memset(s->left_mode_ctx, DC_PRED, 16);
3873 memset(s->left_mode_ctx, NEARESTMV, 8);
3875 memset(s->left_y_nnz_ctx, 0, 16);
3876 memset(s->left_uv_nnz_ctx, 0, 16);
3877 memset(s->left_segpred_ctx, 0, 8);
// Load this tile column's range coder state into the working coder s->c;
// it is stored back after the superblocks of this row have been decoded.
3879 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
3882 for (col = s->tiling.tile_col_start;
3883 col < s->tiling.tile_col_end;
3884 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3885 // FIXME integrate with lf code (i.e. zero after each
3886 // use, similar to invtxfm coefficients, or similar)
3888 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
// decode_sb_mem() and decode_sb() are the two alternatives for a 64x64
// superblock; the condition selecting between them is not in the visible
// text (presumably the pass number - confirm).
3892 decode_sb_mem(ctx, row, col, lflvl_ptr,
3893 yoff2, uvoff2, BL_64X64);
3895 decode_sb(ctx, row, col, lflvl_ptr,
3896 yoff2, uvoff2, BL_64X64);
3900 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
3908 // backup pre-loopfilter reconstruction data for intra
3909 // prediction of next row of sb64s
3910 if (row + 8 < s->rows) {
3911 memcpy(s->intra_pred_data[0],
3912 f->data[0] + yoff + 63 * ls_y,
3914 memcpy(s->intra_pred_data[1],
3915 f->data[1] + uvoff + 31 * ls_uv,
3917 memcpy(s->intra_pred_data[2],
3918 f->data[2] + uvoff + 31 * ls_uv,
3922 // loopfilter one row
3923 if (s->filter.level) {
3926 lflvl_ptr = s->lflvl;
3927 for (col = 0; col < s->cols;
3928 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3929 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
3933 // FIXME maybe we can make this more finegrained by running the
3934 // loopfilter per-block instead of after each sbrow
3935 // In fact that would also make intra pred left preparation easier?
3936 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
// After the first decoding pass of a frame that refreshes its context
// outside parallel mode, the next frame thread may start. The statement
// preceding ff_thread_finish_setup() is not in the visible text (presumably
// the adapt_probs() call - confirm).
3940 if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
3942 ff_thread_finish_setup(ctx);
// The loop body repeats once more only when pass was 1 on this iteration,
// i.e. in two-pass mode.
3944 } while (s->pass++ == 1);
3945 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
// Replace the reference slots with the set prepared in next_refs[].
// NOTE(review): the result of ff_thread_ref_frame() is ignored here, unlike
// in the next_refs[] setup above where it is stored in res - a failed ref
// would go unnoticed.
3948 for (i = 0; i < 8; i++) {
3949 if (s->refs[i].f->data[0])
3950 ff_thread_release_buffer(ctx, &s->refs[i]);
3951 ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
// Only visible frames are returned to the caller.
3954 if (!s->invisible) {
3955 if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
3963 static void vp9_decode_flush(AVCodecContext *ctx)
3965 VP9Context *s = ctx->priv_data;
3968 for (i = 0; i < 2; i++)
3969 vp9_unref_frame(ctx, &s->frames[i]);
3970 for (i = 0; i < 8; i++)
3971 ff_thread_release_buffer(ctx, &s->refs[i]);
3974 static int init_frames(AVCodecContext *ctx)
3976 VP9Context *s = ctx->priv_data;
3979 for (i = 0; i < 2; i++) {
3980 s->frames[i].tf.f = av_frame_alloc();
3981 if (!s->frames[i].tf.f) {
3982 vp9_decode_free(ctx);
3983 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
3984 return AVERROR(ENOMEM);
3987 for (i = 0; i < 8; i++) {
3988 s->refs[i].f = av_frame_alloc();
3989 s->next_refs[i].f = av_frame_alloc();
3990 if (!s->refs[i].f || !s->next_refs[i].f) {
3991 vp9_decode_free(ctx);
3992 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
3993 return AVERROR(ENOMEM);
4000 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4002 VP9Context *s = ctx->priv_data;
4004 ctx->internal->allocate_progress = 1;
4005 ctx->pix_fmt = AV_PIX_FMT_YUV420P;
4006 ff_vp9dsp_init(&s->dsp);
4007 ff_videodsp_init(&s->vdsp, 8);
4008 s->filter.sharpness = -1;
4010 return init_frames(ctx);
4013 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4015 return init_frames(avctx);
4018 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4021 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4023 // detect size changes in other threads
4024 if (s->intra_pred_data[0] &&
4025 (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
4029 for (i = 0; i < 2; i++) {
4030 if (s->frames[i].tf.f->data[0])
4031 vp9_unref_frame(dst, &s->frames[i]);
4032 if (ssrc->frames[i].tf.f->data[0]) {
4033 if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
4037 for (i = 0; i < 8; i++) {
4038 if (s->refs[i].f->data[0])
4039 ff_thread_release_buffer(dst, &s->refs[i]);
4040 if (ssrc->next_refs[i].f->data[0]) {
4041 if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
4046 s->invisible = ssrc->invisible;
4047 s->keyframe = ssrc->keyframe;
4048 s->uses_2pass = ssrc->uses_2pass;
4049 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4050 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
4051 if (ssrc->segmentation.enabled) {
4052 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4053 sizeof(s->segmentation.feat));
// Codec registration entry for the VP9 decoder: ties the callbacks defined in
// this file to AV_CODEC_ID_VP9.
4059 AVCodec ff_vp9_decoder = {
4061 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4062 .type = AVMEDIA_TYPE_VIDEO,
4063 .id = AV_CODEC_ID_VP9,
4064 .priv_data_size = sizeof(VP9Context),
4065 .init = vp9_decode_init,
4066 .close = vp9_decode_free,
4067 .decode = vp9_decode_frame,
// Advertises the DR1 and frame-threading capability flags.
4068 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4069 .flush = vp9_decode_flush,
// The two threading callbacks are wrapped in ONLY_IF_THREADS_ENABLED()
// (presumably NULL when threading support is compiled out - confirm).
4070 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4071 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),