2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
35 #define VP9_SYNCCODE 0x498342
// Per-frame state: the decoded picture plus side data (segmentation map and
// per-8x8-block motion-vector/reference pairs) shared between frame threads.
// NOTE(review): excerpt is missing interior lines (closing brace, the
// VP9Filter struct header) — the mask[] below belongs to struct VP9Filter.
72 typedef struct VP9Frame {
74 AVBufferRef *extradata;
75 uint8_t *segmentation_map;
76 struct VP9mvrefPair *mv;
// Loop-filter application mask, indexed by plane, edge direction, row within
// the superblock, and transform-size class (comments inline below).
81 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
82 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
// Per-block mode decision state filled in by decode_mode() and consumed by
// the reconstruction code: segment id, intra/inter flags, reference indices,
// per-sub-block modes and motion vectors, transform sizes and partition.
85 typedef struct VP9Block {
86 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
87 enum FilterMode filter;
88 VP56mv mv[4 /* b_idx */][2 /* ref */];
90 enum TxfmMode tx, uvtx;
92 enum BlockPartition bp;
// Decoder private context. Groups (in order): current-block bookkeeping,
// uncompressed frame-header fields, loop-filter/quantizer/segmentation state,
// tiling layout, probability tables and adaptation counts, and the
// left/above contextual caches used during block decoding.
95 typedef struct VP9Context {
102 VP9Block *b_base, *b;
103 int pass, uses_2pass, last_uses_2pass;
// row/col are in 8x8-block units; row7/col7 are the low 3 bits (position
// within the 64x64 superblock).
104 int row, row7, col, col7;
106 ptrdiff_t y_stride, uv_stride;
// Frame-header flags parsed in decode_frame_header().
110 uint8_t keyframe, last_keyframe;
112 uint8_t use_last_frame_mvs;
118 uint8_t refreshrefmask;
119 uint8_t highprecisionmvs;
120 enum FilterMode filtermode;
121 uint8_t allowcompinter;
124 uint8_t parallelmode;
128 uint8_t varcompref[2];
129 ThreadFrame refs[8], next_refs[8];
// Loop-filter limit LUT, rebuilt when sharpness changes.
138 uint8_t mblim_lut[64];
146 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
151 uint8_t absolute_vals;
157 uint8_t skip_enabled;
// Tiling layout derived from the frame header.
166 unsigned log2_tile_cols, log2_tile_rows;
167 unsigned tile_cols, tile_rows;
168 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
// Frame dimensions in superblocks (64x64) and in 8x8 blocks.
170 unsigned sb_cols, sb_rows, rows, cols;
// Saved coefficient probabilities (3-entry model, prob_ctx)...
173 uint8_t coef[4][2][2][6][6][3];
// ...and the expanded 11-entry working model (prob).
177 uint8_t coef[4][2][2][6][6][11];
// Symbol occurrence counts used for backward probability adaptation.
182 unsigned y_mode[4][10];
183 unsigned uv_mode[10][10];
184 unsigned filter[4][3];
185 unsigned mv_mode[7][4];
186 unsigned intra[4][2];
188 unsigned single_ref[5][2][2];
189 unsigned comp_ref[5][2];
190 unsigned tx32p[2][4];
191 unsigned tx16p[2][3];
194 unsigned mv_joint[4];
197 unsigned classes[11];
199 unsigned bits[10][2];
200 unsigned class0_fp[2][4];
202 unsigned class0_hp[2];
205 unsigned partition[4][4][4];
206 unsigned coef[4][2][2][6][6][3];
207 unsigned eob[4][2][2][6][6][2];
209 enum TxfmMode txfmmode;
210 enum CompPredMode comppredmode;
212 // contextual (left/above) cache
213 uint8_t left_partition_ctx[8], *above_partition_ctx;
214 uint8_t left_mode_ctx[16], *above_mode_ctx;
215 // FIXME maybe merge some of the below in a flags field?
216 uint8_t left_y_nnz_ctx[16], *above_y_nnz_ctx;
217 uint8_t left_uv_nnz_ctx[2][8], *above_uv_nnz_ctx[2];
218 uint8_t left_skip_ctx[8], *above_skip_ctx; // 1bit
219 uint8_t left_txfm_ctx[8], *above_txfm_ctx; // 2bit
220 uint8_t left_segpred_ctx[8], *above_segpred_ctx; // 1bit
221 uint8_t left_intra_ctx[8], *above_intra_ctx; // 1bit
222 uint8_t left_comp_ctx[8], *above_comp_ctx; // 1bit
223 uint8_t left_ref_ctx[8], *above_ref_ctx; // 2bit
224 uint8_t left_filter_ctx[8], *above_filter_ctx;
225 VP56mv left_mv_ctx[16][2], (*above_mv_ctx)[2];
// Whole-frame cache: last row of reconstructed pixels, for intra prediction.
228 uint8_t *intra_pred_data[3];
229 struct VP9Filter *lflvl;
230 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71*80];
232 // block reconstruction intermediates
233 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
234 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
// MV clamping range for the current block (see clamp_mv()).
235 struct { int x, y; } min_mv, max_mv;
236 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
237 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
240 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
242 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
243 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
245 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
246 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
250 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
252 VP9Context *s = ctx->priv_data;
255 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
257 sz = 64 * s->sb_cols * s->sb_rows;
258 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
259 ff_thread_release_buffer(ctx, &f->tf);
260 return AVERROR(ENOMEM);
263 f->segmentation_map = f->extradata->data;
264 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
266 // retain segmentation map if it doesn't update
267 if (s->segmentation.enabled && !s->segmentation.update_map &&
268 !s->keyframe && !s->intraonly) {
269 memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, sz);
275 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
277 ff_thread_release_buffer(ctx, &f->tf);
278 av_buffer_unref(&f->extradata);
281 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
285 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
287 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
288 vp9_unref_frame(ctx, dst);
289 return AVERROR(ENOMEM);
292 dst->segmentation_map = src->segmentation_map;
// (Re)initialize all per-resolution state: frame geometry, the packed
// "above" context allocation, and the block/coefficient intermediates whose
// size depends on whether two-pass (frame-threaded) decoding is used.
// Returns 0 or a negative AVERROR. NOTE(review): excerpt is missing interior
// lines (early-return, error labels, #undef, final return) — do not assume
// the visible lines are contiguous.
298 static int update_size(AVCodecContext *ctx, int w, int h)
300 VP9Context *s = ctx->priv_data;
303 av_assert0(w > 0 && h > 0);
// Fast path: nothing to do if the size is unchanged and buffers exist.
305 if (s->above_partition_ctx && w == ctx->width && h == ctx->height)
// Geometry: superblocks are 64x64, block units are 8x8.
310 s->sb_cols = (w + 63) >> 6;
311 s->sb_rows = (h + 63) >> 6;
312 s->cols = (w + 7) >> 3;
313 s->rows = (h + 7) >> 3;
// Carve all "above" context arrays out of one packed allocation; the 240
// below must equal the sum of the per-sb_col byte counts assigned here.
315 #define assign(var, type, n) var = (type) p; p += s->sb_cols * n * sizeof(*var)
316 av_freep(&s->above_partition_ctx);
317 p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
319 return AVERROR(ENOMEM);
320 assign(s->above_partition_ctx, uint8_t *, 8);
321 assign(s->above_skip_ctx, uint8_t *, 8);
322 assign(s->above_txfm_ctx, uint8_t *, 8);
323 assign(s->above_mode_ctx, uint8_t *, 16);
324 assign(s->above_y_nnz_ctx, uint8_t *, 16);
325 assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
326 assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
327 assign(s->intra_pred_data[0], uint8_t *, 64);
328 assign(s->intra_pred_data[1], uint8_t *, 32);
329 assign(s->intra_pred_data[2], uint8_t *, 32);
330 assign(s->above_segpred_ctx, uint8_t *, 8);
331 assign(s->above_intra_ctx, uint8_t *, 8);
332 assign(s->above_comp_ctx, uint8_t *, 8);
333 assign(s->above_ref_ctx, uint8_t *, 8);
334 assign(s->above_filter_ctx, uint8_t *, 8);
335 assign(s->lflvl, struct VP9Filter *, 1);
336 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
// Block/coefficient intermediates: whole-frame sized for two-pass
// frame-threaded decoding, single-superblock sized otherwise.
340 av_free(s->block_base);
341 if (ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode) {
342 int sbs = s->sb_cols * s->sb_rows;
344 s->b_base = av_malloc(sizeof(VP9Block) * s->cols * s->rows);
// 64*64 luma + 2*32*32 chroma coefficients, +128 bytes of eob flags, per sb.
345 s->block_base = av_mallocz((64 * 64 + 128) * sbs * 3);
346 if (!s->b_base || !s->block_base)
347 return AVERROR(ENOMEM);
348 s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
349 s->uvblock_base[1] = s->uvblock_base[0] + sbs * 32 * 32;
350 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * 32 * 32);
351 s->uveob_base[0] = s->eob_base + 256 * sbs;
352 s->uveob_base[1] = s->uveob_base[0] + 64 * sbs;
// Single-superblock variant of the same layout.
354 s->b_base = av_malloc(sizeof(VP9Block));
355 s->block_base = av_mallocz((64 * 64 + 128) * 3);
356 if (!s->b_base || !s->block_base)
357 return AVERROR(ENOMEM);
358 s->uvblock_base[0] = s->block_base + 64 * 64;
359 s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
360 s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
361 s->uveob_base[0] = s->eob_base + 256;
362 s->uveob_base[1] = s->uveob_base[0] + 64;
368 // for some reason the sign bit is at the end, not the start, of a bit sequence
369 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
371 int v = get_bits(gb, n);
372 return get_bits1(gb) ? -v : v;
375 static av_always_inline int inv_recenter_nonneg(int v, int m)
377 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
380 // differential forward probability updates
// Decode a forward probability update for current probability p (range
// [1,255]) and return the new probability. The coded symbol d is read with a
// short VLC (4/4/5/7-bit classes, see below) and mapped through
// inv_map_table[] before being "un-recentered" around p. NOTE(review): the
// excerpt is missing the tail of inv_map_table (entries 252, 253), the
// declaration of d, and the final VLC else-branch — visible lines are not
// contiguous.
381 static int update_prob(VP56RangeCoder *c, int p)
383 static const int inv_map_table[254] = {
384 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
385 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
386 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
387 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
388 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
389 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
390 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
391 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
392 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
393 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
394 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
395 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
396 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
397 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
398 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
399 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
400 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
401 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
406 /* This code is trying to do a differential probability update. For a
407 * current probability A in the range [1, 255], the difference to a new
408 * probability of any value can be expressed differentially as 1-A,255-A
409 * where some part of this (absolute range) exists both in positive as
410 * well as the negative part, whereas another part only exists in one
411 * half. We're trying to code this shared part differentially, i.e.
412 * times two where the value of the lowest bit specifies the sign, and
413 * the single part is then coded on top of this. This absolute difference
414 * then again has a value of [0,254], but a bigger value in this range
415 * indicates that we're further away from the original value A, so we
416 * can code this as a VLC code, since higher values are increasingly
417 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
418 * updates vs. the 'fine, exact' updates further down the range, which
419 * adds one extra dimension to this differential update model. */
// VLC decode of d: three prefix bits select 4/4/5/7-bit magnitude classes.
421 if (!vp8_rac_get(c)) {
422 d = vp8_rac_get_uint(c, 4) + 0;
423 } else if (!vp8_rac_get(c)) {
424 d = vp8_rac_get_uint(c, 4) + 16;
425 } else if (!vp8_rac_get(c)) {
426 d = vp8_rac_get_uint(c, 5) + 32;
428 d = vp8_rac_get_uint(c, 7);
430 d = (d << 1) - 65 + vp8_rac_get(c);
// Mirror around 128 so the update is symmetric in probability space.
434 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
435 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
// Parse the VP9 uncompressed frame header (bit-exact, via GetBitContext)
// followed by the compressed header (range-coded forward probability
// updates). Fills the VP9Context header fields, (re)allocates size-dependent
// buffers via update_size(), and returns the total header size in bytes
// (uncompressed + compressed) or a negative AVERROR. *ref is set to the
// reference index when the packet is a show-existing-frame marker.
// NOTE(review): the excerpt is missing many interior lines (else-branches,
// closing braces, error gotos) — visible lines are not contiguous.
438 static int decode_frame_header(AVCodecContext *ctx,
439 const uint8_t *data, int size, int *ref)
441 VP9Context *s = ctx->priv_data;
442 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
444 const uint8_t *data2;
// --- uncompressed (raw-bit) header ---
447 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
448 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
451 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
452 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
453 return AVERROR_INVALIDDATA;
455 s->profile = get_bits1(&s->gb);
456 if (get_bits1(&s->gb)) { // reserved bit
457 av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
458 return AVERROR_INVALIDDATA;
// show-existing-frame: just output a previously decoded reference.
460 if (get_bits1(&s->gb)) {
461 *ref = get_bits(&s->gb, 3);
464 s->last_uses_2pass = s->uses_2pass;
465 s->last_keyframe = s->keyframe;
466 s->keyframe = !get_bits1(&s->gb);
467 last_invisible = s->invisible;
468 s->invisible = !get_bits1(&s->gb);
469 s->errorres = get_bits1(&s->gb);
470 // FIXME disable this upon resolution change
471 s->use_last_frame_mvs = !s->errorres && !last_invisible;
// Keyframe path: sync code, colorspace, full frame dimensions.
473 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
474 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
475 return AVERROR_INVALIDDATA;
477 s->colorspace = get_bits(&s->gb, 3);
478 if (s->colorspace == 7) { // RGB = profile 1
479 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
480 return AVERROR_INVALIDDATA;
482 s->fullrange = get_bits1(&s->gb);
483 // for profile 1, here follows the subsampling bits
484 s->refreshrefmask = 0xff;
485 w = get_bits(&s->gb, 16) + 1;
486 h = get_bits(&s->gb, 16) + 1;
487 if (get_bits1(&s->gb)) // display size
488 skip_bits(&s->gb, 32);
// Non-keyframe path: intra-only or inter frame header.
490 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
491 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
493 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
494 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
495 return AVERROR_INVALIDDATA;
497 s->refreshrefmask = get_bits(&s->gb, 8);
498 w = get_bits(&s->gb, 16) + 1;
499 h = get_bits(&s->gb, 16) + 1;
500 if (get_bits1(&s->gb)) // display size
501 skip_bits(&s->gb, 32);
// Inter frame: three active references with per-ref sign bias.
503 s->refreshrefmask = get_bits(&s->gb, 8);
504 s->refidx[0] = get_bits(&s->gb, 3);
505 s->signbias[0] = get_bits1(&s->gb);
506 s->refidx[1] = get_bits(&s->gb, 3);
507 s->signbias[1] = get_bits1(&s->gb);
508 s->refidx[2] = get_bits(&s->gb, 3);
509 s->signbias[2] = get_bits1(&s->gb);
510 if (!s->refs[s->refidx[0]].f->data[0] ||
511 !s->refs[s->refidx[1]].f->data[0] ||
512 !s->refs[s->refidx[2]].f->data[0]) {
513 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
514 return AVERROR_INVALIDDATA;
// Frame size either copied from one of the refs or coded explicitly.
516 if (get_bits1(&s->gb)) {
517 w = s->refs[s->refidx[0]].f->width;
518 h = s->refs[s->refidx[0]].f->height;
519 } else if (get_bits1(&s->gb)) {
520 w = s->refs[s->refidx[1]].f->width;
521 h = s->refs[s->refidx[1]].f->height;
522 } else if (get_bits1(&s->gb)) {
523 w = s->refs[s->refidx[2]].f->width;
524 h = s->refs[s->refidx[2]].f->height;
526 w = get_bits(&s->gb, 16) + 1;
527 h = get_bits(&s->gb, 16) + 1;
529 if (get_bits1(&s->gb)) // display size
530 skip_bits(&s->gb, 32);
531 s->highprecisionmvs = get_bits1(&s->gb);
532 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
// Compound prediction is only possible when the refs disagree in sign bias;
// the fixed/variable compound refs are derived from which pair agrees.
534 s->allowcompinter = s->signbias[0] != s->signbias[1] ||
535 s->signbias[0] != s->signbias[2];
536 if (s->allowcompinter) {
537 if (s->signbias[0] == s->signbias[1]) {
539 s->varcompref[0] = 0;
540 s->varcompref[1] = 1;
541 } else if (s->signbias[0] == s->signbias[2]) {
543 s->varcompref[0] = 0;
544 s->varcompref[1] = 2;
547 s->varcompref[0] = 1;
548 s->varcompref[1] = 2;
553 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
554 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
555 s->framectxid = c = get_bits(&s->gb, 2);
557 /* loopfilter header data */
558 s->filter.level = get_bits(&s->gb, 6);
559 sharp = get_bits(&s->gb, 3);
560 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
561 // the old cache values since they are still valid
562 if (s->filter.sharpness != sharp)
563 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
564 s->filter.sharpness = sharp;
565 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
566 if (get_bits1(&s->gb)) {
567 for (i = 0; i < 4; i++)
568 if (get_bits1(&s->gb))
569 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
570 for (i = 0; i < 2; i++)
571 if (get_bits1(&s->gb))
572 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
575 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
578 /* quantization header data */
579 s->yac_qi = get_bits(&s->gb, 8);
580 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
581 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
582 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
583 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
584 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
586 /* segmentation header info */
587 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
588 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
589 for (i = 0; i < 7; i++)
590 s->prob.seg[i] = get_bits1(&s->gb) ?
591 get_bits(&s->gb, 8) : 255;
592 if ((s->segmentation.temporal = get_bits1(&s->gb)))
593 for (i = 0; i < 3; i++)
594 s->prob.segpred[i] = get_bits1(&s->gb) ?
595 get_bits(&s->gb, 8) : 255;
// Per-segment feature data (quantizer, loop filter, reference, skip).
598 if (get_bits1(&s->gb)) {
599 s->segmentation.absolute_vals = get_bits1(&s->gb);
600 for (i = 0; i < 8; i++) {
601 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
602 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
603 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
604 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
605 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
606 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
607 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
611 s->segmentation.feat[0].q_enabled = 0;
612 s->segmentation.feat[0].lf_enabled = 0;
613 s->segmentation.feat[0].skip_enabled = 0;
614 s->segmentation.feat[0].ref_enabled = 0;
617 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
618 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
619 int qyac, qydc, quvac, quvdc, lflvl, sh;
621 if (s->segmentation.feat[i].q_enabled) {
622 if (s->segmentation.absolute_vals)
623 qyac = s->segmentation.feat[i].q_val;
625 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
629 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
630 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
631 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
632 qyac = av_clip_uintp2(qyac, 8);
634 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
635 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
636 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
637 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];
// Pre-compute per-segment, per-ref, per-mode loop filter levels.
639 sh = s->filter.level >= 32;
640 if (s->segmentation.feat[i].lf_enabled) {
641 if (s->segmentation.absolute_vals)
642 lflvl = s->segmentation.feat[i].lf_val;
644 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
646 lflvl = s->filter.level;
648 s->segmentation.feat[i].lflvl[0][0] =
649 s->segmentation.feat[i].lflvl[0][1] =
650 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
651 for (j = 1; j < 4; j++) {
652 s->segmentation.feat[i].lflvl[j][0] =
653 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
654 s->lf_delta.mode[0]) << sh), 6);
655 s->segmentation.feat[i].lflvl[j][1] =
656 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
657 s->lf_delta.mode[1]) << sh), 6);
// (Re)allocate size-dependent buffers now that w/h are known.
662 if ((res = update_size(ctx, w, h)) < 0) {
663 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
// Tiling: columns bounded so each tile is >= 4 and <= 64 superblocks wide.
666 for (s->tiling.log2_tile_cols = 0;
667 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
668 s->tiling.log2_tile_cols++) ;
669 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
670 max = FFMAX(0, max - 1);
671 while (max > s->tiling.log2_tile_cols) {
672 if (get_bits1(&s->gb))
673 s->tiling.log2_tile_cols++;
677 s->tiling.log2_tile_rows = decode012(&s->gb);
678 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
679 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
680 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
681 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
682 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
684 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
685 return AVERROR(ENOMEM);
// Reset all probability contexts to defaults on key/error/intra frames.
689 if (s->keyframe || s->errorres || s->intraonly) {
690 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
691 s->prob_ctx[3].p = vp9_default_probs;
692 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
693 sizeof(vp9_default_coef_probs));
694 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
695 sizeof(vp9_default_coef_probs));
696 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
697 sizeof(vp9_default_coef_probs));
698 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
699 sizeof(vp9_default_coef_probs));
// --- compressed (range-coded) header ---
702 // next 16 bits is size of the rest of the header (arith-coded)
703 size2 = get_bits(&s->gb, 16);
704 data2 = align_get_bits(&s->gb);
705 if (size2 > size - (data2 - data)) {
706 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
707 return AVERROR_INVALIDDATA;
709 ff_vp56_init_range_decoder(&s->c, data2, size2);
710 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
711 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
712 return AVERROR_INVALIDDATA;
// Reset adaptation counts (intra frames keep inter counts untouched since
// coef/eob are the first members — see the partial memset).
715 if (s->keyframe || s->intraonly) {
716 memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
718 memset(&s->counts, 0, sizeof(s->counts));
720 // FIXME is it faster to not copy here, but do it down in the fw updates
721 // as explicit copies if the fw update is missing (and skip the copy upon
723 s->prob.p = s->prob_ctx[c].p;
// Transform mode + forward updates of tx split probabilities.
727 s->txfmmode = TX_4X4;
729 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
730 if (s->txfmmode == 3)
731 s->txfmmode += vp8_rac_get(&s->c);
733 if (s->txfmmode == TX_SWITCHABLE) {
734 for (i = 0; i < 2; i++)
735 if (vp56_rac_get_prob_branchy(&s->c, 252))
736 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
737 for (i = 0; i < 2; i++)
738 for (j = 0; j < 2; j++)
739 if (vp56_rac_get_prob_branchy(&s->c, 252))
740 s->prob.p.tx16p[i][j] =
741 update_prob(&s->c, s->prob.p.tx16p[i][j]);
742 for (i = 0; i < 2; i++)
743 for (j = 0; j < 3; j++)
744 if (vp56_rac_get_prob_branchy(&s->c, 252))
745 s->prob.p.tx32p[i][j] =
746 update_prob(&s->c, s->prob.p.tx32p[i][j]);
// Coefficient probability forward updates, per tx size; stops early once
// the coded tx size class is reached.
751 for (i = 0; i < 4; i++) {
752 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
753 if (vp8_rac_get(&s->c)) {
754 for (j = 0; j < 2; j++)
755 for (k = 0; k < 2; k++)
756 for (l = 0; l < 6; l++)
757 for (m = 0; m < 6; m++) {
758 uint8_t *p = s->prob.coef[i][j][k][l][m];
759 uint8_t *r = ref[j][k][l][m];
760 if (m >= 3 && l == 0) // dc only has 3 pt
762 for (n = 0; n < 3; n++) {
763 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
764 p[n] = update_prob(&s->c, r[n]);
// No update coded: copy the saved context probabilities verbatim.
772 for (j = 0; j < 2; j++)
773 for (k = 0; k < 2; k++)
774 for (l = 0; l < 6; l++)
775 for (m = 0; m < 6; m++) {
776 uint8_t *p = s->prob.coef[i][j][k][l][m];
777 uint8_t *r = ref[j][k][l][m];
778 if (m > 3 && l == 0) // dc only has 3 pt
784 if (s->txfmmode == i)
// Mode/reference/filter probability forward updates (inter frames only
// from line 792 on).
789 for (i = 0; i < 3; i++)
790 if (vp56_rac_get_prob_branchy(&s->c, 252))
791 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
792 if (!s->keyframe && !s->intraonly) {
793 for (i = 0; i < 7; i++)
794 for (j = 0; j < 3; j++)
795 if (vp56_rac_get_prob_branchy(&s->c, 252))
796 s->prob.p.mv_mode[i][j] =
797 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
799 if (s->filtermode == FILTER_SWITCHABLE)
800 for (i = 0; i < 4; i++)
801 for (j = 0; j < 2; j++)
802 if (vp56_rac_get_prob_branchy(&s->c, 252))
803 s->prob.p.filter[i][j] =
804 update_prob(&s->c, s->prob.p.filter[i][j]);
806 for (i = 0; i < 4; i++)
807 if (vp56_rac_get_prob_branchy(&s->c, 252))
808 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
810 if (s->allowcompinter) {
811 s->comppredmode = vp8_rac_get(&s->c);
813 s->comppredmode += vp8_rac_get(&s->c);
814 if (s->comppredmode == PRED_SWITCHABLE)
815 for (i = 0; i < 5; i++)
816 if (vp56_rac_get_prob_branchy(&s->c, 252))
818 update_prob(&s->c, s->prob.p.comp[i]);
820 s->comppredmode = PRED_SINGLEREF;
823 if (s->comppredmode != PRED_COMPREF) {
824 for (i = 0; i < 5; i++) {
825 if (vp56_rac_get_prob_branchy(&s->c, 252))
826 s->prob.p.single_ref[i][0] =
827 update_prob(&s->c, s->prob.p.single_ref[i][0]);
828 if (vp56_rac_get_prob_branchy(&s->c, 252))
829 s->prob.p.single_ref[i][1] =
830 update_prob(&s->c, s->prob.p.single_ref[i][1]);
834 if (s->comppredmode != PRED_SINGLEREF) {
835 for (i = 0; i < 5; i++)
836 if (vp56_rac_get_prob_branchy(&s->c, 252))
837 s->prob.p.comp_ref[i] =
838 update_prob(&s->c, s->prob.p.comp_ref[i]);
841 for (i = 0; i < 4; i++)
842 for (j = 0; j < 9; j++)
843 if (vp56_rac_get_prob_branchy(&s->c, 252))
844 s->prob.p.y_mode[i][j] =
845 update_prob(&s->c, s->prob.p.y_mode[i][j]);
847 for (i = 0; i < 4; i++)
848 for (j = 0; j < 4; j++)
849 for (k = 0; k < 3; k++)
850 if (vp56_rac_get_prob_branchy(&s->c, 252))
851 s->prob.p.partition[3 - i][j][k] =
852 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
854 // mv fields don't use the update_prob subexp model for some reason
855 for (i = 0; i < 3; i++)
856 if (vp56_rac_get_prob_branchy(&s->c, 252))
857 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
859 for (i = 0; i < 2; i++) {
860 if (vp56_rac_get_prob_branchy(&s->c, 252))
861 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
863 for (j = 0; j < 10; j++)
864 if (vp56_rac_get_prob_branchy(&s->c, 252))
865 s->prob.p.mv_comp[i].classes[j] =
866 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
868 if (vp56_rac_get_prob_branchy(&s->c, 252))
869 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
871 for (j = 0; j < 10; j++)
872 if (vp56_rac_get_prob_branchy(&s->c, 252))
873 s->prob.p.mv_comp[i].bits[j] =
874 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
877 for (i = 0; i < 2; i++) {
878 for (j = 0; j < 2; j++)
879 for (k = 0; k < 3; k++)
880 if (vp56_rac_get_prob_branchy(&s->c, 252))
881 s->prob.p.mv_comp[i].class0_fp[j][k] =
882 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
884 for (j = 0; j < 3; j++)
885 if (vp56_rac_get_prob_branchy(&s->c, 252))
886 s->prob.p.mv_comp[i].fp[j] =
887 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
890 if (s->highprecisionmvs) {
891 for (i = 0; i < 2; i++) {
892 if (vp56_rac_get_prob_branchy(&s->c, 252))
893 s->prob.p.mv_comp[i].class0_hp =
894 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
896 if (vp56_rac_get_prob_branchy(&s->c, 252))
897 s->prob.p.mv_comp[i].hp =
898 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// Total header size consumed, in bytes.
903 return (data2 - data) + size2;
906 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
909 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
910 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
// Build the motion-vector predictor pmv for reference 'ref' of the current
// block by scanning, in priority order: sub-block MVs already decoded in
// this block (sb >= 0), spatial neighbours (above/left caches, then the
// per-blocksize offset list), the co-located MV in the previous frame, and
// finally neighbour/temporal MVs using a *different* reference (sign-flipped
// when the sign bias differs). z selects which candidate rank to return.
// NOTE(review): the excerpt is missing the RETURN_* macro bodies' tails and
// several closing braces — visible lines are not contiguous.
913 static void find_ref_mvs(VP9Context *s,
914 VP56mv *pmv, int ref, int z, int idx, int sb)
// Neighbour scan offsets (col, row deltas in 8x8 units) per block size.
916 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
917 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
918 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
919 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
920 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
921 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
922 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
923 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
924 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
925 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
926 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
927 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
928 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
929 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
930 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
931 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
932 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
933 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
934 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
935 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
936 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
937 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
938 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
939 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
940 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
941 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
942 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
945 int row = s->row, col = s->col, row7 = s->row7;
946 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
// Sentinel: packed 32-bit MV value that cannot occur for a real candidate.
947 #define INVALID_MV 0x80008000U
948 uint32_t mem = INVALID_MV;
// RETURN_DIRECT_MV: accept an in-block candidate without clamping; first
// distinct candidate fills mem, second distinct one is returned (rank z).
951 #define RETURN_DIRECT_MV(mv) \
953 uint32_t m = AV_RN32A(&mv); \
957 } else if (mem == INVALID_MV) { \
959 } else if (m != mem) { \
// Sub-block candidates: MVs of earlier sub-blocks in this very block.
966 if (sb == 2 || sb == 1) {
967 RETURN_DIRECT_MV(b->mv[0][z]);
968 } else if (sb == 3) {
969 RETURN_DIRECT_MV(b->mv[2][z]);
970 RETURN_DIRECT_MV(b->mv[1][z]);
971 RETURN_DIRECT_MV(b->mv[0][z]);
// RETURN_MV: like RETURN_DIRECT_MV but clamps the candidate to the valid
// MV range first.
974 #define RETURN_MV(mv) \
979 clamp_mv(&tmp, &mv, s); \
980 m = AV_RN32A(&tmp); \
984 } else if (mem == INVALID_MV) { \
986 } else if (m != mem) { \
991 uint32_t m = AV_RN32A(&mv); \
993 clamp_mv(pmv, &mv, s); \
995 } else if (mem == INVALID_MV) { \
997 } else if (m != mem) { \
998 clamp_mv(pmv, &mv, s); \
// Immediate above/left neighbours via the cached per-row/col MV contexts.
1005 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1006 if (mv->ref[0] == ref) {
1007 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1008 } else if (mv->ref[1] == ref) {
1009 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
1012 if (col > s->tiling.tile_col_start) {
1013 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1014 if (mv->ref[0] == ref) {
1015 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1016 } else if (mv->ref[1] == ref) {
1017 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1025 // previously coded MVs in this neighbourhood, using same reference frame
1026 for (; i < 8; i++) {
1027 int c = p[i][0] + col, r = p[i][1] + row;
1029 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1030 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1032 if (mv->ref[0] == ref) {
1033 RETURN_MV(mv->mv[0]);
1034 } else if (mv->ref[1] == ref) {
1035 RETURN_MV(mv->mv[1]);
1040 // MV at this position in previous frame, using same reference frame
1041 if (s->use_last_frame_mvs) {
1042 struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
// Two-pass decoding already guarantees the row is done; otherwise wait.
1044 if (!s->last_uses_2pass)
1045 ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
1046 if (mv->ref[0] == ref) {
1047 RETURN_MV(mv->mv[0]);
1048 } else if (mv->ref[1] == ref) {
1049 RETURN_MV(mv->mv[1]);
// RETURN_SCALE_MV: candidate from a differently-sign-biased reference is
// negated before being considered.
1053 #define RETURN_SCALE_MV(mv, scale) \
1056 VP56mv mv_temp = { -mv.x, -mv.y }; \
1057 RETURN_MV(mv_temp); \
1063 // previously coded MVs in this neighbourhood, using different reference frame
1064 for (i = 0; i < 8; i++) {
1065 int c = p[i][0] + col, r = p[i][1] + row;
1067 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1068 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1070 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1071 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1073 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1074 // BUG - libvpx has this condition regardless of whether
1075 // we used the first ref MV and pre-scaling
1076 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1077 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1082 // MV at this position in previous frame, using different reference frame
1083 if (s->use_last_frame_mvs) {
1084 struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
1086 // no need to await_progress, because we already did that above
1087 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1088 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1090 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1091 // BUG - libvpx has this condition regardless of whether
1092 // we used the first ref MV and pre-scaling
1093 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1094 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1101 #undef RETURN_SCALE_MV
// Decode one signed MV component delta (idx: 0=row/y, 1=col/x) from the
// range coder: sign, magnitude class, then either the class-0 short form
// (integer + fractional + optional half-pel bits) or the long form
// (class-many integer bits + fractional + optional hp bit). Updates the
// adaptation counters in s->counts as it goes; hp enables the extra
// half-pel precision bit. NOTE(review): the excerpt is missing several
// interior lines (magnitude assembly, final scaling) — visible lines are
// not contiguous.
1104 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1106 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1107 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1108 s->prob.p.mv_comp[idx].classes);
1110 s->counts.mv_comp[idx].sign[sign]++;
1111 s->counts.mv_comp[idx].classes[c]++;
// Long form: c integer bits, then fractional and half-pel bits.
1115 for (n = 0, m = 0; m < c; m++) {
1116 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1118 s->counts.mv_comp[idx].bits[m][bit]++;
1121 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1123 s->counts.mv_comp[idx].fp[bit]++;
1125 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1126 s->counts.mv_comp[idx].hp[bit]++;
1130 // bug in libvpx - we count for bw entropy purposes even if the
1132 s->counts.mv_comp[idx].hp[1]++;
// Class-0 short form.
1136 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1137 s->counts.mv_comp[idx].class0[n]++;
1138 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1139 s->prob.p.mv_comp[idx].class0_fp[n]);
1140 s->counts.mv_comp[idx].class0_fp[n][bit]++;
1141 n = (n << 3) | (bit << 1);
1143 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1144 s->counts.mv_comp[idx].class0_hp[bit]++;
1148 // bug in libvpx - we count for bw entropy purposes even if the
1150 s->counts.mv_comp[idx].class0_hp[1]++;
// Magnitude is n+1; apply the decoded sign.
1154 return sign ? -(n + 1) : (n + 1);
1157 static void fill_mv(VP9Context *s,
1158 VP56mv *mv, int mode, int sb)
/* Fill mv[0] (and mv[1] for compound prediction, second half below) for
 * the current block: ZEROMV clears both, otherwise a reference MV is
 * predicted via find_ref_mvs() and, for NEWMV, a residual is read with
 * read_mv_component().  sb is the sub-block index (-1 = whole block).
 * NOTE(review): the branch structure and the b/hp declarations between
 * the visible lines are elided from this view. */
1162 if (mode == ZEROMV) {
1163 memset(mv, 0, sizeof(*mv) * 2);
1167 // FIXME cache this value and reuse for other subblocks
1168 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1169 mode == NEWMV ? -1 : sb);
1170 // FIXME maybe move this code into find_ref_mvs()
/* hp (high-precision residual) is only usable when enabled for the frame
 * and the predicted MV is small (|x|,|y| < 64); otherwise the prediction
 * is rounded (elided code at 1173+) */
1171 if ((mode == NEWMV || sb == -1) &&
1172 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1186 if (mode == NEWMV) {
1187 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1188 s->prob.p.mv_joint);
1190 s->counts.mv_joint[j]++;
/* the joint code says which components carry a residual */
1191 if (j >= MV_JOINT_V)
1192 mv[0].y += read_mv_component(s, 0, hp);
1194 mv[0].x += read_mv_component(s, 1, hp);
/* second reference (compound prediction) — mirrors the first-ref path */
1198 // FIXME cache this value and reuse for other subblocks
1199 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1200 mode == NEWMV ? -1 : sb);
1201 if ((mode == NEWMV || sb == -1) &&
1202 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1216 if (mode == NEWMV) {
1217 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1218 s->prob.p.mv_joint);
1220 s->counts.mv_joint[j]++;
1221 if (j >= MV_JOINT_V)
1222 mv[1].y += read_mv_component(s, 0, hp);
1224 mv[1].x += read_mv_component(s, 1, hp);
1230 static void decode_mode(AVCodecContext *ctx)
/* Decode all per-block mode information for the current block (s->row,
 * s->col): segment id, skip flag, intra/inter, txfm size, intra modes or
 * references + inter modes + motion vectors, then propagate the decoded
 * state into the above_*/left_* context arrays and the frame-wide MV/ref
 * store used by future blocks and the next frame.
 * NOTE(review): many lines (else branches, declarations, closing braces)
 * are elided from this view; section comments below only describe what
 * the visible statements establish. */
1232 static const uint8_t left_ctx[N_BS_SIZES] = {
1233 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1235 static const uint8_t above_ctx[N_BS_SIZES] = {
1236 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
/* largest transform size allowed for each block size */
1238 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1239 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1240 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1242 VP9Context *s = ctx->priv_data;
1244 int row = s->row, col = s->col, row7 = s->row7;
1245 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
/* block extent in 8x8 units, clipped to the frame edge */
1246 int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
1247 int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
1248 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
/* ---- segment id: explicit, key-frame, or temporally predicted ---- */
1250 if (!s->segmentation.enabled) {
1252 } else if (s->keyframe || s->intraonly) {
1253 b->seg_id = s->segmentation.update_map ?
1254 vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg) : 0;
1255 } else if (!s->segmentation.update_map ||
1256 (s->segmentation.temporal &&
1257 vp56_rac_get_prob_branchy(&s->c,
1258 s->prob.segpred[s->above_segpred_ctx[col] +
1259 s->left_segpred_ctx[row7]]))) {
/* temporal prediction: take the minimum seg id over the co-located
 * area of last frame's segmentation map */
1261 uint8_t *refsegmap = s->frames[LAST_FRAME].segmentation_map;
1263 if (!s->last_uses_2pass)
1264 ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
1265 for (y = 0; y < h4; y++)
1266 for (x = 0; x < w4; x++)
1267 pred = FFMIN(pred, refsegmap[(y + row) * 8 * s->sb_cols + x + col]);
1268 av_assert1(pred < 8);
1271 memset(&s->above_segpred_ctx[col], 1, w4);
1272 memset(&s->left_segpred_ctx[row7], 1, h4);
1274 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1277 memset(&s->above_segpred_ctx[col], 0, w4);
1278 memset(&s->left_segpred_ctx[row7], 0, h4);
1280 if ((s->segmentation.enabled && s->segmentation.update_map) || s->keyframe) {
1281 uint8_t *segmap = s->frames[CUR_FRAME].segmentation_map;
1283 for (y = 0; y < h4; y++)
1284 memset(&segmap[(y + row) * 8 * s->sb_cols + col], b->seg_id, w4);
/* ---- skip flag: forced by segment feature, or coded with a/l context */
1287 b->skip = s->segmentation.enabled &&
1288 s->segmentation.feat[b->seg_id].skip_enabled;
1290 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1291 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1292 s->counts.skip[c][b->skip]++;
/* ---- intra/inter flag */
1295 if (s->keyframe || s->intraonly) {
1297 } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1298 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1302 if (have_a && have_l) {
1303 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1306 c = have_a ? 2 * s->above_intra_ctx[col] :
1307 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1309 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1310 s->counts.intra[c][bit]++;
/* ---- transform size: coded only for TX_SWITCHABLE and non-skipped */
1314 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1318 c = (s->above_skip_ctx[col] ? max_tx :
1319 s->above_txfm_ctx[col]) +
1320 (s->left_skip_ctx[row7] ? max_tx :
1321 s->left_txfm_ctx[row7]) > max_tx;
1323 c = s->above_skip_ctx[col] ? 1 :
1324 (s->above_txfm_ctx[col] * 2 > max_tx);
1326 } else if (have_l) {
1327 c = s->left_skip_ctx[row7] ? 1 :
1328 (s->left_txfm_ctx[row7] * 2 > max_tx);
/* unary-coded tx size, tree depth depends on max_tx */
1334 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1336 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1338 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1340 s->counts.tx32p[c][b->tx]++;
1343 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1345 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1346 s->counts.tx16p[c][b->tx]++;
1349 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1350 s->counts.tx8p[c][b->tx]++;
1357 b->tx = FFMIN(max_tx, s->txfmmode);
/* ---- intra modes on key/intra-only frames: fixed default probs keyed
 * on the above/left mode contexts */
1360 if (s->keyframe || s->intraonly) {
1361 uint8_t *a = &s->above_mode_ctx[col * 2];
1362 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1365 if (b->bs > BS_8x8) {
1366 // FIXME the memory storage intermediates here aren't really
1367 // necessary, they're just there to make the code slightly
1369 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1370 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1371 if (b->bs != BS_8x4) {
1372 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1373 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1374 l[0] = a[1] = b->mode[1];
1376 l[0] = a[1] = b->mode[1] = b->mode[0];
1378 if (b->bs != BS_4x8) {
1379 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1380 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1381 if (b->bs != BS_8x4) {
1382 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1383 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1384 l[1] = a[1] = b->mode[3];
1386 l[1] = a[1] = b->mode[3] = b->mode[2];
1389 b->mode[2] = b->mode[0];
1390 l[1] = a[1] = b->mode[3] = b->mode[1];
1393 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1394 vp9_default_kf_ymode_probs[*a][*l]);
1395 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1396 // FIXME this can probably be optimized
1397 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1398 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1400 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1401 vp9_default_kf_uvmode_probs[b->mode[3]]);
/* ---- intra modes on inter frames: adaptive y_mode/uv_mode probs */
1402 } else if (b->intra) {
1404 if (b->bs > BS_8x8) {
1405 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1406 s->prob.p.y_mode[0]);
1407 s->counts.y_mode[0][b->mode[0]]++;
1408 if (b->bs != BS_8x4) {
1409 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1410 s->prob.p.y_mode[0]);
1411 s->counts.y_mode[0][b->mode[1]]++;
1413 b->mode[1] = b->mode[0];
1415 if (b->bs != BS_4x8) {
1416 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1417 s->prob.p.y_mode[0]);
1418 s->counts.y_mode[0][b->mode[2]]++;
1419 if (b->bs != BS_8x4) {
1420 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1421 s->prob.p.y_mode[0]);
1422 s->counts.y_mode[0][b->mode[3]]++;
1424 b->mode[3] = b->mode[2];
1427 b->mode[2] = b->mode[0];
1428 b->mode[3] = b->mode[1];
1431 static const uint8_t size_group[10] = {
1432 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1434 int sz = size_group[b->bs];
1436 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1437 s->prob.p.y_mode[sz]);
1438 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1439 s->counts.y_mode[sz][b->mode[3]]++;
1441 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1442 s->prob.p.uv_mode[b->mode[3]]);
1443 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
/* ---- inter block: references, comp flag, inter modes, filter, MVs */
1445 static const uint8_t inter_mode_ctx_lut[14][14] = {
1446 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1447 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1448 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1449 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1450 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1451 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1452 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1453 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1454 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1455 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1456 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1457 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1458 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1459 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
1462 if (s->segmentation.feat[b->seg_id].ref_enabled) {
1463 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1465 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1467 // read comp_pred flag
1468 if (s->comppredmode != PRED_SWITCHABLE) {
1469 b->comp = s->comppredmode == PRED_COMPREF;
1473 // FIXME add intra as ref=0xff (or -1) to make these easier?
1476 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1478 } else if (s->above_comp_ctx[col]) {
1479 c = 2 + (s->left_intra_ctx[row7] ||
1480 s->left_ref_ctx[row7] == s->fixcompref);
1481 } else if (s->left_comp_ctx[row7]) {
1482 c = 2 + (s->above_intra_ctx[col] ||
1483 s->above_ref_ctx[col] == s->fixcompref);
1485 c = (!s->above_intra_ctx[col] &&
1486 s->above_ref_ctx[col] == s->fixcompref) ^
1487 (!s->left_intra_ctx[row7] &&
/* NOTE(review): `row & 7` here vs `row7` everywhere else in this
 * function -- presumably identical values; confirm s->row7 == row & 7 */
1488 s->left_ref_ctx[row & 7] == s->fixcompref);
1491 c = s->above_comp_ctx[col] ? 3 :
1492 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1494 } else if (have_l) {
1495 c = s->left_comp_ctx[row7] ? 3 :
1496 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1500 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1501 s->counts.comp[c][b->comp]++;
1504 // read actual references
1505 // FIXME probably cache a few variables here to prevent repetitive
1506 // memory accesses below
1507 if (b->comp) /* two references */ {
/* the fixed ref goes to the slot given by its sign bias; the variable
 * ref (chosen below from varcompref[]) goes to the other slot */
1508 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1510 b->ref[fix_idx] = s->fixcompref;
1511 // FIXME can this codeblob be replaced by some sort of LUT?
1514 if (s->above_intra_ctx[col]) {
1515 if (s->left_intra_ctx[row7]) {
1518 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1520 } else if (s->left_intra_ctx[row7]) {
1521 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1523 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1525 if (refl == refa && refa == s->varcompref[1]) {
1527 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1528 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1529 (refl == s->fixcompref && refa == s->varcompref[0])) {
1532 c = (refa == refl) ? 3 : 1;
1534 } else if (!s->left_comp_ctx[row7]) {
1535 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1538 c = (refl == s->varcompref[1] &&
1539 refa != s->varcompref[1]) ? 2 : 4;
1541 } else if (!s->above_comp_ctx[col]) {
1542 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1545 c = (refa == s->varcompref[1] &&
1546 refl != s->varcompref[1]) ? 2 : 4;
1549 c = (refl == refa) ? 4 : 2;
1553 if (s->above_intra_ctx[col]) {
1555 } else if (s->above_comp_ctx[col]) {
1556 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1558 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1561 } else if (have_l) {
1562 if (s->left_intra_ctx[row7]) {
1564 } else if (s->left_comp_ctx[row7]) {
1565 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1567 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1572 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1573 b->ref[var_idx] = s->varcompref[bit];
1574 s->counts.comp_ref[c][bit]++;
1575 } else /* single reference */ {
/* first single_ref bit: LAST vs (GOLDEN/ALTREF) */
1578 if (have_a && !s->above_intra_ctx[col]) {
1579 if (have_l && !s->left_intra_ctx[row7]) {
1580 if (s->left_comp_ctx[row7]) {
1581 if (s->above_comp_ctx[col]) {
1582 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1583 !s->above_ref_ctx[col]);
1585 c = (3 * !s->above_ref_ctx[col]) +
1586 (!s->fixcompref || !s->left_ref_ctx[row7]);
1588 } else if (s->above_comp_ctx[col]) {
1589 c = (3 * !s->left_ref_ctx[row7]) +
1590 (!s->fixcompref || !s->above_ref_ctx[col]);
1592 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1594 } else if (s->above_intra_ctx[col]) {
1596 } else if (s->above_comp_ctx[col]) {
1597 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1599 c = 4 * (!s->above_ref_ctx[col]);
1601 } else if (have_l && !s->left_intra_ctx[row7]) {
1602 if (s->left_intra_ctx[row7]) {
1604 } else if (s->left_comp_ctx[row7]) {
1605 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1607 c = 4 * (!s->left_ref_ctx[row7]);
1612 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1613 s->counts.single_ref[c][0][bit]++;
/* second single_ref bit: GOLDEN vs ALTREF */
1617 // FIXME can this codeblob be replaced by some sort of LUT?
1620 if (s->left_intra_ctx[row7]) {
1621 if (s->above_intra_ctx[col]) {
1623 } else if (s->above_comp_ctx[col]) {
1624 c = 1 + 2 * (s->fixcompref == 1 ||
1625 s->above_ref_ctx[col] == 1);
1626 } else if (!s->above_ref_ctx[col]) {
1629 c = 4 * (s->above_ref_ctx[col] == 1);
1631 } else if (s->above_intra_ctx[col]) {
1632 if (s->left_intra_ctx[row7]) {
1634 } else if (s->left_comp_ctx[row7]) {
1635 c = 1 + 2 * (s->fixcompref == 1 ||
1636 s->left_ref_ctx[row7] == 1);
1637 } else if (!s->left_ref_ctx[row7]) {
1640 c = 4 * (s->left_ref_ctx[row7] == 1);
1642 } else if (s->above_comp_ctx[col]) {
1643 if (s->left_comp_ctx[row7]) {
1644 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1645 c = 3 * (s->fixcompref == 1 ||
1646 s->left_ref_ctx[row7] == 1);
1650 } else if (!s->left_ref_ctx[row7]) {
1651 c = 1 + 2 * (s->fixcompref == 1 ||
1652 s->above_ref_ctx[col] == 1);
1654 c = 3 * (s->left_ref_ctx[row7] == 1) +
1655 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1657 } else if (s->left_comp_ctx[row7]) {
1658 if (!s->above_ref_ctx[col]) {
1659 c = 1 + 2 * (s->fixcompref == 1 ||
1660 s->left_ref_ctx[row7] == 1);
1662 c = 3 * (s->above_ref_ctx[col] == 1) +
1663 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1665 } else if (!s->above_ref_ctx[col]) {
1666 if (!s->left_ref_ctx[row7]) {
1669 c = 4 * (s->left_ref_ctx[row7] == 1);
1671 } else if (!s->left_ref_ctx[row7]) {
1672 c = 4 * (s->above_ref_ctx[col] == 1);
1674 c = 2 * (s->left_ref_ctx[row7] == 1) +
1675 2 * (s->above_ref_ctx[col] == 1);
1678 if (s->above_intra_ctx[col] ||
1679 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1681 } else if (s->above_comp_ctx[col]) {
1682 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1684 c = 4 * (s->above_ref_ctx[col] == 1);
1687 } else if (have_l) {
1688 if (s->left_intra_ctx[row7] ||
1689 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1691 } else if (s->left_comp_ctx[row7]) {
1692 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1694 c = 4 * (s->left_ref_ctx[row7] == 1);
1699 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1700 s->counts.single_ref[c][1][bit]++;
1701 b->ref[0] = 1 + bit;
/* ---- inter mode for whole-block (<= 8x8) blocks */
1706 if (b->bs <= BS_8x8) {
1707 if (s->segmentation.feat[b->seg_id].skip_enabled) {
1708 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1710 static const uint8_t off[10] = {
1711 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1714 // FIXME this needs to use the LUT tables from find_ref_mvs
1715 // because not all are -1,0/0,-1
1716 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1717 [s->left_mode_ctx[row7 + off[b->bs]]];
1719 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1720 s->prob.p.mv_mode[c]);
1721 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1722 s->counts.mv_mode[c][b->mode[0] - 10]++;
/* ---- interpolation filter */
1726 if (s->filtermode == FILTER_SWITCHABLE) {
1729 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1730 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1731 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1732 s->left_filter_ctx[row7] : 3;
1734 c = s->above_filter_ctx[col];
1736 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1737 c = s->left_filter_ctx[row7];
1742 b->filter = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1743 s->prob.p.filter[c]);
1744 s->counts.filter[c][b->filter]++;
1746 b->filter = s->filtermode;
/* ---- sub-8x8 blocks: per-subblock inter modes + MVs */
1749 if (b->bs > BS_8x8) {
1750 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1752 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1753 s->prob.p.mv_mode[c]);
1754 s->counts.mv_mode[c][b->mode[0] - 10]++;
1755 fill_mv(s, b->mv[0], b->mode[0], 0);
1757 if (b->bs != BS_8x4) {
1758 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1759 s->prob.p.mv_mode[c]);
1760 s->counts.mv_mode[c][b->mode[1] - 10]++;
1761 fill_mv(s, b->mv[1], b->mode[1], 1);
1763 b->mode[1] = b->mode[0];
1764 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1765 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1768 if (b->bs != BS_4x8) {
1769 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1770 s->prob.p.mv_mode[c]);
1771 s->counts.mv_mode[c][b->mode[2] - 10]++;
1772 fill_mv(s, b->mv[2], b->mode[2], 2);
1774 if (b->bs != BS_8x4) {
1775 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1776 s->prob.p.mv_mode[c]);
1777 s->counts.mv_mode[c][b->mode[3] - 10]++;
1778 fill_mv(s, b->mv[3], b->mode[3], 3);
1780 b->mode[3] = b->mode[2];
1781 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1782 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1785 b->mode[2] = b->mode[0];
1786 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1787 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1788 b->mode[3] = b->mode[1];
1789 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1790 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
/* whole-block MV: decode once, replicate to all four subblock slots */
1793 fill_mv(s, b->mv[0], b->mode[0], -1);
1794 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1795 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1796 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
1797 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1798 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1799 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
/* ---- propagate decoded state into the above/left context arrays */
1803 // FIXME this can probably be optimized
1804 memset(&s->above_skip_ctx[col], b->skip, w4);
1805 memset(&s->left_skip_ctx[row7], b->skip, h4);
1806 memset(&s->above_txfm_ctx[col], b->tx, w4);
1807 memset(&s->left_txfm_ctx[row7], b->tx, h4);
1808 memset(&s->above_partition_ctx[col], above_ctx[b->bs], w4);
1809 memset(&s->left_partition_ctx[row7], left_ctx[b->bs], h4);
1810 if (!s->keyframe && !s->intraonly) {
1811 memset(&s->above_intra_ctx[col], b->intra, w4);
1812 memset(&s->left_intra_ctx[row7], b->intra, h4);
1813 memset(&s->above_comp_ctx[col], b->comp, w4);
1814 memset(&s->left_comp_ctx[row7], b->comp, h4);
1815 memset(&s->above_mode_ctx[col], b->mode[3], w4);
1816 memset(&s->left_mode_ctx[row7], b->mode[3], h4);
1817 if (s->filtermode == FILTER_SWITCHABLE && !b->intra ) {
1818 memset(&s->above_filter_ctx[col], b->filter, w4);
1819 memset(&s->left_filter_ctx[row7], b->filter, h4);
1820 b->filter = vp9_filter_lut[b->filter];
1822 if (b->bs > BS_8x8) {
/* sub-8x8: store edge subblock MVs in the above/left MV contexts */
1823 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1825 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
1826 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
1827 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
1828 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
1829 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
1830 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
1831 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
1832 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
1834 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1836 for (n = 0; n < w4 * 2; n++) {
1837 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
1838 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
1840 for (n = 0; n < h4 * 2; n++) {
1841 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
1842 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
1846 if (!b->intra) { // FIXME write 0xff or -1 if intra, so we can use this
1847 // as a direct check in above branches
1848 int vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
1850 memset(&s->above_ref_ctx[col], vref, w4);
1851 memset(&s->left_ref_ctx[row7], vref, h4);
/* ---- fill the per-frame MV/ref store consumed by find_ref_mvs() and
 * by the next frame's temporal MV prediction */
1856 for (y = 0; y < h4; y++) {
1857 int x, o = (row + y) * s->sb_cols * 8 + col;
1858 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
1861 for (x = 0; x < w4; x++) {
1865 } else if (b->comp) {
1866 for (x = 0; x < w4; x++) {
1867 mv[x].ref[0] = b->ref[0];
1868 mv[x].ref[1] = b->ref[1];
1869 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
1870 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
1873 for (x = 0; x < w4; x++) {
1874 mv[x].ref[0] = b->ref[0];
1876 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
1882 // FIXME remove tx argument, and merge cnt/eob arguments?
1883 static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
1884 enum TxfmMode tx, unsigned (*cnt)[6][3],
1885 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
1886 int nnz, const int16_t *scan, const int16_t (*nb)[2],
1887 const int16_t *band_counts, const int16_t *qmul)
/* Decode one block of up to n_coeffs transform coefficients into coef[],
 * dequantizing with qmul[] (index 0 = DC, 1 = AC).  p holds the token
 * probabilities per band/context, cnt/eob the adaptation counters.
 * scan/nb give the coefficient scan order and the neighbour pairs used
 * to derive the per-position context.  NOTE(review): the do-loop opener,
 * sign handling and some branches are elided from this view; the return
 * value is presumably the end-of-block position -- confirm upstream. */
1889 int i = 0, band = 0, band_left = band_counts[band];
1890 uint8_t *tp = p[0][nnz];
/* per-scan-position token magnitudes, read back via nb[] for context */
1891 uint8_t cache[1024];
1896 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
1897 eob[band][nnz][val]++;
1902 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
1903 cnt[band][nnz][0]++;
1905 band_left = band_counts[++band];
/* context for the next position: average of the two neighbours */
1907 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
1909 if (++i == n_coeffs)
1910 break; //invalid input; blocks should end with EOB
1915 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
1916 cnt[band][nnz][1]++;
1920 // fill in p[3-10] (model fill) - only once per frame for each pos
1922 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
1924 cnt[band][nnz][2]++;
1925 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
1926 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
1927 cache[rc] = val = 2;
1929 val = 3 + vp56_rac_get_prob(c, tp[5]);
1932 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
/* categories 1-6 use the fixed probabilities from the VP9 spec */
1934 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
1935 val = 5 + vp56_rac_get_prob(c, 159);
1937 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
1938 val += vp56_rac_get_prob(c, 145);
1942 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
1943 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
1944 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
1945 val += (vp56_rac_get_prob(c, 148) << 1);
1946 val += vp56_rac_get_prob(c, 140);
1948 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
1949 val += (vp56_rac_get_prob(c, 155) << 2);
1950 val += (vp56_rac_get_prob(c, 140) << 1);
1951 val += vp56_rac_get_prob(c, 135);
1953 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
1954 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
1955 val += (vp56_rac_get_prob(c, 157) << 3);
1956 val += (vp56_rac_get_prob(c, 141) << 2);
1957 val += (vp56_rac_get_prob(c, 134) << 1);
1958 val += vp56_rac_get_prob(c, 130);
1960 val = 67 + (vp56_rac_get_prob(c, 254) << 13);
1961 val += (vp56_rac_get_prob(c, 254) << 12);
1962 val += (vp56_rac_get_prob(c, 254) << 11);
1963 val += (vp56_rac_get_prob(c, 252) << 10);
1964 val += (vp56_rac_get_prob(c, 249) << 9);
1965 val += (vp56_rac_get_prob(c, 243) << 8);
1966 val += (vp56_rac_get_prob(c, 230) << 7);
1967 val += (vp56_rac_get_prob(c, 196) << 6);
1968 val += (vp56_rac_get_prob(c, 177) << 5);
1969 val += (vp56_rac_get_prob(c, 153) << 4);
1970 val += (vp56_rac_get_prob(c, 140) << 3);
1971 val += (vp56_rac_get_prob(c, 133) << 2);
1972 val += (vp56_rac_get_prob(c, 130) << 1);
1973 val += vp56_rac_get_prob(c, 129);
1978 band_left = band_counts[++band];
/* 32x32 dequant is halved relative to the smaller transforms */
1979 if (tx == TX_32X32) // FIXME slow
1980 coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
1982 coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
1983 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
1985 } while (++i < n_coeffs);
1990 static void decode_coeffs(AVCodecContext *ctx)
/* Decode all residual coefficients for the current block: first the luma
 * plane (per-txfm-type scan tables), then both chroma planes (always
 * DCT_DCT scan).  Results go to s->block / s->uvblock and the per-subblock
 * EOBs to s->eob / s->uveob; the above/left nnz context arrays are updated
 * so neighbouring blocks derive correct token contexts.
 * NOTE(review): declarations of b and several loop lines are elided from
 * this view. */
1992 VP9Context *s = ctx->priv_data;
1994 int row = s->row, col = s->col;
/* probability/count tables selected by tx size, plane and intra-ness */
1995 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
1996 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
1997 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
1998 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
1999 int end_x = FFMIN(2 * (s->cols - col), w4);
2000 int end_y = FFMIN(2 * (s->rows - row), h4);
2001 int n, pl, x, y, step1d = 1 << b->tx, step = 1 << (b->tx * 2);
2002 int uvstep1d = 1 << b->uvtx, uvstep = 1 << (b->uvtx * 2), res;
2003 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
/* lossless uses the WHT variants stored at vp9_scans[4..] */
2004 int tx = 4 * s->lossless + b->tx;
2005 const int16_t * const *yscans = vp9_scans[tx];
2006 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2007 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2008 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2009 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2010 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
/* tokens per band for each tx size (last entry = remainder) */
2011 static const int16_t band_counts[4][8] = {
2012 { 1, 2, 3, 4, 3, 16 - 13 },
2013 { 1, 2, 3, 4, 11, 64 - 21 },
2014 { 1, 2, 3, 4, 11, 256 - 21 },
2015 { 1, 2, 3, 4, 11, 1024 - 21 },
2017 const int16_t *y_band_counts = band_counts[b->tx];
2018 const int16_t *uv_band_counts = band_counts[b->uvtx];
/* ---- luma */
2021 if (b->tx > TX_4X4) { // FIXME slow
2022 for (y = 0; y < end_y; y += step1d)
2023 for (x = 1; x < step1d; x++)
2025 for (x = 0; x < end_x; x += step1d)
2026 for (y = 1; y < step1d; y++)
2029 for (n = 0, y = 0; y < end_y; y += step1d) {
2030 for (x = 0; x < end_x; x += step1d, n += step) {
2031 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[b->tx == TX_4X4 &&
2034 int nnz = a[x] + l[y];
2035 res = decode_coeffs_b(&s->c, s->block + 16 * n, 16 * step,
2036 b->tx, c, e, p, nnz, yscans[txtp],
2037 ynbs[txtp], y_band_counts, qmul[0]);
2038 a[x] = l[y] = !!res;
/* EOB > 255 needs 16 bits for tx > 8x8 */
2039 if (b->tx > TX_8X8) {
2040 AV_WN16A(&s->eob[n], res);
2046 if (b->tx > TX_4X4) { // FIXME slow
2047 for (y = 0; y < end_y; y += step1d)
2048 memset(&l[y + 1], l[y], FFMIN(end_y - y - 1, step1d - 1));
2049 for (x = 0; x < end_x; x += step1d)
2050 memset(&a[x + 1], a[x], FFMIN(end_x - x - 1, step1d - 1));
/* ---- chroma: switch tables to the uv plane and halve the extent */
2053 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2054 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2055 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2060 for (pl = 0; pl < 2; pl++) {
2061 a = &s->above_uv_nnz_ctx[pl][col];
2062 l = &s->left_uv_nnz_ctx[pl][row & 7];
2063 if (b->uvtx > TX_4X4) { // FIXME slow
2064 for (y = 0; y < end_y; y += uvstep1d)
2065 for (x = 1; x < uvstep1d; x++)
2067 for (x = 0; x < end_x; x += uvstep1d)
2068 for (y = 1; y < uvstep1d; y++)
2071 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2072 for (x = 0; x < end_x; x += uvstep1d, n += uvstep) {
2073 int nnz = a[x] + l[y];
2074 res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n,
2075 16 * uvstep, b->uvtx, c, e, p, nnz,
2076 uvscan, uvnb, uv_band_counts, qmul[1]);
2077 a[x] = l[y] = !!res;
2078 if (b->uvtx > TX_8X8) {
2079 AV_WN16A(&s->uveob[pl][n], res);
2081 s->uveob[pl][n] = res;
2085 if (b->uvtx > TX_4X4) { // FIXME slow
2086 for (y = 0; y < end_y; y += uvstep1d)
2087 memset(&l[y + 1], l[y], FFMIN(end_y - y - 1, uvstep1d - 1));
2088 for (x = 0; x < end_x; x += uvstep1d)
2089 memset(&a[x + 1], a[x], FFMIN(end_x - x - 1, uvstep1d - 1));
2094 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2095 uint8_t *dst_edge, ptrdiff_t stride_edge,
2096 uint8_t *dst_inner, ptrdiff_t stride_inner,
2097 uint8_t *l, int col, int x, int w,
2098 int row, int y, enum TxfmMode tx,
/* Prepare the top (*a) and left (l) edge pixel arrays for intra
 * prediction of one tx-sized subblock, converting the requested mode to
 * an edge-availability-aware variant (mode_conv) and extending/filling
 * unavailable edge pixels per the VP9 rules (127/129 defaults, pixel
 * replication past the frame/block edge).  Returns the possibly-converted
 * mode.  dst_edge/dst_inner distinguish the block-edge destination (frame
 * buffer) from the inner working buffer; p (declaration elided) appears
 * to be the plane index used for chroma subsampling shifts.
 * NOTE(review): several lines, including the trailing parameter and some
 * branches, are elided from this view. */
2101 int have_top = row > 0 || y > 0;
2102 int have_left = col > s->tiling.tile_col_start || x > 0;
2103 int have_right = x < w - 1;
/* map each coded mode to its edge-dependent substitute */
2104 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2105 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2106 { DC_127_PRED, VERT_PRED } },
2107 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2108 { HOR_PRED, HOR_PRED } },
2109 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2110 { LEFT_DC_PRED, DC_PRED } },
2111 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2112 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2113 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2114 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2115 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2116 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2117 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2118 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2119 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2120 { DC_127_PRED, VERT_LEFT_PRED } },
2121 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2122 { HOR_UP_PRED, HOR_UP_PRED } },
2123 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2124 { HOR_PRED, TM_VP8_PRED } },
/* which edge pixels each (converted) mode actually reads */
2126 static const struct {
2127 uint8_t needs_left:1;
2128 uint8_t needs_top:1;
2129 uint8_t needs_topleft:1;
2130 uint8_t needs_topright:1;
2131 } edges[N_INTRA_PRED_MODES] = {
2132 [VERT_PRED] = { .needs_top = 1 },
2133 [HOR_PRED] = { .needs_left = 1 },
2134 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2135 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2136 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2137 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2138 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2139 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2140 [HOR_UP_PRED] = { .needs_left = 1 },
2141 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2142 [LEFT_DC_PRED] = { .needs_left = 1 },
2143 [TOP_DC_PRED] = { .needs_top = 1 },
2144 [DC_128_PRED] = { 0 },
2145 [DC_127_PRED] = { 0 },
2146 [DC_129_PRED] = { 0 }
2149 av_assert2(mode >= 0 && mode < 10);
2150 mode = mode_conv[mode][have_left][have_top];
2151 if (edges[mode].needs_top) {
2152 uint8_t *top, *topleft;
2153 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
2154 int n_px_need_tr = 0;
2156 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2159 // if top of sb64-row, use s->intra_pred_data[] instead of
2160 // dst[-stride] for intra prediction (it contains pre- instead of
2161 // post-loopfilter data)
2163 top = !(row & 7) && !y ?
2164 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2165 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2167 topleft = !(row & 7) && !y ?
2168 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2169 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2170 &dst_inner[-stride_inner];
/* fast path: all needed top pixels exist and are contiguous */
2174 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2175 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2176 n_px_need + n_px_need_tr <= n_px_have) {
2180 if (n_px_need <= n_px_have) {
2181 memcpy(*a, top, n_px_need);
2183 memcpy(*a, top, n_px_have);
/* replicate the last available pixel across the missing span */
2184 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2185 n_px_need - n_px_have);
2188 memset(*a, 127, n_px_need);
2190 if (edges[mode].needs_topleft) {
2191 if (have_left && have_top) {
2192 (*a)[-1] = topleft[-1];
2194 (*a)[-1] = have_top ? 129 : 127;
2197 if (tx == TX_4X4 && edges[mode].needs_topright) {
2198 if (have_top && have_right &&
2199 n_px_need + n_px_need_tr <= n_px_have) {
2200 memcpy(&(*a)[4], &top[4], 4);
2202 memset(&(*a)[4], (*a)[3], 4);
2207 if (edges[mode].needs_left) {
2209 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
2210 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2211 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
/* gather the left column from the destination one pixel per row */
2213 if (n_px_need <= n_px_have) {
2214 for (i = 0; i < n_px_need; i++)
2215 l[i] = dst[i * stride - 1];
2217 for (i = 0; i < n_px_have; i++)
2218 l[i] = dst[i * stride - 1];
2219 memset(&l[i], l[i - 1], n_px_need - n_px_have);
2222 memset(l, 129, 4 << tx);
/**
 * Reconstruct an intra-coded block.
 *
 * Walks every transform block of the luma plane, then of both chroma
 * planes: builds the above/left edge pixel arrays via check_intra_mode(),
 * runs the intra predictor, and adds the inverse-transform residual
 * (b->skip forces eob to 0, so no residual is added for skipped blocks).
 * y_off/uv_off are byte offsets of this block into the current frame's
 * luma/chroma planes.
 */
2229 static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2231 VP9Context *s = ctx->priv_data;
2233 int row = s->row, col = s->col;
2234 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2235 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
// clip the iteration range so we never reconstruct past the visible frame
2236 int end_x = FFMIN(2 * (s->cols - col), w4);
2237 int end_y = FFMIN(2 * (s->rows - row), h4);
2238 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2239 int uvstep1d = 1 << b->uvtx, p;
2240 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
// luma: iterate transform blocks in raster order; n indexes the
// coefficient/eob arrays in 4x4 units
2242 for (n = 0, y = 0; y < end_y; y += step1d) {
2243 uint8_t *ptr = dst, *ptr_r = dst_r;
2244 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2245 ptr_r += 4 * step1d, n += step) {
2246 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2248 LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
2249 uint8_t *a = &a_buf[16], l[32];
2250 enum TxfmType txtp = vp9_intra_txfm_type[mode];
// for tx sizes above 8x8 the eob is stored as an aligned 16-bit value
2251 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
// check_intra_mode() fills a[]/l[] with edge pixels and returns the
// (possibly adjusted) prediction mode
2253 mode = check_intra_mode(s, mode, &a, ptr_r,
2254 s->frames[CUR_FRAME].tf.f->linesize[0],
2255 ptr, s->y_stride, l,
2256 col, x, w4, row, y, b->tx, 0);
2257 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2259 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2260 s->block + 16 * n, eob);
2262 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2263 dst += 4 * step1d * s->y_stride;
// chroma: same walk with the chroma tx size; chroma residual always
// uses DCT_DCT
2271 step = 1 << (b->uvtx * 2);
2272 for (p = 0; p < 2; p++) {
2273 dst = s->dst[1 + p];
2274 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2275 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2276 uint8_t *ptr = dst, *ptr_r = dst_r;
2277 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2278 ptr_r += 4 * uvstep1d, n += step) {
2279 int mode = b->uvmode;
2280 LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
2281 uint8_t *a = &a_buf[16], l[32];
2282 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2284 mode = check_intra_mode(s, mode, &a, ptr_r,
2285 s->frames[CUR_FRAME].tf.f->linesize[1],
2286 ptr, s->uv_stride, l,
2287 col, x, w4, row, y, b->uvtx, p + 1);
2288 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2290 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2291 s->uvblock[p] + 16 * n, eob);
2293 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2294 dst += 4 * uvstep1d * s->uv_stride;
/**
 * Single-direction luma motion compensation.
 *
 * Waits (frame threading) until the needed rows of the reference frame
 * are decoded, then runs the subpel MC function; when the block plus the
 * 3/4-pixel filter margin would read outside the reference, the source
 * is first copied through emulated_edge_mc() into edge_emu_buffer.
 * (y, x) is the block position in the reference; (w, h) its dimensions.
 */
2299 static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2300 uint8_t *dst, ptrdiff_t dst_stride,
2301 const uint8_t *ref, ptrdiff_t ref_stride,
2302 ThreadFrame *ref_frame,
2303 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2304 int bw, int bh, int w, int h)
2306 int mx = mv->x, my = mv->y, th;
2310 ref += y * ref_stride + x;
2313 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2314 // we use +7 because the last 7 pixels of each sbrow can be changed in
2315 // the longest loopfilter of the next sbrow
2316 th = (y + bh + 4 * !!my + 7) >> 6;
2317 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
// does the filter footprint (3 pixels before, 4 after, only in the
// direction(s) with a subpel component) stay inside the reference frame?
2318 if (x < !!mx * 3 || y < !!my * 3 ||
2319 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2320 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2321 ref - !!my * 3 * ref_stride - !!mx * 3,
2323 bw + !!mx * 7, bh + !!my * 7,
2324 x - !!mx * 3, y - !!my * 3, w, h);
// edge_emu_buffer is addressed with an 80-byte row stride here
2325 ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
// note: luma passes the subpel phase as mx/my << 1, while
// mc_chroma_dir() passes mx/my unscaled
2328 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
/**
 * Single-direction chroma motion compensation (U and V together).
 *
 * Same structure as mc_luma_dir(), but with separate source pointers and
 * strides for the two chroma planes; when the edge-emulation path is
 * taken, each plane gets its own emulated_edge_mc() pass, reusing
 * edge_emu_buffer sequentially (MC for U runs before V's source is
 * overwritten).
 */
2331 static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2332 uint8_t *dst_u, uint8_t *dst_v,
2333 ptrdiff_t dst_stride,
2334 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2335 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2336 ThreadFrame *ref_frame,
2337 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2338 int bw, int bh, int w, int h)
2340 int mx = mv->x, my = mv->y, th;
2344 ref_u += y * src_stride_u + x;
2345 ref_v += y * src_stride_v + x;
2348 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2349 // we use +7 because the last 7 pixels of each sbrow can be changed in
2350 // the longest loopfilter of the next sbrow
// >> 5 here vs >> 6 in mc_luma_dir — presumably because chroma rows are
// 2x subsampled relative to luma
2351 th = (y + bh + 4 * !!my + 7) >> 5;
2352 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2353 if (x < !!mx * 3 || y < !!my * 3 ||
2354 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2355 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2356 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2358 bw + !!mx * 7, bh + !!my * 7,
2359 x - !!mx * 3, y - !!my * 3, w, h);
2360 ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2361 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2363 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2364 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2366 bw + !!mx * 7, bh + !!my * 7,
2367 x - !!mx * 3, y - !!my * 3, w, h);
2368 ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2369 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
2371 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2372 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
/**
 * Reconstruct an inter-coded block.
 *
 * Performs luma motion compensation (per-sub-block MVs for block sizes
 * below 8x8, a single MV otherwise) and chroma motion compensation (the
 * chroma MV for sub-8x8 sizes is the rounded average of the four luma
 * MVs), for one or — when b->comp is set — two references, then adds the
 * inverse-transform residual for non-skipped blocks.
 */
2376 static void inter_recon(AVCodecContext *ctx)
// log2 of the MC block width per block size, [0] = luma, [1] = chroma
2378 static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
2379 { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
2380 { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
2382 VP9Context *s = ctx->priv_data;
2384 int row = s->row, col = s->col;
2385 ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]];
2386 AVFrame *ref1 = tref1->f;
// second reference only exists for compound prediction
2387 ThreadFrame *tref2 = b->comp ? &s->refs[s->refidx[b->ref[1]]] : NULL;
2388 AVFrame *ref2 = b->comp ? tref2->f : NULL;
2389 int w = ctx->width, h = ctx->height;
2390 ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
// y inter pred: sizes below 8x8 carry separate MVs per 8x4/4x8/4x4 part
2393 if (b->bs > BS_8x8) {
2394 if (b->bs == BS_8x4) {
2395 mc_luma_dir(s, s->dsp.mc[3][b->filter][0], s->dst[0], ls_y,
2396 ref1->data[0], ref1->linesize[0], tref1,
2397 row << 3, col << 3, &b->mv[0][0], 8, 4, w, h);
2398 mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
2399 s->dst[0] + 4 * ls_y, ls_y,
2400 ref1->data[0], ref1->linesize[0], tref1,
2401 (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w, h);
2404 mc_luma_dir(s, s->dsp.mc[3][b->filter][1], s->dst[0], ls_y,
2405 ref2->data[0], ref2->linesize[0], tref2,
2406 row << 3, col << 3, &b->mv[0][1], 8, 4, w, h);
2407 mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
2408 s->dst[0] + 4 * ls_y, ls_y,
2409 ref2->data[0], ref2->linesize[0], tref2,
2410 (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w, h);
2412 } else if (b->bs == BS_4x8) {
2413 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2414 ref1->data[0], ref1->linesize[0], tref1,
2415 row << 3, col << 3, &b->mv[0][0], 4, 8, w, h);
2416 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2417 ref1->data[0], ref1->linesize[0], tref1,
2418 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w, h);
2421 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2422 ref2->data[0], ref2->linesize[0], tref2,
2423 row << 3, col << 3, &b->mv[0][1], 4, 8, w, h);
2424 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2425 ref2->data[0], ref2->linesize[0], tref2,
2426 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w, h);
2429 av_assert2(b->bs == BS_4x4);
2431 // FIXME if two horizontally adjacent blocks have the same MV,
2432 // do a w8 instead of a w4 call
2433 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2434 ref1->data[0], ref1->linesize[0], tref1,
2435 row << 3, col << 3, &b->mv[0][0], 4, 4, w, h);
2436 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2437 ref1->data[0], ref1->linesize[0], tref1,
2438 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w, h);
2439 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2440 s->dst[0] + 4 * ls_y, ls_y,
2441 ref1->data[0], ref1->linesize[0], tref1,
2442 (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w, h);
2443 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2444 s->dst[0] + 4 * ls_y + 4, ls_y,
2445 ref1->data[0], ref1->linesize[0], tref1,
2446 (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w, h);
2449 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2450 ref2->data[0], ref2->linesize[0], tref2,
2451 row << 3, col << 3, &b->mv[0][1], 4, 4, w, h);
2452 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2453 ref2->data[0], ref2->linesize[0], tref2,
2454 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w, h);
2455 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2456 s->dst[0] + 4 * ls_y, ls_y,
2457 ref2->data[0], ref2->linesize[0], tref2,
2458 (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w, h);
2459 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2460 s->dst[0] + 4 * ls_y + 4, ls_y,
2461 ref2->data[0], ref2->linesize[0], tref2,
2462 (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w, h);
// 8x8 and larger: one MV covers the whole luma block
2466 int bwl = bwlog_tab[0][b->bs];
2467 int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
2469 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], s->dst[0], ls_y,
2470 ref1->data[0], ref1->linesize[0], tref1,
2471 row << 3, col << 3, &b->mv[0][0],bw, bh, w, h);
2474 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], s->dst[0], ls_y,
2475 ref2->data[0], ref2->linesize[0], tref2,
2476 row << 3, col << 3, &b->mv[0][1], bw, bh, w, h);
// uv inter pred
2481 int bwl = bwlog_tab[1][b->bs];
2482 int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
// sub-8x8: chroma uses the rounded average of the four luma MVs
2487 if (b->bs > BS_8x8) {
2488 mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
2489 mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
2494 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
2495 s->dst[1], s->dst[2], ls_uv,
2496 ref1->data[1], ref1->linesize[1],
2497 ref1->data[2], ref1->linesize[2], tref1,
2498 row << 2, col << 2, &mvuv, bw, bh, w, h);
2501 if (b->bs > BS_8x8) {
2502 mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
2503 mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
2507 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
2508 s->dst[1], s->dst[2], ls_uv,
2509 ref2->data[1], ref2->linesize[1],
2510 ref2->data[2], ref2->linesize[2], tref2,
2511 row << 2, col << 2, &mvuv, bw, bh, w, h);
2516 /* mostly copied intra_recon() */
2518 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2519 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2520 int end_x = FFMIN(2 * (s->cols - col), w4);
2521 int end_y = FFMIN(2 * (s->rows - row), h4);
2522 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2523 int uvstep1d = 1 << b->uvtx, p;
2524 uint8_t *dst = s->dst[0];
// y itxfm add: inter residual always uses DCT_DCT
2527 for (n = 0, y = 0; y < end_y; y += step1d) {
2529 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2530 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2533 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2534 s->block + 16 * n, eob);
2536 dst += 4 * s->y_stride * step1d;
// uv itxfm add
2544 step = 1 << (b->uvtx * 2);
2545 for (p = 0; p < 2; p++) {
2546 dst = s->dst[p + 1];
2547 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2549 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2550 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2553 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2554 s->uvblock[p] + 16 * n, eob);
2556 dst += 4 * uvstep1d * s->uv_stride;
/**
 * Accumulate loopfilter edge masks for one block.
 *
 * ORs per-row bitmasks of the edges to filter into
 * lflvl->mask[is_uv][0 = row edges, 1 = col edges][y][mask_id], where
 * mask_id selects the filter width variant (see loopfilter_sb()).
 * row_and_7/col_and_7 give the block position within its 64x64
 * superblock; w/h are the block dimensions in mask-bit units and
 * col_end/row_end mark frame-edge truncation — TODO confirm exact units
 * for the chroma (is_uv) case.
 */
2562 static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
2563 int row_and_7, int col_and_7,
2564 int w, int h, int col_end, int row_end,
2565 enum TxfmMode tx, int skip_inter)
2567 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2568 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2569 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2570 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2572 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2573 // edges. This means that for UV, we work on two subsampled blocks at
2574 // a time, and we only use the topleft block's mode information to set
2575 // things like block strength. Thus, for any block size smaller than
2576 // 16x16, ignore the odd portion of the block.
2577 if (tx == TX_4X4 && is_uv) {
2592 if (tx == TX_4X4 && !skip_inter) {
// t = bit for this block's first column; m_col = bits for all w columns
2593 int t = 1 << col_and_7, m_col = (t << w) - t, y;
2594 int m_col_odd = (t << (w - 1)) - t;
2596 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
2598 int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2600 for (y = row_and_7; y < h + row_and_7; y++) {
2601 int col_mask_id = 2 - !(y & 7);
2603 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2604 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2605 // for odd lines, if the odd col is not being filtered,
2606 // skip odd row also:
2613 // if a/c are even row/col and b/d are odd, and d is skipped,
2614 // e.g. right edge of size-66x66.webm, then skip b also (bug)
2615 if ((col_end & 1) && (y & 1)) {
2616 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
2618 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
2622 int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2624 for (y = row_and_7; y < h + row_and_7; y++) {
2625 int col_mask_id = 2 - !(y & 3);
2627 lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
2628 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2629 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
2630 lflvl->mask[is_uv][0][y][3] |= m_col;
2631 lflvl->mask[is_uv][1][y][3] |= m_col;
2635 int y, t = 1 << col_and_7, m_col = (t << w) - t;
2638 int mask_id = (tx == TX_8X8);
2639 int l2 = tx + is_uv - 1, step1d = 1 << l2;
// edge bit pattern per log2 transform size (every 1/2/4/8 columns)
2640 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2641 int m_row = m_col & masks[l2];
2643 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
2644 // 8wd loopfilter to prevent going off the visible edge.
2645 if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2646 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2647 int m_row_8 = m_row - m_row_16;
2649 for (y = row_and_7; y < h + row_and_7; y++) {
2650 lflvl->mask[is_uv][0][y][0] |= m_row_16;
2651 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2654 for (y = row_and_7; y < h + row_and_7; y++)
2655 lflvl->mask[is_uv][0][y][mask_id] |= m_row;
2658 if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2659 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2660 lflvl->mask[is_uv][1][y][0] |= m_col;
2661 if (y - row_and_7 == h - 1)
2662 lflvl->mask[is_uv][1][y][1] |= m_col;
2664 for (y = row_and_7; y < h + row_and_7; y += step1d)
2665 lflvl->mask[is_uv][1][y][mask_id] |= m_col;
2667 } else if (tx != TX_4X4) {
2670 mask_id = (tx == TX_8X8) || (is_uv && h == 1);
2671 lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
2672 mask_id = (tx == TX_8X8) || (is_uv && w == 1);
2673 for (y = row_and_7; y < h + row_and_7; y++)
2674 lflvl->mask[is_uv][0][y][mask_id] |= t;
2676 int t8 = t & 0x01, t4 = t - t8;
2678 for (y = row_and_7; y < h + row_and_7; y++) {
2679 lflvl->mask[is_uv][0][y][2] |= t4;
2680 lflvl->mask[is_uv][0][y][1] |= t8;
2682 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
2684 int t8 = t & 0x11, t4 = t - t8;
2686 for (y = row_and_7; y < h + row_and_7; y++) {
2687 lflvl->mask[is_uv][0][y][2] |= t4;
2688 lflvl->mask[is_uv][0][y][1] |= t8;
2690 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
/**
 * Decode and reconstruct one block.
 *
 * Sets the MV clamping range for this position, reconstructs the pixels
 * (intra or inter), using temporary buffers when the block overhangs the
 * frame/stride and copying the visible part back afterwards, and finally
 * computes the loopfilter level and edge masks for the block.
 */
2695 static void decode_b(AVCodecContext *ctx, int row, int col,
2696 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2697 enum BlockLevel bl, enum BlockPartition bp)
2699 VP9Context *s = ctx->priv_data;
2701 enum BlockSize bs = bl * 3 + bp;
2702 int y, w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
2704 AVFrame *f = s->frames[CUR_FRAME].tf.f;
// MV clamping window: 128 plus 64 per block unit on each side —
// NOTE(review): units presumably 1/8-pel, confirm against MV decoding
2710 s->min_mv.x = -(128 + col * 64);
2711 s->min_mv.y = -(128 + row * 64);
2712 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
2713 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
// chroma tx size is one step smaller when the block is too small for b->tx
2719 b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
// skipped block: clear the non-zero-coefficient contexts instead of
// decoding a residual
2726 memset(&s->above_y_nnz_ctx[col * 2], 0, w4 * 2);
2727 memset(&s->left_y_nnz_ctx[(row & 7) << 1], 0, h4 * 2);
2728 for (pl = 0; pl < 2; pl++) {
2729 memset(&s->above_uv_nnz_ctx[pl][col], 0, w4);
2730 memset(&s->left_uv_nnz_ctx[pl][row & 7], 0, h4);
// advance coefficient/eob pointers past this block
2735 s->block += w4 * h4 * 64;
2736 s->uvblock[0] += w4 * h4 * 16;
2737 s->uvblock[1] += w4 * h4 * 16;
2738 s->eob += 4 * w4 * h4;
2739 s->uveob[0] += w4 * h4;
2740 s->uveob[1] += w4 * h4;
2746 // emulated overhangs if the stride of the target buffer can't hold. This
2747 // allows to support emu-edge and so on even if we have large block
2749 emu[0] = (col + w4) * 8 > f->linesize[0] ||
2750 (row + h4) > s->rows;
2751 emu[1] = (col + w4) * 4 > f->linesize[1] ||
2752 (row + h4) > s->rows;
// overhanging block: reconstruct into the temporary buffers instead
2754 s->dst[0] = s->tmp_y;
2757 s->dst[0] = f->data[0] + yoff;
2758 s->y_stride = f->linesize[0];
2761 s->dst[1] = s->tmp_uv[0];
2762 s->dst[2] = s->tmp_uv[1];
2765 s->dst[1] = f->data[1] + uvoff;
2766 s->dst[2] = f->data[2] + uvoff;
2767 s->uv_stride = f->linesize[1];
2770 intra_recon(ctx, yoff, uvoff);
// copy the visible part of the emulated luma block back into the frame,
// using progressively narrower MC copy functions (tmp_y stride is 64)
2775 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
2777 for (n = 0; o < w; n++) {
2782 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
2783 s->tmp_y + o, 64, h, 0, 0);
// same write-back for both chroma planes (tmp_uv stride is 32)
2789 int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
2791 for (n = 1; o < w; n++) {
2796 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
2797 s->tmp_uv[0] + o, 32, h, 0, 0);
2798 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
2799 s->tmp_uv[1] + o, 32, h, 0, 0);
2805 // pick filter level and find edges to apply filter to
2806 if (s->filter.level &&
2807 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
2808 [b->mode[3] != ZEROMV]) > 0) {
2809 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
2810 int skip_inter = !b->intra && b->skip;
2812 for (y = 0; y < h4; y++)
2813 memset(&lflvl->level[((row & 7) + y) * 8 + (col & 7)], lvl, w4);
2814 mask_edges(lflvl, 0, row & 7, col & 7, x_end, y_end, 0, 0, b->tx, skip_inter);
2815 mask_edges(lflvl, 1, row & 7, col & 7, x_end, y_end,
2816 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
2817 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
2818 b->uvtx, skip_inter);
// lazily fill the limit LUTs for this filter level
2820 if (!s->filter.lim_lut[lvl]) {
2821 int sharp = s->filter.sharpness;
2825 limit >>= (sharp + 3) >> 2;
2826 limit = FFMIN(limit, 9 - sharp);
2828 limit = FFMAX(limit, 1);
2830 s->filter.lim_lut[lvl] = limit;
2831 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
// advance coefficient/eob pointers past this block
2837 s->block += w4 * h4 * 64;
2838 s->uvblock[0] += w4 * h4 * 16;
2839 s->uvblock[1] += w4 * h4 * 16;
2840 s->eob += 4 * w4 * h4;
2841 s->uveob[0] += w4 * h4;
2842 s->uveob[1] += w4 * h4;
/**
 * Recursively parse the partition tree from the bitstream and decode all
 * blocks below this node. At frame edges only the partitions that fit
 * inside the frame are coded, hence the reduced-choice branches below.
 */
2846 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
2847 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
2849 VP9Context *s = ctx->priv_data;
// partition probability context from the above/left partition bits
2850 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
2851 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
2852 const uint8_t *p = s->keyframe ? vp9_default_kf_partition_probs[bl][c] :
2853 s->prob.p.partition[bl][c];
2854 enum BlockPartition bp;
// half block size at this level, in 8px block units
2855 ptrdiff_t hbs = 4 >> bl;
2856 AVFrame *f = s->frames[CUR_FRAME].tf.f;
2857 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
2860 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
2861 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
2862 } else if (col + hbs < s->cols) { // FIXME why not <=?
2863 if (row + hbs < s->rows) { // FIXME why not <=?
// fully inside the frame: all four partition types are possible
2864 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
2866 case PARTITION_NONE:
2867 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
2870 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
2871 yoff += hbs * 8 * y_stride;
2872 uvoff += hbs * 4 * uv_stride;
2873 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
2876 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
2879 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
2881 case PARTITION_SPLIT:
2882 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
2883 decode_sb(ctx, row, col + hbs, lflvl,
2884 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
2885 yoff += hbs * 8 * y_stride;
2886 uvoff += hbs * 4 * uv_stride;
2887 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
2888 decode_sb(ctx, row + hbs, col + hbs, lflvl,
2889 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
// bottom half is outside the frame: a single branch on p[1] selects
// split vs. a non-split partition
2894 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
2895 bp = PARTITION_SPLIT;
2896 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
2897 decode_sb(ctx, row, col + hbs, lflvl,
2898 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
2901 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// right half is outside the frame: branch on p[2] instead
2903 } else if (row + hbs < s->rows) { // FIXME why not <=?
2904 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
2905 bp = PARTITION_SPLIT;
2906 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
2907 yoff += hbs * 8 * y_stride;
2908 uvoff += hbs * 4 * uv_stride;
2909 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
2912 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// both halves outside: split is implied, no bit is read
2915 bp = PARTITION_SPLIT;
2916 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
// update adaptation counts for the chosen partition
2918 s->counts.partition[bl][c][bp]++;
/**
 * Second-pass variant of decode_sb(): replays the partition structure
 * recorded in the block array (s->b) instead of reading the bitstream,
 * and re-runs decode_b() for reconstruction.
 */
2921 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
2922 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
2924 VP9Context *s = ctx->priv_data;
// half block size at this level, in 8px block units
2926 ptrdiff_t hbs = 4 >> bl;
2927 AVFrame *f = s->frames[CUR_FRAME].tf.f;
2928 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
2931 av_assert2(b->bl == BL_8X8);
2932 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
// stored block belongs to this level: replay its partition directly
2933 } else if (s->b->bl == bl) {
2934 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
2935 if (b->bp == PARTITION_H && row + hbs < s->rows) {
2936 yoff += hbs * 8 * y_stride;
2937 uvoff += hbs * 4 * uv_stride;
2938 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
2939 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
2942 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// otherwise recurse into the four quadrants that fit in the frame
2945 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
2946 if (col + hbs < s->cols) { // FIXME why not <=?
2947 if (row + hbs < s->rows) {
2948 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
2949 uvoff + 4 * hbs, bl + 1);
2950 yoff += hbs * 8 * y_stride;
2951 uvoff += hbs * 4 * uv_stride;
2952 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
2953 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
2954 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
2958 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
2960 } else if (row + hbs < s->rows) {
2961 yoff += hbs * 8 * y_stride;
2962 uvoff += hbs * 4 * uv_stride;
2963 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
/**
 * Apply the in-loop deblocking filter to one 64x64 superblock, driven by
 * the per-edge masks built in mask_edges() and the per-position filter
 * levels in lflvl->level: first the edges between columns, then between
 * rows, for Y, then the same for each chroma plane.
 *
 * Per edge: L is the filter level, H = L >> 4, and E/I come from the
 * precomputed mblim/lim LUTs filled in decode_b(). mask index [0]/[1]/[2]
 * selects the 16/8/4-pixel-wide filters; [3] the inner 4px variant.
 */
2968 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
2969 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
2971 VP9Context *s = ctx->priv_data;
2972 AVFrame *f = s->frames[CUR_FRAME].tf.f;
2973 uint8_t *dst = f->data[0] + yoff, *lvl = lflvl->level;
2974 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
2977 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
2978 // if you think of them as acting on a 8x8 block max, we can interleave
2979 // each v/h within the single x loop, but that only works if we work on
2980 // 8 pixel blocks, and we won't always do that (we want at least 16px
2981 // to use SSE2 optimizations, perhaps 32 for AVX2)
2983 // filter edges between columns, Y plane (e.g. block1 | block2)
// two mask rows are handled per iteration so vertically adjacent edges
// can be merged into one wider (16-row or mix2) filter call
2984 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
2985 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
2986 uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
2987 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
2988 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
2989 unsigned hm = hm1 | hm2 | hm13 | hm23;
2991 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
2993 int L = *l, H = L >> 4;
2994 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2997 if (hmask1[0] & x) {
2998 if (hmask2[0] & x) {
2999 av_assert2(l[8] == L);
3000 s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
3002 s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
3004 } else if (hm2 & x) {
// pack the second edge's parameters into the high byte for mix2
3007 E |= s->filter.mblim_lut[L] << 8;
3008 I |= s->filter.lim_lut[L] << 8;
3009 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3011 [0](ptr, ls_y, E, I, H);
3013 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3014 [0](ptr, ls_y, E, I, H);
3017 } else if (hm2 & x) {
3018 int L = l[8], H = L >> 4;
3019 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3022 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3023 [0](ptr + 8 * ls_y, ls_y, E, I, H);
// inner 4px edges (mask index [3]) sit 4 pixels into the 8px column
3027 int L = *l, H = L >> 4;
3028 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3033 E |= s->filter.mblim_lut[L] << 8;
3034 I |= s->filter.lim_lut[L] << 8;
3035 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
3037 s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
3039 } else if (hm23 & x) {
3040 int L = l[8], H = L >> 4;
3041 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3043 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
3049 // filter edges between rows, Y plane (e.g. ------)
3051 dst = f->data[0] + yoff;
3053 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
3054 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
3055 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
// two column bits per iteration: horizontally adjacent edges can be
// merged into 16-wide or mix2 calls
3057 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
3060 int L = *l, H = L >> 4;
3061 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3064 if (vmask[0] & (x << 1)) {
3065 av_assert2(l[1] == L);
3066 s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
3068 s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
3070 } else if (vm & (x << 1)) {
3073 E |= s->filter.mblim_lut[L] << 8;
3074 I |= s->filter.lim_lut[L] << 8;
3075 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3076 [!!(vmask[1] & (x << 1))]
3077 [1](ptr, ls_y, E, I, H);
3079 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3080 [1](ptr, ls_y, E, I, H);
3082 } else if (vm & (x << 1)) {
3083 int L = l[1], H = L >> 4;
3084 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3086 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
3087 [1](ptr + 8, ls_y, E, I, H);
3091 int L = *l, H = L >> 4;
3092 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3094 if (vm3 & (x << 1)) {
3097 E |= s->filter.mblim_lut[L] << 8;
3098 I |= s->filter.lim_lut[L] << 8;
3099 s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
3101 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
3103 } else if (vm3 & (x << 1)) {
3104 int L = l[1], H = L >> 4;
3105 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3107 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
3112 // same principle but for U/V planes
3113 for (p = 0; p < 2; p++) {
3115 dst = f->data[1 + p] + uvoff;
// chroma is subsampled: 4 level rows per iteration, l steps by 16
3116 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
3117 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
3118 uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
3119 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
3120 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
3122 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
3125 int L = *l, H = L >> 4;
3126 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3128 if (hmask1[0] & x) {
3129 if (hmask2[0] & x) {
3130 av_assert2(l[16] == L);
3131 s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
3133 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
3135 } else if (hm2 & x) {
3138 E |= s->filter.mblim_lut[L] << 8;
3139 I |= s->filter.lim_lut[L] << 8;
3140 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3142 [0](ptr, ls_uv, E, I, H);
3144 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3145 [0](ptr, ls_uv, E, I, H);
3147 } else if (hm2 & x) {
3148 int L = l[16], H = L >> 4;
3149 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3151 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3152 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
3160 dst = f->data[1 + p] + uvoff;
3161 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
3162 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
3163 unsigned vm = vmask[0] | vmask[1] | vmask[2];
3165 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
3168 int L = *l, H = L >> 4;
3169 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3172 if (vmask[0] & (x << 2)) {
3173 av_assert2(l[2] == L);
3174 s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
3176 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
3178 } else if (vm & (x << 2)) {
3181 E |= s->filter.mblim_lut[L] << 8;
3182 I |= s->filter.lim_lut[L] << 8;
3183 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3184 [!!(vmask[1] & (x << 2))]
3185 [1](ptr, ls_uv, E, I, H);
3187 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3188 [1](ptr, ls_uv, E, I, H);
3190 } else if (vm & (x << 2)) {
3191 int L = l[2], H = L >> 4;
3192 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3194 s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
3195 [1](ptr + 8, ls_uv, E, I, H);
/**
 * Compute the pixel range covered by tile number idx.
 *
 * The frame's n superblock rows/cols are divided into 2^log2_n tiles;
 * the tile boundaries in superblock units are scaled to pixels (x8),
 * clamped so they never exceed the frame.
 *
 * @param start  receives the first pixel position of the tile
 * @param end    receives the pixel position one past the tile
 * @param idx    tile index
 * @param log2_n log2 of the tile count
 * @param n      total size in superblock units
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first_sb = (idx * n) >> log2_n;
    int next_sb  = ((idx + 1) * n) >> log2_n;

    *start = (first_sb < n ? first_sb : n) << 3;
    *end   = (next_sb  < n ? next_sb  : n) << 3;
}
/**
 * Backward-adapt a single binary probability towards observed counts.
 *
 * ct0/ct1 are the number of times each symbol value was coded; the
 * empirical probability p2 is blended into *p with a weight of
 * update_factor/256, scaled down proportionally when the total count is
 * below max_count.
 */
3213 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3214 int max_count, int update_factor)
3216 unsigned ct = ct0 + ct1, p2, p1;
// empirical probability of the first symbol in 1/256 units, rounded,
// clipped to the valid probability range [1, 255]
3222 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3223 p2 = av_clip(p2, 1, 255);
// low counts reduce the effective update weight
3224 ct = FFMIN(ct, max_count);
3225 update_factor = FASTDIV(update_factor * ct, max_count);
3227 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3228 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
3231 static void adapt_probs(VP9Context *s)
3234 prob_context *p = &s->prob_ctx[s->framectxid].p;
3235 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
3238 for (i = 0; i < 4; i++)
3239 for (j = 0; j < 2; j++)
3240 for (k = 0; k < 2; k++)
3241 for (l = 0; l < 6; l++)
3242 for (m = 0; m < 6; m++) {
3243 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3244 unsigned *e = s->counts.eob[i][j][k][l][m];
3245 unsigned *c = s->counts.coef[i][j][k][l][m];
3247 if (l == 0 && m >= 3) // dc only has 3 pt
3250 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3251 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3252 adapt_prob(&pp[2], c[1], c[2], 24, uf);
3255 if (s->keyframe || s->intraonly) {
3256 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3257 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3258 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3259 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
3264 for (i = 0; i < 3; i++)
3265 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
3268 for (i = 0; i < 4; i++)
3269 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
3272 if (s->comppredmode == PRED_SWITCHABLE) {
3273 for (i = 0; i < 5; i++)
3274 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
3278 if (s->comppredmode != PRED_SINGLEREF) {
3279 for (i = 0; i < 5; i++)
3280 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3281 s->counts.comp_ref[i][1], 20, 128);
3284 if (s->comppredmode != PRED_COMPREF) {
3285 for (i = 0; i < 5; i++) {
3286 uint8_t *pp = p->single_ref[i];
3287 unsigned (*c)[2] = s->counts.single_ref[i];
3289 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3290 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3294 // block partitioning
3295 for (i = 0; i < 4; i++)
3296 for (j = 0; j < 4; j++) {
3297 uint8_t *pp = p->partition[i][j];
3298 unsigned *c = s->counts.partition[i][j];
3300 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3301 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3302 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3306 if (s->txfmmode == TX_SWITCHABLE) {
3307 for (i = 0; i < 2; i++) {
3308 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3310 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3311 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3312 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3313 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3314 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3315 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3319 // interpolation filter
3320 if (s->filtermode == FILTER_SWITCHABLE) {
3321 for (i = 0; i < 4; i++) {
3322 uint8_t *pp = p->filter[i];
3323 unsigned *c = s->counts.filter[i];
3325 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3326 adapt_prob(&pp[1], c[1], c[2], 20, 128);
3331 for (i = 0; i < 7; i++) {
3332 uint8_t *pp = p->mv_mode[i];
3333 unsigned *c = s->counts.mv_mode[i];
3335 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3336 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3337 adapt_prob(&pp[2], c[1], c[3], 20, 128);
3342 uint8_t *pp = p->mv_joint;
3343 unsigned *c = s->counts.mv_joint;
3345 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3346 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3347 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3351 for (i = 0; i < 2; i++) {
3353 unsigned *c, (*c2)[2], sum;
3355 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3356 s->counts.mv_comp[i].sign[1], 20, 128);
3358 pp = p->mv_comp[i].classes;
3359 c = s->counts.mv_comp[i].classes;
3360 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3361 adapt_prob(&pp[0], c[0], sum, 20, 128);
3363 adapt_prob(&pp[1], c[1], sum, 20, 128);
3365 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3366 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3368 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3369 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3371 adapt_prob(&pp[6], c[6], sum, 20, 128);
3372 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3373 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3374 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3376 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3377 s->counts.mv_comp[i].class0[1], 20, 128);
3378 pp = p->mv_comp[i].bits;
3379 c2 = s->counts.mv_comp[i].bits;
3380 for (j = 0; j < 10; j++)
3381 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3383 for (j = 0; j < 2; j++) {
3384 pp = p->mv_comp[i].class0_fp[j];
3385 c = s->counts.mv_comp[i].class0_fp[j];
3386 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3387 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3388 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3390 pp = p->mv_comp[i].fp;
3391 c = s->counts.mv_comp[i].fp;
3392 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3393 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3394 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3396 if (s->highprecisionmvs) {
3397 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3398 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3399 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3400 s->counts.mv_comp[i].hp[1], 20, 128);
3405 for (i = 0; i < 4; i++) {
3406 uint8_t *pp = p->y_mode[i];
3407 unsigned *c = s->counts.y_mode[i], sum, s2;
3409 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3410 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3411 sum -= c[TM_VP8_PRED];
3412 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3413 sum -= c[VERT_PRED];
3414 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3415 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3417 adapt_prob(&pp[3], s2, sum, 20, 128);
3419 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3420 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3421 sum -= c[DIAG_DOWN_LEFT_PRED];
3422 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3423 sum -= c[VERT_LEFT_PRED];
3424 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3425 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3429 for (i = 0; i < 10; i++) {
3430 uint8_t *pp = p->uv_mode[i];
3431 unsigned *c = s->counts.uv_mode[i], sum, s2;
3433 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3434 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3435 sum -= c[TM_VP8_PRED];
3436 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3437 sum -= c[VERT_PRED];
3438 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3439 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3441 adapt_prob(&pp[3], s2, sum, 20, 128);
3443 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3444 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3445 sum -= c[DIAG_DOWN_LEFT_PRED];
3446 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3447 sum -= c[VERT_LEFT_PRED];
3448 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3449 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3453 static void free_buffers(VP9Context *s)
3455 av_freep(&s->above_partition_ctx);
3456 av_freep(&s->b_base);
3457 av_freep(&s->block_base);
// Codec close callback: unref any frame data still held, then free the
// AVFrame shells for the two internal frames (CUR/LAST) and all 8
// reference slots (both the active refs[] and the pending next_refs[]).
3460 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3462 VP9Context *s = ctx->priv_data;
// Internal frames: unref buffer data first (only if actually allocated),
// then free the AVFrame container itself.
3465 for (i = 0; i < 2; i++) {
3466 if (s->frames[i].tf.f->data[0])
3467 vp9_unref_frame(ctx, &s->frames[i]);
3468 av_frame_free(&s->frames[i].tf.f);
// Reference slots: release through the frame-threading API since these
// are ThreadFrames shared with other decoding threads.
3470 for (i = 0; i < 8; i++) {
3471 if (s->refs[i].f->data[0])
3472 ff_thread_release_buffer(ctx, &s->refs[i]);
3473 av_frame_free(&s->refs[i].f);
3474 if (s->next_refs[i].f->data[0])
3475 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3476 av_frame_free(&s->next_refs[i].f);
// Main per-packet decode entry point: parses the frame header, rotates the
// internal CUR/LAST frame buffers, decodes all tiles (optionally in two
// passes for frame threading), runs the in-loop filter one superblock row
// at a time, and finally rotates the reference slots.
// NOTE(review): several original lines (braces, early returns) are missing
// from this extract; visible code kept verbatim.
3486 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3487 int *got_frame, AVPacket *pkt)
3489 const uint8_t *data = pkt->data;
3490 int size = pkt->size;
3491 VP9Context *s = ctx->priv_data;
3492 int res, tile_row, tile_col, i, ref, row, col;
3493 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
// res == 0 from the header parser means "show existing frame": no new
// coded data, just output reference frame 'ref' directly.
3496 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3498 } else if (res == 0) {
3499 if (!s->refs[ref].f->data[0]) {
3500 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3501 return AVERROR_INVALIDDATA;
3503 if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
// Frame buffer rotation: previous CUR becomes LAST (used for inter
// prediction on non-keyframes), then a fresh CUR is allocated.
3511 if (s->frames[LAST_FRAME].tf.f->data[0])
3512 vp9_unref_frame(ctx, &s->frames[LAST_FRAME]);
3513 if (!s->keyframe && s->frames[CUR_FRAME].tf.f->data[0] &&
3514 (res = vp9_ref_frame(ctx, &s->frames[LAST_FRAME], &s->frames[CUR_FRAME])) < 0)
3516 if (s->frames[CUR_FRAME].tf.f->data[0])
3517 vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
3518 if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
3520 f = s->frames[CUR_FRAME].tf.f;
3521 f->key_frame = s->keyframe;
3522 f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3523 ls_y = f->linesize[0];
3524 ls_uv =f->linesize[1];
// Stage the post-decode reference set: each slot flagged in
// refreshrefmask points at the new frame, others carry over unchanged.
3527 for (i = 0; i < 8; i++) {
3528 if (s->next_refs[i].f->data[0])
3529 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3530 if (s->refreshrefmask & (1 << i)) {
3531 res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
3533 res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
// Reset the "above" entropy/prediction context rows for the whole frame
// width before tile decoding starts.
3539 // main tile decode loop
3540 memset(s->above_partition_ctx, 0, s->cols);
3541 memset(s->above_skip_ctx, 0, s->cols);
3542 if (s->keyframe || s->intraonly) {
3543 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
3545 memset(s->above_mode_ctx, NEARESTMV, s->cols);
3547 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
3548 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
3549 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
3550 memset(s->above_segpred_ctx, 0, s->cols);
// Two-pass mode is used only with frame threading when this frame adapts
// the entropy context and is not in error-resilient/parallel mode.
3551 s->pass = s->uses_2pass =
3552 ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
// In parallel mode the adapted probabilities are committed up front so
// dependent threads can proceed (ff_thread_finish_setup below).
3553 if (s->refreshctx && s->parallelmode) {
3556 for (i = 0; i < 4; i++) {
3557 for (j = 0; j < 2; j++)
3558 for (k = 0; k < 2; k++)
3559 for (l = 0; l < 6; l++)
3560 for (m = 0; m < 6; m++)
3561 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
3562 s->prob.coef[i][j][k][l][m], 3);
3563 if (s->txfmmode == i)
3566 s->prob_ctx[s->framectxid].p = s->prob.p;
3567 ff_thread_finish_setup(ctx);
// Rewind the coefficient/EOB write pointers to the start of their arenas
// for this pass.
3573 s->block = s->block_base;
3574 s->uvblock[0] = s->uvblock_base[0];
3575 s->uvblock[1] = s->uvblock_base[1];
3576 s->eob = s->eob_base;
3577 s->uveob[0] = s->uveob_base[0];
3578 s->uveob[1] = s->uveob_base[1];
// First walk all tiles to initialize one range decoder per tile column.
3580 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
3581 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
3582 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
3584 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
// The last tile has no explicit size prefix; all others carry a 32-bit
// big-endian byte count. (Assignment for the last-tile branch is on a
// stripped line — presumably tile_size = size; verify against upstream.)
3587 if (tile_col == s->tiling.tile_cols - 1 &&
3588 tile_row == s->tiling.tile_rows - 1) {
3591 tile_size = AV_RB32(data);
3595 if (tile_size > size) {
3596 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3597 return AVERROR_INVALIDDATA;
3599 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
3600 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
3601 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3602 return AVERROR_INVALIDDATA;
// Decode superblock rows: each iteration covers 8 rows of 8x8 blocks
// (one 64-pixel band of luma, 32 of chroma).
3609 for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
3610 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
3611 struct VP9Filter *lflvl_ptr = s->lflvl;
3612 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
3614 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3615 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
3616 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
// Left-edge contexts are reset at the start of every tile column so
// tiles stay independently decodable.
3619 memset(s->left_partition_ctx, 0, 8);
3620 memset(s->left_skip_ctx, 0, 8);
3621 if (s->keyframe || s->intraonly) {
3622 memset(s->left_mode_ctx, DC_PRED, 16);
3624 memset(s->left_mode_ctx, NEARESTMV, 8);
3626 memset(s->left_y_nnz_ctx, 0, 16);
3627 memset(s->left_uv_nnz_ctx, 0, 16);
3628 memset(s->left_segpred_ctx, 0, 8);
// Swap the per-tile range coder state in/out of s->c so the shared
// decode path can use s->c directly.
3630 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
3633 for (col = s->tiling.tile_col_start;
3634 col < s->tiling.tile_col_end;
3635 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3636 // FIXME integrate with lf code (i.e. zero after each
3637 // use, similar to invtxfm coefficients, or similar)
3639 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
// Pass 2 of two-pass decoding replays stored symbols via decode_sb_mem;
// otherwise decode_sb parses from the bitstream. (The branch condition
// is on a stripped line — presumably s->pass == 2.)
3643 decode_sb_mem(ctx, row, col, lflvl_ptr,
3644 yoff2, uvoff2, BL_64X64);
3646 decode_sb(ctx, row, col, lflvl_ptr,
3647 yoff2, uvoff2, BL_64X64);
3651 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
3659 // backup pre-loopfilter reconstruction data for intra
3660 // prediction of next row of sb64s
3661 if (row + 8 < s->rows) {
3662 memcpy(s->intra_pred_data[0],
3663 f->data[0] + yoff + 63 * ls_y,
3665 memcpy(s->intra_pred_data[1],
3666 f->data[1] + uvoff + 31 * ls_uv,
3668 memcpy(s->intra_pred_data[2],
3669 f->data[2] + uvoff + 31 * ls_uv,
3673 // loopfilter one row
3674 if (s->filter.level) {
3677 lflvl_ptr = s->lflvl;
3678 for (col = 0; col < s->cols;
3679 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3680 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
3684 // FIXME maybe we can make this more finegrained by running the
3685 // loopfilter per-block instead of after each sbrow
3686 // In fact that would also make intra pred left preparation easier?
// row >> 3 == number of completed superblock rows; lets consumer
// threads start using this frame as a reference progressively.
3687 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
// Non-parallel adaptation happens after pass 1, then setup is finished
// so the next frame thread may start.
3691 if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
3693 ff_thread_finish_setup(ctx);
3695 } while (s->pass++ == 1);
3696 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
// Commit the staged reference set: refs[] takes over next_refs[].
3699 for (i = 0; i < 8; i++) {
3700 if (s->refs[i].f->data[0])
3701 ff_thread_release_buffer(ctx, &s->refs[i]);
3702 ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
// Only output the frame if it is a "shown" frame per the header.
3705 if (!s->invisible) {
3706 if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
// Flush callback (e.g. on seek): drop all internal frames and all
// reference-slot buffers, but keep the AVFrame shells allocated (they are
// freed only in vp9_decode_free).
3714 static void vp9_decode_flush(AVCodecContext *ctx)
3716 VP9Context *s = ctx->priv_data;
3719 for (i = 0; i < 2; i++)
3720 vp9_unref_frame(ctx, &s->frames[i]);
3721 for (i = 0; i < 8; i++)
3722 ff_thread_release_buffer(ctx, &s->refs[i]);
// Allocate the AVFrame shells for the 2 internal frames and the 8
// reference slots (both refs[] and next_refs[]). On any allocation
// failure, vp9_decode_free() tears down whatever was already allocated
// (av_frame_free tolerates NULL), so no partial state leaks.
// Shared between normal init and per-thread copy init.
3725 static int init_frames(AVCodecContext *ctx)
3727 VP9Context *s = ctx->priv_data;
3730 for (i = 0; i < 2; i++) {
3731 s->frames[i].tf.f = av_frame_alloc();
3732 if (!s->frames[i].tf.f) {
3733 vp9_decode_free(ctx);
3734 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
3735 return AVERROR(ENOMEM);
3738 for (i = 0; i < 8; i++) {
3739 s->refs[i].f = av_frame_alloc();
3740 s->next_refs[i].f = av_frame_alloc();
3741 if (!s->refs[i].f || !s->next_refs[i].f) {
3742 vp9_decode_free(ctx);
3743 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
3744 return AVERROR(ENOMEM);
// Codec init callback: fixed 8-bit 4:2:0 output, DSP function tables,
// and frame-shell allocation. allocate_progress enables the
// ff_thread_report_progress() mechanism used during tile decoding.
3751 static av_cold int vp9_decode_init(AVCodecContext *ctx)
3753 VP9Context *s = ctx->priv_data;
3755 ctx->internal->allocate_progress = 1;
3756 ctx->pix_fmt = AV_PIX_FMT_YUV420P;
3757 ff_vp9dsp_init(&s->dsp);
3758 ff_videodsp_init(&s->vdsp, 8);
// -1 marks the sharpness as "unset" so the first frame header forces a
// filter-limit LUT rebuild.
3759 s->filter.sharpness = -1;
3761 return init_frames(ctx);
// Frame-thread worker init: each thread copy only needs its own frame
// shells; everything else is synced via update_thread_context.
3764 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
3766 return init_frames(avctx);
// Frame-threading sync: copy decoding state from the source thread (src)
// into this thread's context (dst) before the next frame is decoded.
// Re-references frames/refs rather than copying pixel data.
// NOTE(review): the tail of this function (including its return) is on
// lines stripped from this extract; visible code kept verbatim.
3769 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
3772 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
3774 // detect size changes in other threads
3775 if (s->above_partition_ctx &&
3776 (!ssrc->above_partition_ctx || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
// Re-reference the source thread's internal frames.
3780 for (i = 0; i < 2; i++) {
3781 if (s->frames[i].tf.f->data[0])
3782 vp9_unref_frame(dst, &s->frames[i]);
3783 if (ssrc->frames[i].tf.f->data[0]) {
3784 if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
// Our refs[] must reflect the source's *post-frame* reference set, i.e.
// its next_refs[] (the staged rotation from vp9_decode_frame).
3788 for (i = 0; i < 8; i++) {
3789 if (s->refs[i].f->data[0])
3790 ff_thread_release_buffer(dst, &s->refs[i]);
3791 if (ssrc->next_refs[i].f->data[0]) {
3792 if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
// Scalar/table state needed to parse the next frame header.
3797 s->invisible = ssrc->invisible;
3798 s->keyframe = ssrc->keyframe;
3799 s->uses_2pass = ssrc->uses_2pass;
3800 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
3801 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
// Segmentation features persist across frames only while enabled.
3802 if (ssrc->segmentation.enabled) {
3803 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
3804 sizeof(s->segmentation.feat));
3810 AVCodec ff_vp9_decoder = {
3812 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
3813 .type = AVMEDIA_TYPE_VIDEO,
3814 .id = AV_CODEC_ID_VP9,
3815 .priv_data_size = sizeof(VP9Context),
3816 .init = vp9_decode_init,
3817 .close = vp9_decode_free,
3818 .decode = vp9_decode_frame,
3819 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
3820 .flush = vp9_decode_flush,
3821 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
3822 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),