2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
35 #define VP9_SYNCCODE 0x498342
// Per-frame data: the decoded picture (the ThreadFrame tf, declared on
// elided lines) plus one shared buffer holding the segmentation map and
// per-8x8-block mv/reference pairs (see vp9_alloc_frame()).
72 typedef struct VP9Frame {
74 AVBufferRef *extradata;
75 uint8_t *segmentation_map;
76 struct VP9mvrefPair *mv;
// NOTE(review): the mask[] below belongs to a different struct (the
// loopfilter's VP9Filter, see s->lflvl) whose enclosing declaration is
// elided from this view.
81 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
82 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
// Per-block state: mode/reference decisions produced by the mode parser
// and consumed by reconstruction.
85 typedef struct VP9Block {
86 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
87 enum FilterMode filter;
// up to 4 sub-block MVs, each with up to 2 references (see fill_mv())
88 VP56mv mv[4 /* b_idx */][2 /* ref */];
// transform sizes for luma (tx) and chroma (uvtx)
90 enum TxfmMode tx, uvtx;
92 enum BlockPartition bp;
// Main decoder context.  NOTE(review): many members of this struct (and
// the nested struct declarations wrapping the coef[]/count fields below)
// are elided in this view.
95 typedef struct VP9Context {
// per-block parsing state; b_base holds a whole frame's worth of blocks
// in 2-pass frame-threaded decoding (see update_size())
102 VP9Block *b_base, *b;
103 int pass, uses_2pass, last_uses_2pass;
// current block position in 8x8-block units; row7/col7 presumably the
// sub-superblock position (& 7) - TODO confirm against decoder code
104 int row, row7, col, col7;
106 ptrdiff_t y_stride, uv_stride;
// frame header fields, parsed in decode_frame_header()
110 uint8_t keyframe, last_keyframe;
112 uint8_t use_last_frame_mvs;
118 uint8_t refreshrefmask;
119 uint8_t highprecisionmvs;
120 enum FilterMode filtermode;
121 uint8_t allowcompinter;
124 uint8_t parallelmode;
// the two variable references used for compound prediction
128 uint8_t varcompref[2];
129 ThreadFrame refs[8], next_refs[8];
// loopfilter limit LUT; the companion lim_lut is reset when sharpness
// changes (see decode_frame_header())
138 uint8_t mblim_lut[64];
146 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
151 uint8_t absolute_vals;
157 uint8_t skip_enabled;
// tiling layout
166 unsigned log2_tile_cols, log2_tile_rows;
167 unsigned tile_cols, tile_rows;
168 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
// frame geometry in superblocks (64x64) and blocks (8x8)
170 unsigned sb_cols, sb_rows, rows, cols;
// NOTE(review): two coef[] fields with different inner dimensions - they
// belong to different nested structs (stored 3-entry model vs expanded
// 11-entry run-time model, presumably); enclosing declarations elided.
173 uint8_t coef[4][2][2][6][6][3];
177 uint8_t coef[4][2][2][6][6][11];
// symbol counts gathered for backward probability adaptation
182 unsigned y_mode[4][10];
183 unsigned uv_mode[10][10];
184 unsigned filter[4][3];
185 unsigned mv_mode[7][4];
186 unsigned intra[4][2];
188 unsigned single_ref[5][2][2];
189 unsigned comp_ref[5][2];
190 unsigned tx32p[2][4];
191 unsigned tx16p[2][3];
194 unsigned mv_joint[4];
197 unsigned classes[11];
199 unsigned bits[10][2];
200 unsigned class0_fp[2][4];
202 unsigned class0_hp[2];
205 unsigned partition[4][4][4];
206 unsigned coef[4][2][2][6][6][3];
// eob immediately follows coef: decode_frame_header() clears both with
// a single memset relying on this adjacency
207 unsigned eob[4][2][2][6][6][2];
209 enum TxfmMode txfmmode;
210 enum CompPredMode comppredmode;
212 // contextual (left/above) cache
213 uint8_t left_partition_ctx[8], *above_partition_ctx;
214 uint8_t left_mode_ctx[16], *above_mode_ctx;
215 // FIXME maybe merge some of the below in a flags field?
216 uint8_t left_y_nnz_ctx[16], *above_y_nnz_ctx;
217 uint8_t left_uv_nnz_ctx[2][8], *above_uv_nnz_ctx[2];
218 uint8_t left_skip_ctx[8], *above_skip_ctx; // 1bit
219 uint8_t left_txfm_ctx[8], *above_txfm_ctx; // 2bit
220 uint8_t left_segpred_ctx[8], *above_segpred_ctx; // 1bit
221 uint8_t left_intra_ctx[8], *above_intra_ctx; // 1bit
222 uint8_t left_comp_ctx[8], *above_comp_ctx; // 1bit
223 uint8_t left_ref_ctx[8], *above_ref_ctx; // 2bit
224 uint8_t left_filter_ctx[8], *above_filter_ctx;
225 VP56mv left_mv_ctx[16][2], (*above_mv_ctx)[2];
// presumably the bottom row of reconstructed pixels kept for intra
// prediction of the row below - TODO confirm (sized 64/32/32 per sb col
// in update_size())
228 uint8_t *intra_pred_data[3];
229 struct VP9Filter *lflvl;
230 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71*80];
232 // block reconstruction intermediates
233 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
234 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
// MV clamping bounds for the current block (used by clamp_mv())
235 VP56mv min_mv, max_mv;
236 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
237 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
// Per-block-size {width, height} pairs at two granularities: the first
// set is in 4-pixel units (64x64 -> {16,16}), the second in 8-pixel
// units (64x64 -> {8,8}).  NOTE(review): the braces grouping the two
// sets are elided in this view.
240 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
242 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
243 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
245 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
246 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
// Allocate the picture buffer for f plus one extradata buffer holding
// the segmentation map and the per-8x8-block mv/ref pairs.  Returns 0
// on success or a negative AVERROR code.
250 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
252 VP9Context *s = ctx->priv_data;
255 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
// one segmentation-map byte and one VP9mvrefPair per 8x8 block; 64 such
// blocks per 64x64 superblock
257 sz = 64 * s->sb_cols * s->sb_rows;
258 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
// undo the picture allocation so the frame stays fully unreferenced
259 ff_thread_release_buffer(ctx, &f->tf);
260 return AVERROR(ENOMEM);
// both pointers alias into the single extradata allocation
263 f->segmentation_map = f->extradata->data;
264 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
266 // retain segmentation map if it doesn't update
267 if (s->segmentation.enabled && !s->segmentation.update_map) {
268 memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, sz);
// Release everything vp9_alloc_frame()/vp9_ref_frame() acquired: the
// picture buffer first, then the shared extradata reference.
274 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
276 ff_thread_release_buffer(ctx, &f->tf);
277 av_buffer_unref(&f->extradata);
// Make dst an additional reference to src (picture + extradata).  On
// failure everything already referenced is released again, leaving dst
// unreferenced.  NOTE(review): the tail of this function (mv pointer
// copy / return) is elided in this view.
280 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
284 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
286 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
287 vp9_unref_frame(ctx, dst);
288 return AVERROR(ENOMEM);
// plain pointer copy is fine: they alias into the shared extradata
291 dst->segmentation_map = src->segmentation_map;
// (Re)initialize all per-resolution state: the 'above'-row context
// caches, loopfilter data and, depending on the threading mode, the
// block/coefficient reconstruction buffers.  Returns 0 or
// AVERROR(ENOMEM).  NOTE(review): several early-return and closing
// lines of this function are elided in this view.
297 static int update_size(AVCodecContext *ctx, int w, int h)
299 VP9Context *s = ctx->priv_data;
302 av_assert0(w > 0 && h > 0);
// nothing to do if buffers already exist for these exact dimensions
304 if (s->above_partition_ctx && w == ctx->width && h == ctx->height)
// superblock = 64x64 pixels, block unit = 8x8 pixels
309 s->sb_cols = (w + 63) >> 6;
310 s->sb_rows = (h + 63) >> 6;
311 s->cols = (w + 7) >> 3;
312 s->rows = (h + 7) >> 3;
// all 'above' arrays are carved out of one flat allocation; 240 is the
// sum of the per-sb-column uint8_t sizes assigned below
314 #define assign(var, type, n) var = (type) p; p += s->sb_cols * n * sizeof(*var)
315 av_freep(&s->above_partition_ctx);
316 p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
318 return AVERROR(ENOMEM);
319 assign(s->above_partition_ctx, uint8_t *, 8);
320 assign(s->above_skip_ctx, uint8_t *, 8);
321 assign(s->above_txfm_ctx, uint8_t *, 8);
322 assign(s->above_mode_ctx, uint8_t *, 16);
323 assign(s->above_y_nnz_ctx, uint8_t *, 16);
324 assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
325 assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
326 assign(s->intra_pred_data[0], uint8_t *, 64);
327 assign(s->intra_pred_data[1], uint8_t *, 32);
328 assign(s->intra_pred_data[2], uint8_t *, 32);
329 assign(s->above_segpred_ctx, uint8_t *, 8);
330 assign(s->above_intra_ctx, uint8_t *, 8);
331 assign(s->above_comp_ctx, uint8_t *, 8);
332 assign(s->above_ref_ctx, uint8_t *, 8);
333 assign(s->above_filter_ctx, uint8_t *, 8);
334 assign(s->lflvl, struct VP9Filter *, 1);
335 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
// reconstruction intermediates: whole-frame sized for 2-pass
// frame-threaded decoding, a single superblock's worth otherwise
339 av_free(s->block_base);
340 if (ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode) {
341 int sbs = s->sb_cols * s->sb_rows;
343 s->b_base = av_malloc(sizeof(VP9Block) * s->cols * s->rows);
// packed layout per superblock: y (64*64) + two uv (32*32) int16
// coefficient planes followed by the y/uv eob byte arrays - see the
// pointer setup below
344 s->block_base = av_mallocz((64 * 64 + 128) * sbs * 3);
345 if (!s->b_base || !s->block_base)
346 return AVERROR(ENOMEM);
347 s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
348 s->uvblock_base[1] = s->uvblock_base[0] + sbs * 32 * 32;
349 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * 32 * 32);
350 s->uveob_base[0] = s->eob_base + 256 * sbs;
351 s->uveob_base[1] = s->uveob_base[0] + 64 * sbs;
// single-block variant of the same layout
353 s->b_base = av_malloc(sizeof(VP9Block));
354 s->block_base = av_mallocz((64 * 64 + 128) * 3);
355 if (!s->b_base || !s->block_base)
356 return AVERROR(ENOMEM);
357 s->uvblock_base[0] = s->block_base + 64 * 64;
358 s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
359 s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
360 s->uveob_base[0] = s->eob_base + 256;
361 s->uveob_base[1] = s->uveob_base[0] + 64;
367 // for some reason the sign bit is at the end, not the start, of a bit sequence
368 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
370 int v = get_bits(gb, n);
371 return get_bits1(gb) ? -v : v;
374 static av_always_inline int inv_recenter_nonneg(int v, int m)
376 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
379 // differential forward probability updates
380 static int update_prob(VP56RangeCoder *c, int p)
// maps the decoded VLC index to the actual probability delta; the first
// 20 entries (7, 20, ..., 254, step 13) are the 'cheap, rough' updates
// described in the comment below
382 static const int inv_map_table[254] = {
383 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
384 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
385 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
386 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
387 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
388 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
389 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
390 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
391 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
392 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
393 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
394 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
395 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
396 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
397 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
398 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
399 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
400 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
405 /* This code is trying to do a differential probability update. For a
406 * current probability A in the range [1, 255], the difference to a new
407 * probability of any value can be expressed differentially as 1-A,255-A
408 * where some part of this (absolute range) exists both in positive as
409 * well as the negative part, whereas another part only exists in one
410 * half. We're trying to code this shared part differentially, i.e.
411 * times two where the value of the lowest bit specifies the sign, and
412 * the single part is then coded on top of this. This absolute difference
413 * then again has a value of [0,254], but a bigger value in this range
414 * indicates that we're further away from the original value A, so we
415 * can code this as a VLC code, since higher values are increasingly
416 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
417 * updates vs. the 'fine, exact' updates further down the range, which
418 * adds one extra dimension to this differential update model. */
// decode the VLC-coded delta index d; shorter codes cover the smaller,
// more likely deltas.  NOTE(review): the closing adjustment lines of
// the final else-branch are elided in this view.
420 if (!vp8_rac_get(c)) {
421 d = vp8_rac_get_uint(c, 4) + 0;
422 } else if (!vp8_rac_get(c)) {
423 d = vp8_rac_get_uint(c, 4) + 16;
424 } else if (!vp8_rac_get(c)) {
425 d = vp8_rac_get_uint(c, 5) + 32;
427 d = vp8_rac_get_uint(c, 7);
429 d = (d << 1) - 65 + vp8_rac_get(c);
// recenter the looked-up delta around the current probability p,
// mirrored at 128 so the result always lands in [1, 255]
433 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
434 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
// Parse a complete VP9 frame header: the raw-bit ("uncompressed")
// header (frame type, dimensions, references, loopfilter / quantization
// / segmentation / tiling data) followed by the arithmetic-coded
// probability-update header.  Returns the total header size in bytes,
// or a negative AVERROR code.  NOTE(review): many closing braces and a
// few statements of this function are elided in this view.
437 static int decode_frame_header(AVCodecContext *ctx,
438 const uint8_t *data, int size, int *ref)
440 VP9Context *s = ctx->priv_data;
441 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
443 const uint8_t *data2;
/* general header */
446 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
447 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
450 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
451 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
452 return AVERROR_INVALIDDATA;
454 s->profile = get_bits1(&s->gb);
455 if (get_bits1(&s->gb)) { // reserved bit
456 av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
457 return AVERROR_INVALIDDATA;
// show-existing-frame: just re-display reference *ref, no frame data
459 if (get_bits1(&s->gb)) {
460 *ref = get_bits(&s->gb, 3);
// remember previous-frame state needed by the MV prediction code
463 s->last_uses_2pass = s->uses_2pass;
464 s->last_keyframe = s->keyframe;
465 s->keyframe = !get_bits1(&s->gb);
466 last_invisible = s->invisible;
467 s->invisible = !get_bits1(&s->gb);
468 s->errorres = get_bits1(&s->gb);
469 // FIXME disable this upon resolution change
470 s->use_last_frame_mvs = !s->errorres && !last_invisible;
/* keyframe-only fields */
472 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
473 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
474 return AVERROR_INVALIDDATA;
476 s->colorspace = get_bits(&s->gb, 3);
477 if (s->colorspace == 7) { // RGB = profile 1
478 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
479 return AVERROR_INVALIDDATA;
481 s->fullrange = get_bits1(&s->gb);
482 // for profile 1, here follows the subsampling bits
// keyframes refresh all eight reference slots
483 s->refreshrefmask = 0xff;
484 w = get_bits(&s->gb, 16) + 1;
485 h = get_bits(&s->gb, 16) + 1;
486 if (get_bits1(&s->gb)) // display size
487 skip_bits(&s->gb, 32);
/* non-keyframe: intra-only or inter frame */
489 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
490 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
// intra-only frames carry their own sync code and dimensions
492 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
493 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
494 return AVERROR_INVALIDDATA;
496 s->refreshrefmask = get_bits(&s->gb, 8);
497 w = get_bits(&s->gb, 16) + 1;
498 h = get_bits(&s->gb, 16) + 1;
499 if (get_bits1(&s->gb)) // display size
500 skip_bits(&s->gb, 32);
// inter frame: three active references, each with a sign bias
502 s->refreshrefmask = get_bits(&s->gb, 8);
503 s->refidx[0] = get_bits(&s->gb, 3);
504 s->signbias[0] = get_bits1(&s->gb);
505 s->refidx[1] = get_bits(&s->gb, 3);
506 s->signbias[1] = get_bits1(&s->gb);
507 s->refidx[2] = get_bits(&s->gb, 3);
508 s->signbias[2] = get_bits1(&s->gb);
509 if (!s->refs[s->refidx[0]].f->data[0] ||
510 !s->refs[s->refidx[1]].f->data[0] ||
511 !s->refs[s->refidx[2]].f->data[0]) {
512 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
513 return AVERROR_INVALIDDATA;
// frame size: copied from one of the references, or coded explicitly
515 if (get_bits1(&s->gb)) {
516 w = s->refs[s->refidx[0]].f->width;
517 h = s->refs[s->refidx[0]].f->height;
518 } else if (get_bits1(&s->gb)) {
519 w = s->refs[s->refidx[1]].f->width;
520 h = s->refs[s->refidx[1]].f->height;
521 } else if (get_bits1(&s->gb)) {
522 w = s->refs[s->refidx[2]].f->width;
523 h = s->refs[s->refidx[2]].f->height;
525 w = get_bits(&s->gb, 16) + 1;
526 h = get_bits(&s->gb, 16) + 1;
528 if (get_bits1(&s->gb)) // display size
529 skip_bits(&s->gb, 32);
530 s->highprecisionmvs = get_bits1(&s->gb);
531 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
// compound prediction requires references with differing sign biases
533 s->allowcompinter = s->signbias[0] != s->signbias[1] ||
534 s->signbias[0] != s->signbias[2];
535 if (s->allowcompinter) {
536 if (s->signbias[0] == s->signbias[1]) {
538 s->varcompref[0] = 0;
539 s->varcompref[1] = 1;
540 } else if (s->signbias[0] == s->signbias[2]) {
542 s->varcompref[0] = 0;
543 s->varcompref[1] = 2;
546 s->varcompref[0] = 1;
547 s->varcompref[1] = 2;
// probability-context refresh policy; error-resilient streams force
// parallel mode and disable refresh
552 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
553 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
554 s->framectxid = c = get_bits(&s->gb, 2);
556 /* loopfilter header data */
557 s->filter.level = get_bits(&s->gb, 6);
558 sharp = get_bits(&s->gb, 3);
559 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
560 // the old cache values since they are still valid
561 if (s->filter.sharpness != sharp)
562 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
563 s->filter.sharpness = sharp;
564 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
565 if (get_bits1(&s->gb)) {
566 for (i = 0; i < 4; i++)
567 if (get_bits1(&s->gb))
568 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
569 for (i = 0; i < 2; i++)
570 if (get_bits1(&s->gb))
571 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
574 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
577 /* quantization header data */
578 s->yac_qi = get_bits(&s->gb, 8);
579 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
580 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
581 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
582 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
583 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
585 /* segmentation header info */
586 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
587 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
588 for (i = 0; i < 7; i++)
589 s->prob.seg[i] = get_bits1(&s->gb) ?
590 get_bits(&s->gb, 8) : 255;
591 if ((s->segmentation.temporal = get_bits1(&s->gb)))
592 for (i = 0; i < 3; i++)
593 s->prob.segpred[i] = get_bits1(&s->gb) ?
594 get_bits(&s->gb, 8) : 255;
// per-segment feature data: quant / loopfilter / reference / skip
597 if (get_bits1(&s->gb)) {
598 s->segmentation.absolute_vals = get_bits1(&s->gb);
599 for (i = 0; i < 8; i++) {
600 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
601 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
602 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
603 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
604 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
605 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
606 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
// segmentation disabled: neutralize feature 0 (the one everything uses)
610 s->segmentation.feat[0].q_enabled = 0;
611 s->segmentation.feat[0].lf_enabled = 0;
612 s->segmentation.feat[0].skip_enabled = 0;
613 s->segmentation.feat[0].ref_enabled = 0;
616 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
617 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
618 int qyac, qydc, quvac, quvdc, lflvl, sh;
620 if (s->segmentation.feat[i].q_enabled) {
621 if (s->segmentation.absolute_vals)
622 qyac = s->segmentation.feat[i].q_val;
624 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
628 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
629 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
630 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
631 qyac = av_clip_uintp2(qyac, 8);
633 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
634 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
635 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
636 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];
// loopfilter deltas get doubled (<< sh) when the base level is high
638 sh = s->filter.level >= 32;
639 if (s->segmentation.feat[i].lf_enabled) {
640 if (s->segmentation.absolute_vals)
641 lflvl = s->segmentation.feat[i].lf_val;
643 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
645 lflvl = s->filter.level;
647 s->segmentation.feat[i].lflvl[0][0] =
648 s->segmentation.feat[i].lflvl[0][1] =
649 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
650 for (j = 1; j < 4; j++) {
651 s->segmentation.feat[i].lflvl[j][0] =
652 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
653 s->lf_delta.mode[0]) << sh), 6);
654 s->segmentation.feat[i].lflvl[j][1] =
655 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
656 s->lf_delta.mode[1]) << sh), 6);
/* tiling info */
661 if ((res = update_size(ctx, w, h)) < 0) {
662 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
// start with the smallest log2_tile_cols that keeps every tile at most
// 64 superblocks wide, then optionally grow it bit by bit up to 'max'
665 for (s->tiling.log2_tile_cols = 0;
666 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
667 s->tiling.log2_tile_cols++) ;
668 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
669 max = FFMAX(0, max - 1);
670 while (max > s->tiling.log2_tile_cols) {
671 if (get_bits1(&s->gb))
672 s->tiling.log2_tile_cols++;
676 s->tiling.log2_tile_rows = decode012(&s->gb);
677 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
678 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
679 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
// one range coder per tile column
680 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
681 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
683 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
684 return AVERROR(ENOMEM);
// reset all four probability contexts to the defaults
688 if (s->keyframe || s->errorres || s->intraonly) {
689 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
690 s->prob_ctx[3].p = vp9_default_probs;
691 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
692 sizeof(vp9_default_coef_probs));
693 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
694 sizeof(vp9_default_coef_probs));
695 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
696 sizeof(vp9_default_coef_probs));
697 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
698 sizeof(vp9_default_coef_probs));
701 // next 16 bits is size of the rest of the header (arith-coded)
702 size2 = get_bits(&s->gb, 16);
703 data2 = align_get_bits(&s->gb);
704 if (size2 > size - (data2 - data)) {
705 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
706 return AVERROR_INVALIDDATA;
708 ff_vp56_init_range_decoder(&s->c, data2, size2);
709 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
710 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
711 return AVERROR_INVALIDDATA;
// reset the backward-adaptation counters; coef and eob sit adjacent in
// the counts struct, so key/intra frames can clear both in one memset
714 if (s->keyframe || s->intraonly) {
715 memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
717 memset(&s->counts, 0, sizeof(s->counts));
719 // FIXME is it faster to not copy here, but do it down in the fw updates
720 // as explicit copies if the fw update is missing (and skip the copy upon
722 s->prob.p = s->prob_ctx[c].p;
/* txfm mode + probability updates */
726 s->txfmmode = TX_4X4;
728 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
729 if (s->txfmmode == 3)
730 s->txfmmode += vp8_rac_get(&s->c);
732 if (s->txfmmode == TX_SWITCHABLE) {
733 for (i = 0; i < 2; i++)
734 if (vp56_rac_get_prob_branchy(&s->c, 252))
735 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
736 for (i = 0; i < 2; i++)
737 for (j = 0; j < 2; j++)
738 if (vp56_rac_get_prob_branchy(&s->c, 252))
739 s->prob.p.tx16p[i][j] =
740 update_prob(&s->c, s->prob.p.tx16p[i][j]);
741 for (i = 0; i < 2; i++)
742 for (j = 0; j < 3; j++)
743 if (vp56_rac_get_prob_branchy(&s->c, 252))
744 s->prob.p.tx32p[i][j] =
745 update_prob(&s->c, s->prob.p.tx32p[i][j]);
/* coef probability updates, per transform size */
750 for (i = 0; i < 4; i++) {
751 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
752 if (vp8_rac_get(&s->c)) {
753 for (j = 0; j < 2; j++)
754 for (k = 0; k < 2; k++)
755 for (l = 0; l < 6; l++)
756 for (m = 0; m < 6; m++) {
757 uint8_t *p = s->prob.coef[i][j][k][l][m];
758 uint8_t *r = ref[j][k][l][m];
759 if (m >= 3 && l == 0) // dc only has 3 pt
761 for (n = 0; n < 3; n++) {
762 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
763 p[n] = update_prob(&s->c, r[n]);
// no explicit update: copy the reference probabilities unchanged
771 for (j = 0; j < 2; j++)
772 for (k = 0; k < 2; k++)
773 for (l = 0; l < 6; l++)
774 for (m = 0; m < 6; m++) {
775 uint8_t *p = s->prob.coef[i][j][k][l][m];
776 uint8_t *r = ref[j][k][l][m];
// NOTE(review): '>' here vs '>=' in the update branch above - the extra
// m == 3 copy is within bounds but asymmetric; confirm intended
777 if (m > 3 && l == 0) // dc only has 3 pt
783 if (s->txfmmode == i)
/* mode / skip / inter probability updates */
788 for (i = 0; i < 3; i++)
789 if (vp56_rac_get_prob_branchy(&s->c, 252))
790 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
791 if (!s->keyframe && !s->intraonly) {
792 for (i = 0; i < 7; i++)
793 for (j = 0; j < 3; j++)
794 if (vp56_rac_get_prob_branchy(&s->c, 252))
795 s->prob.p.mv_mode[i][j] =
796 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
798 if (s->filtermode == FILTER_SWITCHABLE)
799 for (i = 0; i < 4; i++)
800 for (j = 0; j < 2; j++)
801 if (vp56_rac_get_prob_branchy(&s->c, 252))
802 s->prob.p.filter[i][j] =
803 update_prob(&s->c, s->prob.p.filter[i][j]);
805 for (i = 0; i < 4; i++)
806 if (vp56_rac_get_prob_branchy(&s->c, 252))
807 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
809 if (s->allowcompinter) {
810 s->comppredmode = vp8_rac_get(&s->c);
812 s->comppredmode += vp8_rac_get(&s->c);
813 if (s->comppredmode == PRED_SWITCHABLE)
814 for (i = 0; i < 5; i++)
815 if (vp56_rac_get_prob_branchy(&s->c, 252))
817 update_prob(&s->c, s->prob.p.comp[i]);
819 s->comppredmode = PRED_SINGLEREF;
822 if (s->comppredmode != PRED_COMPREF) {
823 for (i = 0; i < 5; i++) {
824 if (vp56_rac_get_prob_branchy(&s->c, 252))
825 s->prob.p.single_ref[i][0] =
826 update_prob(&s->c, s->prob.p.single_ref[i][0]);
827 if (vp56_rac_get_prob_branchy(&s->c, 252))
828 s->prob.p.single_ref[i][1] =
829 update_prob(&s->c, s->prob.p.single_ref[i][1]);
833 if (s->comppredmode != PRED_SINGLEREF) {
834 for (i = 0; i < 5; i++)
835 if (vp56_rac_get_prob_branchy(&s->c, 252))
836 s->prob.p.comp_ref[i] =
837 update_prob(&s->c, s->prob.p.comp_ref[i]);
840 for (i = 0; i < 4; i++)
841 for (j = 0; j < 9; j++)
842 if (vp56_rac_get_prob_branchy(&s->c, 252))
843 s->prob.p.y_mode[i][j] =
844 update_prob(&s->c, s->prob.p.y_mode[i][j]);
846 for (i = 0; i < 4; i++)
847 for (j = 0; j < 4; j++)
848 for (k = 0; k < 3; k++)
849 if (vp56_rac_get_prob_branchy(&s->c, 252))
850 s->prob.p.partition[3 - i][j][k] =
851 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
853 // mv fields don't use the update_prob subexp model for some reason
854 for (i = 0; i < 3; i++)
855 if (vp56_rac_get_prob_branchy(&s->c, 252))
856 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
858 for (i = 0; i < 2; i++) {
859 if (vp56_rac_get_prob_branchy(&s->c, 252))
860 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
862 for (j = 0; j < 10; j++)
863 if (vp56_rac_get_prob_branchy(&s->c, 252))
864 s->prob.p.mv_comp[i].classes[j] =
865 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
867 if (vp56_rac_get_prob_branchy(&s->c, 252))
868 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
870 for (j = 0; j < 10; j++)
871 if (vp56_rac_get_prob_branchy(&s->c, 252))
872 s->prob.p.mv_comp[i].bits[j] =
873 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
876 for (i = 0; i < 2; i++) {
877 for (j = 0; j < 2; j++)
878 for (k = 0; k < 3; k++)
879 if (vp56_rac_get_prob_branchy(&s->c, 252))
880 s->prob.p.mv_comp[i].class0_fp[j][k] =
881 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
883 for (j = 0; j < 3; j++)
884 if (vp56_rac_get_prob_branchy(&s->c, 252))
885 s->prob.p.mv_comp[i].fp[j] =
886 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// high-precision mv probabilities are only coded when hp mvs are on
889 if (s->highprecisionmvs) {
890 for (i = 0; i < 2; i++) {
891 if (vp56_rac_get_prob_branchy(&s->c, 252))
892 s->prob.p.mv_comp[i].class0_hp =
893 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
895 if (vp56_rac_get_prob_branchy(&s->c, 252))
896 s->prob.p.mv_comp[i].hp =
897 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// total header size = uncompressed part + compressed part
902 return (data2 - data) + size2;
// Clamp src into the currently legal MV range (s->min_mv .. s->max_mv)
// and store the result in dst.  NOTE(review): the parameter-list
// continuation and the body braces are elided in this view.
905 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
908 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
909 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
// Derive the MV predictor (pmv) for component z of reference frame
// 'ref'.  Candidates are tried in priority order: MVs of already-parsed
// sub-blocks of the current block (sb >= 0), spatial neighbours coded
// with the same reference, the co-located MV of the previous frame, and
// finally differently-referenced MVs with sign flipping.  NOTE(review):
// several early-out / closing lines of this function are elided in this
// view.
912 static void find_ref_mvs(VP9Context *s,
913 VP56mv *pmv, int ref, int z, int idx, int sb)
// per-block-size list of (col, row) neighbour offsets to scan
915 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
916 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
917 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
918 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
919 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
920 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
921 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
922 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
923 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
924 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
925 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
926 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
927 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
928 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
929 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
930 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
931 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
932 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
933 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
934 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
935 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
936 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
937 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
938 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
939 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
940 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
941 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
944 int row = s->row, col = s->col, row7 = s->row7;
945 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
946 #define INVALID_MV 0x80008000U
947 uint32_t mem = INVALID_MV;
// accept mv as-is (unclamped) once the idx-th distinct candidate is hit
950 #define RETURN_DIRECT_MV(mv) \
952 uint32_t m = AV_RN32A(&mv); \
956 } else if (mem == INVALID_MV) { \
958 } else if (m != mem) { \
// MVs of already-parsed sub-blocks within the current block
965 if (sb == 2 || sb == 1) {
966 RETURN_DIRECT_MV(b->mv[0][z]);
967 } else if (sb == 3) {
968 RETURN_DIRECT_MV(b->mv[2][z]);
969 RETURN_DIRECT_MV(b->mv[1][z]);
970 RETURN_DIRECT_MV(b->mv[0][z]);
// like RETURN_DIRECT_MV, but clamps to the legal mv range first
973 #define RETURN_MV(mv) \
978 clamp_mv(&tmp, &mv, s); \
979 m = AV_RN32A(&tmp); \
983 } else if (mem == INVALID_MV) { \
985 } else if (m != mem) { \
990 uint32_t m = AV_RN32A(&mv); \
992 clamp_mv(pmv, &mv, s); \
994 } else if (mem == INVALID_MV) { \
996 } else if (m != mem) { \
997 clamp_mv(pmv, &mv, s); \
// immediate above / left neighbours, via the cached above/left mv rows
1004 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1005 if (mv->ref[0] == ref) {
1006 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1007 } else if (mv->ref[1] == ref) {
1008 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
1011 if (col > s->tiling.tile_col_start) {
1012 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1013 if (mv->ref[0] == ref) {
1014 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1015 } else if (mv->ref[1] == ref) {
1016 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1024 // previously coded MVs in this neighbourhood, using same reference frame
1025 for (; i < 8; i++) {
1026 int c = p[i][0] + col, r = p[i][1] + row;
// only positions inside the current tile column and the frame
1028 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1029 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1031 if (mv->ref[0] == ref) {
1032 RETURN_MV(mv->mv[0]);
1033 } else if (mv->ref[1] == ref) {
1034 RETURN_MV(mv->mv[1]);
1039 // MV at this position in previous frame, using same reference frame
1040 if (s->use_last_frame_mvs) {
1041 struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
// in single-pass frame threading, wait until the previous frame has
// decoded far enough before reading its MVs
1043 if (!s->last_uses_2pass)
1044 ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
1045 if (mv->ref[0] == ref) {
1046 RETURN_MV(mv->mv[0]);
1047 } else if (mv->ref[1] == ref) {
1048 RETURN_MV(mv->mv[1]);
// return mv, sign-flipped when 'scale' says the biases differ
1052 #define RETURN_SCALE_MV(mv, scale) \
1055 VP56mv mv_temp = { -mv.x, -mv.y }; \
1056 RETURN_MV(mv_temp); \
1062 // previously coded MVs in this neighbourhood, using different reference frame
1063 for (i = 0; i < 8; i++) {
1064 int c = p[i][0] + col, r = p[i][1] + row;
1066 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1067 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1069 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1070 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1072 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1073 // BUG - libvpx has this condition regardless of whether
1074 // we used the first ref MV and pre-scaling
1075 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1076 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1081 // MV at this position in previous frame, using different reference frame
1082 if (s->use_last_frame_mvs) {
1083 struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
1085 // no need to await_progress, because we already did that above
1086 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1087 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1089 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1090 // BUG - libvpx has this condition regardless of whether
1091 // we used the first ref MV and pre-scaling
1092 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1093 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1100 #undef RETURN_SCALE_MV
// Decode one motion-vector component (idx 0 = row, 1 = col): sign and
// magnitude class, then class-dependent magnitude / fractional /
// high-precision bits, incrementing s->counts for backward adaptation.
// NOTE(review): parts of the magnitude-assembly code are elided in this
// view.
1103 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1105 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1106 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1107 s->prob.p.mv_comp[idx].classes);
1109 s->counts.mv_comp[idx].sign[sign]++;
1110 s->counts.mv_comp[idx].classes[c]++;
// classes >= 1: c integer bits, then fractional bits, then optional hp
1114 for (n = 0, m = 0; m < c; m++) {
1115 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1117 s->counts.mv_comp[idx].bits[m][bit]++;
1120 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1122 s->counts.mv_comp[idx].fp[bit]++;
1124 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1125 s->counts.mv_comp[idx].hp[bit]++;
1129 // bug in libvpx - we count for bw entropy purposes even if the
1131 s->counts.mv_comp[idx].hp[1]++;
// class 0: single magnitude bit plus fractional / hp bits
1135 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1136 s->counts.mv_comp[idx].class0[n]++;
1137 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1138 s->prob.p.mv_comp[idx].class0_fp[n]);
1139 s->counts.mv_comp[idx].class0_fp[n][bit]++;
1140 n = (n << 3) | (bit << 1);
1142 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1143 s->counts.mv_comp[idx].class0_hp[bit]++;
1147 // bug in libvpx - we count for bw entropy purposes even if the
1149 s->counts.mv_comp[idx].class0_hp[1]++;
// apply the sign; the stored magnitude is off by one
1153 return sign ? -(n + 1) : (n + 1);
// Fill mv[0] (and mv[1] for compound prediction) for one (sub)block.
// 'mode' is one of ZEROMV/NEARESTMV/NEARMV/NEWMV; 'sb' is the sub-block
// index, or -1 to use whole-block prediction. For NEWMV, the predicted MV
// is refined by joint-coded component deltas read via read_mv_component().
1156 static void fill_mv(VP9Context *s,
1157                     VP56mv *mv, int mode, int sb)
// ZEROMV: both reference MVs are simply zero
1161     if (mode == ZEROMV) {
1162         memset(mv, 0, sizeof(*mv) * 2);
1166         // FIXME cache this value and reuse for other subblocks
1167         find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1168                      mode == NEWMV ? -1 : sb);
1169         // FIXME maybe move this code into find_ref_mvs()
// use-hp is only effective while the predicted MV stays small
// (|component| < 64); otherwise the hp bit is implied
1170         if ((mode == NEWMV || sb == -1) &&
1171             !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1185         if (mode == NEWMV) {
1186             enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1187                                               s->prob.p.mv_joint)
1189             s->counts.mv_joint[j]++;
1190             if (j >= MV_JOINT_V)
1191                 mv[0].y += read_mv_component(s, 0, hp);
1193                 mv[0].x += read_mv_component(s, 1, hp);
// second reference (compound prediction) mirrors the first
1197         // FIXME cache this value and reuse for other subblocks
1198         find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1199                      mode == NEWMV ? -1 : sb);
1200         if ((mode == NEWMV || sb == -1) &&
1201             !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1215         if (mode == NEWMV) {
1216             enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1217                                               s->prob.p.mv_joint);
1219             s->counts.mv_joint[j]++;
1220             if (j >= MV_JOINT_V)
1221                 mv[1].y += read_mv_component(s, 0, hp);
1223                 mv[1].x += read_mv_component(s, 1, hp);
// Decode all per-block mode information for the current block (s->row/s->col):
// segment id, skip flag, intra/inter flag, transform size, intra prediction
// modes or reference frames + interpolation filter + inter modes + MVs.
// Afterwards the left/above context arrays and the per-frame MV/ref storage
// are updated so neighbouring blocks (and the next frame) can use them as
// prediction context.
1229 static void decode_mode(AVCodecContext *ctx)
1231     static const uint8_t left_ctx[N_BS_SIZES] = {
1232         0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1234     static const uint8_t above_ctx[N_BS_SIZES] = {
1235         0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
// largest transform size allowed for each block size
1237     static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1238         TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1239         TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1241     VP9Context *s = ctx->priv_data;
1243     int row = s->row, col = s->col, row7 = s->row7;
1244     enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
// w4/h4: block dimensions in 8x8 units, clipped to the frame edge
1245     int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
1246     int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
1247     int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
// --- segment id ---------------------------------------------------------
1249     if (!s->segmentation.enabled) {
1251     } else if (s->keyframe || s->intraonly) {
1252         b->seg_id = s->segmentation.update_map ?
1253                     vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg) : 0;
1254     } else if (!s->segmentation.update_map ||
1255                (s->segmentation.temporal &&
1256                 vp56_rac_get_prob_branchy(&s->c,
1257                     s->prob.segpred[s->above_segpred_ctx[col] +
1258                                     s->left_segpred_ctx[row7]]))) {
// temporally predicted: take the minimum seg id covered by this block
// in the previous frame's segmentation map
1260         uint8_t *refsegmap = s->frames[LAST_FRAME].segmentation_map;
1262         if (!s->last_uses_2pass)
1263             ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
1264         for (y = 0; y < h4; y++)
1265             for (x = 0; x < w4; x++)
1266                 pred = FFMIN(pred, refsegmap[(y + row) * 8 * s->sb_cols + x + col]);
1267         av_assert1(pred < 8);
1270         memset(&s->above_segpred_ctx[col], 1, w4);
1271         memset(&s->left_segpred_ctx[row7], 1, h4);
1273         b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1276         memset(&s->above_segpred_ctx[col], 0, w4);
1277         memset(&s->left_segpred_ctx[row7], 0, h4);
// record the decoded seg id in the current frame's map
1279     if ((s->segmentation.enabled && s->segmentation.update_map) || s->keyframe) {
1280         uint8_t *segmap = s->frames[CUR_FRAME].segmentation_map;
1282         for (y = 0; y < h4; y++)
1283             memset(&segmap[(y + row) * 8 * s->sb_cols + col], b->seg_id, w4);
// --- skip flag ----------------------------------------------------------
1286     b->skip = s->segmentation.enabled &&
1287         s->segmentation.feat[b->seg_id].skip_enabled;
1289         int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1290         b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1291         s->counts.skip[c][b->skip]++;
// --- intra/inter flag ---------------------------------------------------
1294     if (s->keyframe || s->intraonly) {
1296     } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1297         b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1301         if (have_a && have_l) {
1302             c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1305             c = have_a ? 2 * s->above_intra_ctx[col] :
1306                 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1308         bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1309         s->counts.intra[c][bit]++;
// --- transform size -----------------------------------------------------
1313     if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1317                 c = (s->above_skip_ctx[col] ? max_tx :
1318                      s->above_txfm_ctx[col]) +
1319                     (s->left_skip_ctx[row7] ? max_tx :
1320                      s->left_txfm_ctx[row7]) > max_tx;
1322                 c = s->above_skip_ctx[col] ? 1 :
1323                     (s->above_txfm_ctx[col] * 2 > max_tx);
1325         } else if (have_l) {
1326             c = s->left_skip_ctx[row7] ? 1 :
1327                 (s->left_txfm_ctx[row7] * 2 > max_tx);
// tx size is coded as a unary-ish sequence capped by max_tx
1333             b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1335                 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1337                     b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1339             s->counts.tx32p[c][b->tx]++;
1342             b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1344                 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1345             s->counts.tx16p[c][b->tx]++;
1348             b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1349             s->counts.tx8p[c][b->tx]++;
1356         b->tx = FFMIN(max_tx, s->txfmmode);
// --- intra modes, keyframe/intraonly (default kf probabilities) ---------
1359     if (s->keyframe || s->intraonly) {
1360         uint8_t *a = &s->above_mode_ctx[col * 2];
1361         uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1364         if (b->bs > BS_8x8) {
1365             // FIXME the memory storage intermediates here aren't really
1366             // necessary, they're just there to make the code slightly
1368             b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1369                                     vp9_default_kf_ymode_probs[a[0]][l[0]]);
1370             if (b->bs != BS_8x4) {
1371                 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1372                                  vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1373                 l[0] = a[1] = b->mode[1];
1375                 l[0] = a[1] = b->mode[1] = b->mode[0];
1377             if (b->bs != BS_4x8) {
1378                 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1379                                         vp9_default_kf_ymode_probs[a[0]][l[1]]);
1380                 if (b->bs != BS_8x4) {
1381                     b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1382                                   vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1383                     l[1] = a[1] = b->mode[3];
1385                     l[1] = a[1] = b->mode[3] = b->mode[2];
1388                 b->mode[2] = b->mode[0];
1389                 l[1] = a[1] = b->mode[3] = b->mode[1];
1392             b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1393                                           vp9_default_kf_ymode_probs[*a][*l]);
1394             b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1395             // FIXME this can probably be optimized
1396             memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1397             memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1399         b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1400                                      vp9_default_kf_uvmode_probs[b->mode[3]]);
// --- intra modes, inter frame (adaptive probabilities + counts) ---------
1401     } else if (b->intra) {
1403         if (b->bs > BS_8x8) {
1404             b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1405                                           s->prob.p.y_mode[0]);
1406             s->counts.y_mode[0][b->mode[0]]++;
1407             if (b->bs != BS_8x4) {
1408                 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1409                                               s->prob.p.y_mode[0]);
1410                 s->counts.y_mode[0][b->mode[1]]++;
1412                 b->mode[1] = b->mode[0];
1414             if (b->bs != BS_4x8) {
1415                 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1416                                               s->prob.p.y_mode[0]);
1417                 s->counts.y_mode[0][b->mode[2]]++;
1418                 if (b->bs != BS_8x4) {
1419                     b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1420                                                   s->prob.p.y_mode[0]);
1421                     s->counts.y_mode[0][b->mode[3]]++;
1423                     b->mode[3] = b->mode[2];
1426                 b->mode[2] = b->mode[0];
1427                 b->mode[3] = b->mode[1];
1430             static const uint8_t size_group[10] = {
1431                 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1433             int sz = size_group[b->bs];
1435             b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1436                                           s->prob.p.y_mode[sz]);
1437             b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1438             s->counts.y_mode[sz][b->mode[3]]++;
1440         b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1441                                      s->prob.p.uv_mode[b->mode[3]]);
1442         s->counts.uv_mode[b->mode[3]][b->uvmode]++;
// --- inter block: references, filter, modes, motion vectors -------------
// context LUT indexed by (above_mode, left_mode) for inter mode coding
1444         static const uint8_t inter_mode_ctx_lut[14][14] = {
1445             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1446             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1447             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1448             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1449             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1450             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1451             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1452             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1453             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1454             { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1455             { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1456             { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1457             { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1458             { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
1461         if (s->segmentation.feat[b->seg_id].ref_enabled) {
1462             av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1464             b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1466             // read comp_pred flag
1467             if (s->comppredmode != PRED_SWITCHABLE) {
1468                 b->comp = s->comppredmode == PRED_COMPREF;
1472                 // FIXME add intra as ref=0xff (or -1) to make these easier?
1475                     if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1477                     } else if (s->above_comp_ctx[col]) {
1478                         c = 2 + (s->left_intra_ctx[row7] ||
1479                                  s->left_ref_ctx[row7] == s->fixcompref);
1480                     } else if (s->left_comp_ctx[row7]) {
1481                         c = 2 + (s->above_intra_ctx[col] ||
1482                                  s->above_ref_ctx[col] == s->fixcompref);
1484                         c = (!s->above_intra_ctx[col] &&
1485                              s->above_ref_ctx[col] == s->fixcompref) ^
1486                             (!s->left_intra_ctx[row7] &&
1487                              s->left_ref_ctx[row & 7] == s->fixcompref);
1490                     c = s->above_comp_ctx[col] ? 3 :
1491                     (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1493                 } else if (have_l) {
1494                     c = s->left_comp_ctx[row7] ? 3 :
1495                     (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1499                 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1500                 s->counts.comp[c][b->comp]++;
1503             // read actual references
1504             // FIXME probably cache a few variables here to prevent repetitive
1505             // memory accesses below
1506             if (b->comp) /* two references */ {
1507                 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1509                 b->ref[fix_idx] = s->fixcompref;
1510                 // FIXME can this codeblob be replaced by some sort of LUT?
1513                     if (s->above_intra_ctx[col]) {
1514                         if (s->left_intra_ctx[row7]) {
1517                             c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1519                     } else if (s->left_intra_ctx[row7]) {
1520                         c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1522                         int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1524                         if (refl == refa && refa == s->varcompref[1]) {
1526                         } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1527                             if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1528                                 (refl == s->fixcompref && refa == s->varcompref[0])) {
1531                                 c = (refa == refl) ? 3 : 1;
1533                         } else if (!s->left_comp_ctx[row7]) {
1534                             if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1537                                 c = (refl == s->varcompref[1] &&
1538                                      refa != s->varcompref[1]) ? 2 : 4;
1540                         } else if (!s->above_comp_ctx[col]) {
1541                             if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1544                                 c = (refa == s->varcompref[1] &&
1545                                      refl != s->varcompref[1]) ? 2 : 4;
1548                             c = (refl == refa) ? 4 : 2;
1552                     if (s->above_intra_ctx[col]) {
1554                     } else if (s->above_comp_ctx[col]) {
1555                         c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1557                         c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1560                 } else if (have_l) {
1561                     if (s->left_intra_ctx[row7]) {
1563                     } else if (s->left_comp_ctx[row7]) {
1564                         c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1566                         c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1571                 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1572                 b->ref[var_idx] = s->varcompref[bit];
1573                 s->counts.comp_ref[c][bit]++;
1574             } else /* single reference */ {
1577                 if (have_a && !s->above_intra_ctx[col]) {
1578                     if (have_l && !s->left_intra_ctx[row7]) {
1579                         if (s->left_comp_ctx[row7]) {
1580                             if (s->above_comp_ctx[col]) {
1581                                 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1582                                          !s->above_ref_ctx[col]);
1584                                 c = (3 * !s->above_ref_ctx[col]) +
1585                                     (!s->fixcompref || !s->left_ref_ctx[row7]);
1587                         } else if (s->above_comp_ctx[col]) {
1588                             c = (3 * !s->left_ref_ctx[row7]) +
1589                                 (!s->fixcompref || !s->above_ref_ctx[col]);
1591                             c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1593                     } else if (s->above_intra_ctx[col]) {
1595                     } else if (s->above_comp_ctx[col]) {
1596                         c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1598                         c = 4 * (!s->above_ref_ctx[col]);
1600                 } else if (have_l && !s->left_intra_ctx[row7]) {
1601                     if (s->left_intra_ctx[row7]) {
1603                     } else if (s->left_comp_ctx[row7]) {
1604                         c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1606                         c = 4 * (!s->left_ref_ctx[row7]);
1611                 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1612                 s->counts.single_ref[c][0][bit]++;
1616                     // FIXME can this codeblob be replaced by some sort of LUT?
1619                             if (s->left_intra_ctx[row7]) {
1620                                 if (s->above_intra_ctx[col]) {
1622                                 } else if (s->above_comp_ctx[col]) {
1623                                     c = 1 + 2 * (s->fixcompref == 1 ||
1624                                                  s->above_ref_ctx[col] == 1);
1625                                 } else if (!s->above_ref_ctx[col]) {
1628                                     c = 4 * (s->above_ref_ctx[col] == 1);
1630                             } else if (s->above_intra_ctx[col]) {
1631                                 if (s->left_intra_ctx[row7]) {
1633                                 } else if (s->left_comp_ctx[row7]) {
1634                                     c = 1 + 2 * (s->fixcompref == 1 ||
1635                                                  s->left_ref_ctx[row7] == 1);
1636                                 } else if (!s->left_ref_ctx[row7]) {
1639                                     c = 4 * (s->left_ref_ctx[row7] == 1);
1641                             } else if (s->above_comp_ctx[col]) {
1642                                 if (s->left_comp_ctx[row7]) {
1643                                     if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1644                                         c = 3 * (s->fixcompref == 1 ||
1645                                                  s->left_ref_ctx[row7] == 1);
1649                                 } else if (!s->left_ref_ctx[row7]) {
1650                                     c = 1 + 2 * (s->fixcompref == 1 ||
1651                                                  s->above_ref_ctx[col] == 1);
1653                                     c = 3 * (s->left_ref_ctx[row7] == 1) +
1654                                     (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1656                             } else if (s->left_comp_ctx[row7]) {
1657                                 if (!s->above_ref_ctx[col]) {
1658                                     c = 1 + 2 * (s->fixcompref == 1 ||
1659                                                  s->left_ref_ctx[row7] == 1);
1661                                     c = 3 * (s->above_ref_ctx[col] == 1) +
1662                                     (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1664                             } else if (!s->above_ref_ctx[col]) {
1665                                 if (!s->left_ref_ctx[row7]) {
1668                                     c = 4 * (s->left_ref_ctx[row7] == 1);
1670                             } else if (!s->left_ref_ctx[row7]) {
1671                                 c = 4 * (s->above_ref_ctx[col] == 1);
1673                                 c = 2 * (s->left_ref_ctx[row7] == 1) +
1674                                     2 * (s->above_ref_ctx[col] == 1);
1677                             if (s->above_intra_ctx[col] ||
1678                                 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1680                             } else if (s->above_comp_ctx[col]) {
1681                                 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1683                                 c = 4 * (s->above_ref_ctx[col] == 1);
1686                     } else if (have_l) {
1687                         if (s->left_intra_ctx[row7] ||
1688                             (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1690                         } else if (s->left_comp_ctx[row7]) {
1691                             c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1693                             c = 4 * (s->left_ref_ctx[row7] == 1);
1698                     bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1699                     s->counts.single_ref[c][1][bit]++;
1700                     b->ref[0] = 1 + bit;
// --- inter mode (whole block, bs <= 8x8) --------------------------------
1705         if (b->bs <= BS_8x8) {
1706             if (s->segmentation.feat[b->seg_id].skip_enabled) {
1707                 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1709                 static const uint8_t off[10] = {
1710                     3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1713                 // FIXME this needs to use the LUT tables from find_ref_mvs
1714                 // because not all are -1,0/0,-1
1715                 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1716                                           [s->left_mode_ctx[row7 + off[b->bs]]];
1718                 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1719                                               s->prob.p.mv_mode[c]);
1720                 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
// inter modes are numbered from 10 (NEARESTMV) in the mode enum
1721                 s->counts.mv_mode[c][b->mode[0] - 10]++;
// --- interpolation filter -----------------------------------------------
1725         if (s->filtermode == FILTER_SWITCHABLE) {
1728             if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1729                 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1730                     c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1731                         s->left_filter_ctx[row7] : 3;
1733                     c = s->above_filter_ctx[col];
1735             } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1736                 c = s->left_filter_ctx[row7];
1741             b->filter = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1742                                          s->prob.p.filter[c]);
1743             s->counts.filter[c][b->filter]++;
1745             b->filter = s->filtermode;
// --- sub-8x8 split: one mode+MV per 4x4/4x8/8x4 sub-block ---------------
1748         if (b->bs > BS_8x8) {
1749             int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1751             b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1752                                           s->prob.p.mv_mode[c]);
1753             s->counts.mv_mode[c][b->mode[0] - 10]++;
1754             fill_mv(s, b->mv[0], b->mode[0], 0);
1756             if (b->bs != BS_8x4) {
1757                 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1758                                               s->prob.p.mv_mode[c]);
1759                 s->counts.mv_mode[c][b->mode[1] - 10]++;
1760                 fill_mv(s, b->mv[1], b->mode[1], 1);
1762                 b->mode[1] = b->mode[0];
1763                 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1764                 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1767             if (b->bs != BS_4x8) {
1768                 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1769                                               s->prob.p.mv_mode[c]);
1770                 s->counts.mv_mode[c][b->mode[2] - 10]++;
1771                 fill_mv(s, b->mv[2], b->mode[2], 2);
1773                 if (b->bs != BS_8x4) {
1774                     b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1775                                                   s->prob.p.mv_mode[c]);
1776                     s->counts.mv_mode[c][b->mode[3] - 10]++;
1777                     fill_mv(s, b->mv[3], b->mode[3], 3);
1779                     b->mode[3] = b->mode[2];
1780                     AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1781                     AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1784                 b->mode[2] = b->mode[0];
1785                 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1786                 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1787                 b->mode[3] = b->mode[1];
1788                 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1789                 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
// block not split: one MV, replicated into all four sub-block slots
1792             fill_mv(s, b->mv[0], b->mode[0], -1);
1793             AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1794             AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1795             AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
1796             AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1797             AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1798             AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
// --- update left/above context arrays for later blocks ------------------
1802     // FIXME this can probably be optimized
1803     memset(&s->above_skip_ctx[col], b->skip, w4);
1804     memset(&s->left_skip_ctx[row7], b->skip, h4);
1805     memset(&s->above_txfm_ctx[col], b->tx, w4);
1806     memset(&s->left_txfm_ctx[row7], b->tx, h4);
1807     memset(&s->above_partition_ctx[col], above_ctx[b->bs], w4);
1808     memset(&s->left_partition_ctx[row7], left_ctx[b->bs], h4);
1809     if (!s->keyframe && !s->intraonly) {
1810         memset(&s->above_intra_ctx[col], b->intra, w4);
1811         memset(&s->left_intra_ctx[row7], b->intra, h4);
1812         memset(&s->above_comp_ctx[col], b->comp, w4);
1813         memset(&s->left_comp_ctx[row7], b->comp, h4);
1814         memset(&s->above_mode_ctx[col], b->mode[3], w4);
1815         memset(&s->left_mode_ctx[row7], b->mode[3], h4);
1816         if (s->filtermode == FILTER_SWITCHABLE && !b->intra ) {
1817             memset(&s->above_filter_ctx[col], b->filter, w4);
1818             memset(&s->left_filter_ctx[row7], b->filter, h4);
// map the coded filter index to the internal filter order
1819             b->filter = vp9_filter_lut[b->filter];
1821         if (b->bs > BS_8x8) {
1822             int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1824             AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
1825             AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
1826             AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
1827             AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
1828             AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
1829             AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
1830             AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
1831             AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
1833             int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1835             for (n = 0; n < w4 * 2; n++) {
1836                 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
1837                 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
1839             for (n = 0; n < h4 * 2; n++) {
1840                 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
1841                 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
1845         if (!b->intra) { // FIXME write 0xff or -1 if intra, so we can use this
1846                          // as a direct check in above branches
1847             int vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
1849             memset(&s->above_ref_ctx[col], vref, w4);
1850             memset(&s->left_ref_ctx[row7], vref, h4);
// --- store refs/MVs into the per-frame map for temporal prediction ------
1855     for (y = 0; y < h4; y++) {
1856         int x, o = (row + y) * s->sb_cols * 8 + col;
1857         struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
1860             for (x = 0; x < w4; x++) {
1864         } else if (b->comp) {
1865             for (x = 0; x < w4; x++) {
1866                 mv[x].ref[0] = b->ref[0];
1867                 mv[x].ref[1] = b->ref[1];
1868                 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
1869                 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
1872             for (x = 0; x < w4; x++) {
1873                 mv[x].ref[0] = b->ref[0];
1875                 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
// Decode the residual coefficients of one transform block.
// 'scan'/'nb' give the coefficient scan order and its neighbour pairs
// (used to derive the non-zero context 'nnz' for the next position);
// 'band_counts' partitions positions into probability bands; 'qmul' holds
// the DC ([0]) and AC ([1]) dequant factors. Returns the number of decoded
// coefficients (i.e. the end-of-block position); counts for backward
// adaptation are accumulated in 'cnt'/'eob'.
1881 // FIXME remove tx argument, and merge cnt/eob arguments?
1882 static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
1883                            enum TxfmMode tx, unsigned (*cnt)[6][3],
1884                            unsigned (*eob)[6][2], uint8_t (*p)[6][11],
1885                            int nnz, const int16_t *scan, const int16_t (*nb)[2],
1886                            const int16_t *band_counts, const int16_t *qmul)
1888     int i = 0, band = 0, band_left = band_counts[band];
1889     uint8_t *tp = p[0][nnz];
// per-position token cache feeding the nnz context of later positions
1890     uint8_t cache[1024];
1895         val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
1896         eob[band][nnz][val]++;
1901         if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
1902             cnt[band][nnz][0]++;
1904                 band_left = band_counts[++band];
1906             nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
1908             if (++i == n_coeffs)
1909                 break; //invalid input; blocks should end with EOB
1914         if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
1915             cnt[band][nnz][1]++;
1919             // fill in p[3-10] (model fill) - only once per frame for each pos
1921                 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
1923             cnt[band][nnz][2]++;
// token categories: small values first, then cat1..cat6 escape codes
// with fixed per-bit probabilities (159, 165, ... per the VP9 spec tables)
1924             if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
1925                 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
1926                     cache[rc] = val = 2;
1928                     val = 3 + vp56_rac_get_prob(c, tp[5]);
1931             } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
1933                 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
1934                     val = 5 + vp56_rac_get_prob(c, 159);
1936                     val  = 7 + (vp56_rac_get_prob(c, 165) << 1);
1937                     val +=      vp56_rac_get_prob(c, 145);
1941                 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
1942                     if (!vp56_rac_get_prob_branchy(c, tp[9])) {
1943                         val  = 11 + (vp56_rac_get_prob(c, 173) << 2);
1944                         val +=      (vp56_rac_get_prob(c, 148) << 1);
1945                         val +=       vp56_rac_get_prob(c, 140);
1947                         val  = 19 + (vp56_rac_get_prob(c, 176) << 3);
1948                         val +=      (vp56_rac_get_prob(c, 155) << 2);
1949                         val +=      (vp56_rac_get_prob(c, 140) << 1);
1950                         val +=       vp56_rac_get_prob(c, 135);
1952                 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
1953                     val  = 35 + (vp56_rac_get_prob(c, 180) << 4);
1954                     val +=      (vp56_rac_get_prob(c, 157) << 3);
1955                     val +=      (vp56_rac_get_prob(c, 141) << 2);
1956                     val +=      (vp56_rac_get_prob(c, 134) << 1);
1957                     val +=       vp56_rac_get_prob(c, 130);
1959                     val  = 67 + (vp56_rac_get_prob(c, 254) << 13);
1960                     val +=      (vp56_rac_get_prob(c, 254) << 12);
1961                     val +=      (vp56_rac_get_prob(c, 254) << 11);
1962                     val +=      (vp56_rac_get_prob(c, 252) << 10);
1963                     val +=      (vp56_rac_get_prob(c, 249) << 9);
1964                     val +=      (vp56_rac_get_prob(c, 243) << 8);
1965                     val +=      (vp56_rac_get_prob(c, 230) << 7);
1966                     val +=      (vp56_rac_get_prob(c, 196) << 6);
1967                     val +=      (vp56_rac_get_prob(c, 177) << 5);
1968                     val +=      (vp56_rac_get_prob(c, 153) << 4);
1969                     val +=      (vp56_rac_get_prob(c, 140) << 3);
1970                     val +=      (vp56_rac_get_prob(c, 133) << 2);
1971                     val +=      (vp56_rac_get_prob(c, 130) << 1);
1972                     val +=       vp56_rac_get_prob(c, 129);
1977             band_left = band_counts[++band];
// 32x32 transforms use half-scale dequantization
1978         if (tx == TX_32X32) // FIXME slow
1979             coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
1981             coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
1982         nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
1984     } while (++i < n_coeffs);
// Decode all luma and chroma residual coefficients for the current block,
// calling decode_coeffs_b() per transform sub-block and maintaining the
// left/above non-zero context arrays. End-of-block positions are stored in
// s->eob / s->uveob (16-bit for tx > 8x8, where eob can exceed 255).
1989 static void decode_coeffs(AVCodecContext *ctx)
1991     VP9Context *s = ctx->priv_data;
1993     int row = s->row, col = s->col;
// y plane probability/count tables, selected by tx size and intra/inter
1994     uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
1995     unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
1996     unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
1997     int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
// clip iteration to the visible frame area (units of 4x4 blocks)
1998     int end_x = FFMIN(2 * (s->cols - col), w4);
1999     int end_y = FFMIN(2 * (s->rows - row), h4);
2000     int n, pl, x, y, step1d = 1 << b->tx, step = 1 << (b->tx * 2);
2001     int uvstep1d = 1 << b->uvtx, uvstep = 1 << (b->uvtx * 2), res;
2002     int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
// lossless blocks use the WHT scan tables (offset by 4)
2003     int tx = 4 * s->lossless + b->tx;
2004     const int16_t * const *yscans = vp9_scans[tx];
2005     const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
2006     const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2007     const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2008     uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2009     uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
// band sizes per tx size; last entry is the remainder of the block
2010     static const int16_t band_counts[4][8] = {
2011         { 1, 2, 3, 4,  3,   16 - 13 },
2012         { 1, 2, 3, 4, 11,   64 - 21 },
2013         { 1, 2, 3, 4, 11,  256 - 21 },
2014         { 1, 2, 3, 4, 11, 1024 - 21 },
2016     const int16_t *y_band_counts = band_counts[b->tx];
2017     const int16_t *uv_band_counts = band_counts[b->uvtx];
// y coefficients: merge nnz contexts across the transform span first
2020     if (b->tx > TX_4X4) { // FIXME slow
2021         for (y = 0; y < end_y; y += step1d)
2022             for (x = 1; x < step1d; x++)
2024         for (x = 0; x < end_x; x += step1d)
2025             for (y = 1; y < step1d; y++)
2028     for (n = 0, y = 0; y < end_y; y += step1d) {
2029         for (x = 0; x < end_x; x += step1d, n += step) {
2030             enum TxfmType txtp = vp9_intra_txfm_type[b->mode[b->tx == TX_4X4 &&
2033             int nnz = a[x] + l[y];
2034             res = decode_coeffs_b(&s->c, s->block + 16 * n, 16 * step,
2035                                   b->tx, c, e, p, nnz, yscans[txtp],
2036                                   ynbs[txtp], y_band_counts, qmul[0]);
2037             a[x] = l[y] = !!res;
2038             if (b->tx > TX_8X8) {
2039                 AV_WN16A(&s->eob[n], res);
// propagate the per-transform nnz flag across the covered 4x4 positions
2045     if (b->tx > TX_4X4) { // FIXME slow
2046         for (y = 0; y < end_y; y += step1d)
2047             memset(&l[y + 1], l[y], FFMIN(end_y - y - 1, step1d - 1));
2048         for (x = 0; x < end_x; x += step1d)
2049             memset(&a[x + 1], a[x], FFMIN(end_x - x - 1, step1d - 1));
// uv coefficients: same structure, chroma tables and half dimensions
2052     p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2053     c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2054     e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2059     for (pl = 0; pl < 2; pl++) {
2060         a = &s->above_uv_nnz_ctx[pl][col];
2061         l = &s->left_uv_nnz_ctx[pl][row & 7];
2062         if (b->uvtx > TX_4X4) { // FIXME slow
2063             for (y = 0; y < end_y; y += uvstep1d)
2064                 for (x = 1; x < uvstep1d; x++)
2066             for (x = 0; x < end_x; x += uvstep1d)
2067                 for (y = 1; y < uvstep1d; y++)
2070         for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2071             for (x = 0; x < end_x; x += uvstep1d, n += uvstep) {
2072                 int nnz = a[x] + l[y];
2073                 res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n,
2074                                       16 * uvstep, b->uvtx, c, e, p, nnz,
2075                                       uvscan, uvnb, uv_band_counts, qmul[1]);
2076                 a[x] = l[y] = !!res;
2077                 if (b->uvtx > TX_8X8) {
2078                     AV_WN16A(&s->uveob[pl][n], res);
2080                     s->uveob[pl][n] = res;
2084         if (b->uvtx > TX_4X4) { // FIXME slow
2085             for (y = 0; y < end_y; y += uvstep1d)
2086                 memset(&l[y + 1], l[y], FFMIN(end_y - y - 1, uvstep1d - 1));
2087             for (x = 0; x < end_x; x += uvstep1d)
2088                 memset(&a[x + 1], a[x], FFMIN(end_x - x - 1, uvstep1d - 1));
// Prepare the top ('a') and left ('l') edge pixel buffers for intra
// prediction of one transform block, substituting the appropriate
// DC_12x/edge-extension fallbacks when top/left/topright neighbours are
// unavailable (frame or tile boundary). Returns the possibly remapped
// prediction mode. 'p' is the plane index (0 = luma); dst_edge/dst_inner
// distinguish the frame buffer vs. the per-row working buffer.
2093 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2094                                              uint8_t *dst_edge, ptrdiff_t stride_edge,
2095                                              uint8_t *dst_inner, ptrdiff_t stride_inner,
2096                                              uint8_t *l, int col, int x, int w,
2097                                              int row, int y, enum TxfmMode tx,
2100     int have_top = row > 0 || y > 0;
2101     int have_left = col > s->tiling.tile_col_start || x > 0;
2102     int have_right = x < w - 1;
// remap modes whose required edges are missing to an equivalent DC variant
2103     static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2104         [VERT_PRED]            = { { DC_127_PRED,          VERT_PRED },
2105                                    { DC_127_PRED,          VERT_PRED } },
2106         [HOR_PRED]             = { { DC_129_PRED,          DC_129_PRED },
2107                                    { HOR_PRED,             HOR_PRED } },
2108         [DC_PRED]              = { { DC_128_PRED,          TOP_DC_PRED },
2109                                    { LEFT_DC_PRED,         DC_PRED } },
2110         [DIAG_DOWN_LEFT_PRED]  = { { DC_127_PRED,          DIAG_DOWN_LEFT_PRED },
2111                                    { DC_127_PRED,          DIAG_DOWN_LEFT_PRED } },
2112         [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2113                                    { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2114         [VERT_RIGHT_PRED]      = { { VERT_RIGHT_PRED,      VERT_RIGHT_PRED },
2115                                    { VERT_RIGHT_PRED,      VERT_RIGHT_PRED } },
2116         [HOR_DOWN_PRED]        = { { HOR_DOWN_PRED,        HOR_DOWN_PRED },
2117                                    { HOR_DOWN_PRED,        HOR_DOWN_PRED } },
2118         [VERT_LEFT_PRED]       = { { DC_127_PRED,          VERT_LEFT_PRED },
2119                                    { DC_127_PRED,          VERT_LEFT_PRED } },
2120         [HOR_UP_PRED]          = { { DC_129_PRED,          DC_129_PRED },
2121                                    { HOR_UP_PRED,          HOR_UP_PRED } },
2122         [TM_VP8_PRED]          = { { DC_129_PRED,          VERT_PRED },
2123                                    { HOR_PRED,             TM_VP8_PRED } },
// which edges each prediction mode reads
2125     static const struct {
2126         uint8_t needs_left:1;
2127         uint8_t needs_top:1;
2128         uint8_t needs_topleft:1;
2129         uint8_t needs_topright:1;
2130     } edges[N_INTRA_PRED_MODES] = {
2131         [VERT_PRED]            = { .needs_top  = 1 },
2132         [HOR_PRED]             = { .needs_left = 1 },
2133         [DC_PRED]              = { .needs_top  = 1, .needs_left = 1 },
2134         [DIAG_DOWN_LEFT_PRED]  = { .needs_top  = 1, .needs_topright = 1 },
2135         [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2136         [VERT_RIGHT_PRED]      = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2137         [HOR_DOWN_PRED]        = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2138         [VERT_LEFT_PRED]       = { .needs_top  = 1, .needs_topright = 1 },
2139         [HOR_UP_PRED]          = { .needs_left = 1 },
2140         [TM_VP8_PRED]          = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2141         [LEFT_DC_PRED]         = { .needs_left = 1 },
2142         [TOP_DC_PRED]          = { .needs_top  = 1 },
2143         [DC_128_PRED]          = { 0 },
2144         [DC_127_PRED]          = { 0 },
2145         [DC_129_PRED]          = { 0 }
2148     av_assert2(mode >= 0 && mode < 10);
2149     mode = mode_conv[mode][have_left][have_top];
2150     if (edges[mode].needs_top) {
2151         uint8_t *top, *topleft;
2152         int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
2153         int n_px_need_tr = 0;
2155         if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2158         // if top of sb64-row, use s->intra_pred_data[] instead of
2159         // dst[-stride] for intra prediction (it contains pre- instead of
2160         // post-loopfilter data)
2162             top = !(row & 7) && !y ?
2163                 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2164                 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2166                 topleft = !(row & 7) && !y ?
2167                     s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2168                     y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2169                     &dst_inner[-stride_inner];
// fast path: all required edge pixels are directly available
2173             (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2174             (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2175             n_px_need + n_px_need_tr <= n_px_have) {
// otherwise copy what exists and edge-extend / fill with the DC constants
2179             if (n_px_need <= n_px_have) {
2180                 memcpy(*a, top, n_px_need);
2182                 memcpy(*a, top, n_px_have);
2183                 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2184                        n_px_need - n_px_have);
2187             memset(*a, 127, n_px_need);
2189         if (edges[mode].needs_topleft) {
2190             if (have_left && have_top) {
2191                 (*a)[-1] = topleft[-1];
2193                 (*a)[-1] = have_top ? 129 : 127;
2196         if (tx == TX_4X4 && edges[mode].needs_topright) {
2197             if (have_top && have_right &&
2198                 n_px_need + n_px_need_tr <= n_px_have) {
2199                 memcpy(&(*a)[4], &top[4], 4);
2201                 memset(&(*a)[4], (*a)[3], 4);
// left edge: gather column of pixels left of dst, extend or fill with 129
2206     if (edges[mode].needs_left) {
2208             int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
2209             uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2210             ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2212             if (n_px_need <= n_px_have) {
2213                 for (i = 0; i < n_px_need; i++)
2214                     l[i] = dst[i * stride - 1];
2216                 for (i = 0; i < n_px_have; i++)
2217                     l[i] = dst[i * stride - 1];
2218                 memset(&l[i], l[i - 1], n_px_need - n_px_have);
2221             memset(l, 129, 4 << tx);
/* Intra reconstruction of the current block: for every transform-sized
 * sub-block, fix up the prediction edge pixels (check_intra_mode), run the
 * intra predictor, then add the inverse-transformed residual unless the
 * block is skipped. Luma plane first, then both chroma planes. */
2228 static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2230 VP9Context *s = ctx->priv_data;
2232 int row = s->row, col = s->col;
// w4/h4: block size in 4x4 units; step1d: tx size in 4px units;
// step: number of 4x4 units covered by one transform block
2233 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2234 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
// clip the iteration range so we never predict past the visible frame edge
2235 int end_x = FFMIN(2 * (s->cols - col), w4);
2236 int end_y = FFMIN(2 * (s->rows - row), h4);
// lossless mode selects the WHT variants (index offset 4) of the itxfm table
2237 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2238 int uvstep1d = 1 << b->uvtx, p;
// dst: working buffer (possibly emu copy); dst_r: real frame buffer, used
// as the reference for edge-pixel fixup
2239 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
// luma: one intra prediction + inverse transform per tx-sized sub-block
2241 for (n = 0, y = 0; y < end_y; y += step1d) {
2242 uint8_t *ptr = dst, *ptr_r = dst_r;
2243 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2244 ptr_r += 4 * step1d, n += step) {
2245 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
// a_buf holds the above-edge pixels (a points 16 bytes in so a[-1] is
// valid for the top-left sample); l[] holds the left-edge pixels
2247 LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
2248 uint8_t *a = &a_buf[16], l[32];
2249 enum TxfmType txtp = vp9_intra_txfm_type[mode];
// eob is stored 16-bit per block for tx > 8x8; 0 when the block is skipped
2250 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2252 mode = check_intra_mode(s, mode, &a, ptr_r,
2253 s->frames[CUR_FRAME].tf.f->linesize[0],
2254 ptr, s->y_stride, l,
2255 col, x, w4, row, y, b->tx, 0);
2256 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2258 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2259 s->block + 16 * n, eob);
2261 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2262 dst += 4 * step1d * s->y_stride;
// chroma planes (U then V): shared uvmode/uvtx, residual is always DCT_DCT
2270 step = 1 << (b->uvtx * 2);
2271 for (p = 0; p < 2; p++) {
2272 dst = s->dst[1 + p];
2273 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2274 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2275 uint8_t *ptr = dst, *ptr_r = dst_r;
2276 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2277 ptr_r += 4 * uvstep1d, n += step) {
2278 int mode = b->uvmode;
2279 LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
2280 uint8_t *a = &a_buf[16], l[32];
2281 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2283 mode = check_intra_mode(s, mode, &a, ptr_r,
2284 s->frames[CUR_FRAME].tf.f->linesize[1],
2285 ptr, s->uv_stride, l,
2286 col, x, w4, row, y, b->uvtx, p + 1);
2287 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2289 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2290 s->uvblock[p] + 16 * n, eob);
2292 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2293 dst += 4 * uvstep1d * s->uv_stride;
/* Motion compensation of one luma block for a single prediction direction.
 * Waits for the reference frame to be decoded far enough (frame threading),
 * falls back to an emulated-edge copy when the subpel filter footprint
 * would read outside the reference frame, then runs the dsp MC function. */
2298 static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2299 uint8_t *dst, ptrdiff_t dst_stride,
2300 const uint8_t *ref, ptrdiff_t ref_stride,
2301 ThreadFrame *ref_frame,
2302 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2303 int bw, int bh, int w, int h)
2305 int mx = mv->x, my = mv->y, th;
2309 ref += y * ref_stride + x;
2312 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2313 // we use +7 because the last 7 pixels of each sbrow can be changed in
2314 // the longest loopfilter of the next sbrow
// >>6: progress is presumably tracked in 64-pixel superblock rows — the
// chroma variant below uses >>5 for half-resolution coordinates
2315 th = (y + bh + 4 * !!my + 7) >> 6;
2316 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
// subpel filtering needs 3 pixels before and 4 after the block in each
// dimension it interpolates; emulate the edge if that footprint leaves
// the reference frame
2317 if (x < !!mx * 3 || y < !!my * 3 ||
2318 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2319 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2320 ref - !!my * 3 * ref_stride - !!mx * 3,
2322 bw + !!mx * 7, bh + !!my * 7,
2323 x - !!mx * 3, y - !!my * 3, w, h);
// 80 is the edge_emu_buffer line width used for the padded copy
2324 ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2327 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
/* Chroma counterpart of mc_luma_dir: motion-compensates both U and V for
 * one prediction direction. U and V have separate source strides, so each
 * plane may independently need an emulated-edge copy. */
2330 static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2331 uint8_t *dst_u, uint8_t *dst_v,
2332 ptrdiff_t dst_stride,
2333 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2334 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2335 ThreadFrame *ref_frame,
2336 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2337 int bw, int bh, int w, int h)
2339 int mx = mv->x, my = mv->y, th;
2343 ref_u += y * src_stride_u + x;
2344 ref_v += y * src_stride_v + x;
2347 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2348 // we use +7 because the last 7 pixels of each sbrow can be changed in
2349 // the longest loopfilter of the next sbrow
// >>5 rather than >>6: x/y here are in (subsampled) chroma coordinates,
// so a 64-px luma sbrow corresponds to 32 chroma rows
2350 th = (y + bh + 4 * !!my + 7) >> 5;
2351 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
// same 3-before/4-after subpel footprint test as in mc_luma_dir
2352 if (x < !!mx * 3 || y < !!my * 3 ||
2353 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2354 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2355 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2357 bw + !!mx * 7, bh + !!my * 7,
2358 x - !!mx * 3, y - !!my * 3, w, h);
// padded copy uses the fixed edge_emu_buffer line width of 80 bytes
2359 ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2360 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
// V plane reuses edge_emu_buffer after U has been filtered
2362 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2363 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2365 bw + !!mx * 7, bh + !!my * 7,
2366 x - !!mx * 3, y - !!my * 3, w, h);
2367 ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2368 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
2370 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2371 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
/* Inter reconstruction of the current block: motion compensation from one
 * or two (b->comp) reference frames for luma and chroma, followed by
 * adding the inverse-transformed residual. Sub-8x8 block sizes use one MV
 * per 4x4/4x8/8x4 partition; chroma then uses the rounded average MV. */
2375 static void inter_recon(AVCodecContext *ctx)
// log2 of the MC function width index, per block size: [0] luma, [1] chroma
2377 static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
2378 { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
2379 { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
2381 VP9Context *s = ctx->priv_data;
2383 int row = s->row, col = s->col;
2384 ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]];
2385 AVFrame *ref1 = tref1->f;
// second reference only exists for compound prediction
2386 ThreadFrame *tref2 = b->comp ? &s->refs[s->refidx[b->ref[1]]] : NULL;
2387 AVFrame *ref2 = b->comp ? tref2->f : NULL;
2388 int w = ctx->width, h = ctx->height;
2389 ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
// y inter prediction: sub-8x8 sizes need one MC call per partition
2392 if (b->bs > BS_8x8) {
2393 if (b->bs == BS_8x4) {
2394 mc_luma_dir(s, s->dsp.mc[3][b->filter][0], s->dst[0], ls_y,
2395 ref1->data[0], ref1->linesize[0], tref1,
2396 row << 3, col << 3, &b->mv[0][0], 8, 4, w, h);
2397 mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
2398 s->dst[0] + 4 * ls_y, ls_y,
2399 ref1->data[0], ref1->linesize[0], tref1,
2400 (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w, h);
// second direction (averaged in by the dsp [1] functions)
2403 mc_luma_dir(s, s->dsp.mc[3][b->filter][1], s->dst[0], ls_y,
2404 ref2->data[0], ref2->linesize[0], tref2,
2405 row << 3, col << 3, &b->mv[0][1], 8, 4, w, h);
2406 mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
2407 s->dst[0] + 4 * ls_y, ls_y,
2408 ref2->data[0], ref2->linesize[0], tref2,
2409 (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w, h);
2411 } else if (b->bs == BS_4x8) {
2412 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2413 ref1->data[0], ref1->linesize[0], tref1,
2414 row << 3, col << 3, &b->mv[0][0], 4, 8, w, h);
2415 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2416 ref1->data[0], ref1->linesize[0], tref1,
2417 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w, h);
2420 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2421 ref2->data[0], ref2->linesize[0], tref2,
2422 row << 3, col << 3, &b->mv[0][1], 4, 8, w, h);
2423 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2424 ref2->data[0], ref2->linesize[0], tref2,
2425 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w, h);
2428 av_assert2(b->bs == BS_4x4);
2430 // FIXME if two horizontally adjacent blocks have the same MV,
2431 // do a w8 instead of a w4 call
2432 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2433 ref1->data[0], ref1->linesize[0], tref1,
2434 row << 3, col << 3, &b->mv[0][0], 4, 4, w, h);
2435 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2436 ref1->data[0], ref1->linesize[0], tref1,
2437 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w, h);
2438 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2439 s->dst[0] + 4 * ls_y, ls_y,
2440 ref1->data[0], ref1->linesize[0], tref1,
2441 (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w, h);
2442 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2443 s->dst[0] + 4 * ls_y + 4, ls_y,
2444 ref1->data[0], ref1->linesize[0], tref1,
2445 (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w, h);
2448 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2449 ref2->data[0], ref2->linesize[0], tref2,
2450 row << 3, col << 3, &b->mv[0][1], 4, 4, w, h);
2451 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2452 ref2->data[0], ref2->linesize[0], tref2,
2453 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w, h);
2454 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2455 s->dst[0] + 4 * ls_y, ls_y,
2456 ref2->data[0], ref2->linesize[0], tref2,
2457 (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w, h);
2458 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2459 s->dst[0] + 4 * ls_y + 4, ls_y,
2460 ref2->data[0], ref2->linesize[0], tref2,
2461 (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w, h);
// >= 8x8: a single MC call per direction covers the whole luma block
2465 int bwl = bwlog_tab[0][b->bs];
2466 int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
2468 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], s->dst[0], ls_y,
2469 ref1->data[0], ref1->linesize[0], tref1,
2470 row << 3, col << 3, &b->mv[0][0],bw, bh, w, h);
2473 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], s->dst[0], ls_y,
2474 ref2->data[0], ref2->linesize[0], tref2,
2475 row << 3, col << 3, &b->mv[0][1], bw, bh, w, h);
// uv inter prediction; sub-8x8 blocks use the rounded average of the
// four luma partition MVs for chroma
2480 int bwl = bwlog_tab[1][b->bs];
2481 int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
2486 if (b->bs > BS_8x8) {
2487 mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
2488 mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
2493 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
2494 s->dst[1], s->dst[2], ls_uv,
2495 ref1->data[1], ref1->linesize[1],
2496 ref1->data[2], ref1->linesize[2], tref1,
2497 row << 2, col << 2, &mvuv, bw, bh, w, h);
2500 if (b->bs > BS_8x8) {
2501 mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
2502 mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
2506 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
2507 s->dst[1], s->dst[2], ls_uv,
2508 ref2->data[1], ref2->linesize[1],
2509 ref2->data[2], ref2->linesize[2], tref2,
2510 row << 2, col << 2, &mvuv, bw, bh, w, h);
// residual pass: add inverse transforms on top of the MC prediction;
2515 /* mostly copied from intra_recon() */
2517 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2518 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2519 int end_x = FFMIN(2 * (s->cols - col), w4);
2520 int end_y = FFMIN(2 * (s->rows - row), h4);
2521 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2522 int uvstep1d = 1 << b->uvtx, p;
2523 uint8_t *dst = s->dst[0];
// luma residual; inter blocks always use DCT_DCT
2526 for (n = 0, y = 0; y < end_y; y += step1d) {
2528 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2529 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2532 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2533 s->block + 16 * n, eob);
2535 dst += 4 * s->y_stride * step1d;
// chroma residual for both planes
2543 step = 1 << (b->uvtx * 2);
2544 for (p = 0; p < 2; p++) {
2545 dst = s->dst[p + 1];
2546 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2548 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2549 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2552 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2553 s->uvblock[p] + 16 * n, eob);
2555 dst += 4 * uvstep1d * s->uv_stride;
/* Build the per-superblock loopfilter edge masks for one block. For each
 * 8-pixel row inside the 64x64 superblock, bits are set in
 * lflvl->mask[is_uv][row/col][y][filter-size] marking which 8px columns
 * need the 16-, 8- or 4-wide filter. loopfilter_sb() later consumes these
 * masks. row_and_7/col_and_7 are the block position within the sb;
 * w/h the block size in 8px units; col_end/row_end mark partial edge
 * blocks; tx picks the transform (and thus edge) spacing. */
2561 static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
2562 int row_and_7, int col_and_7,
2563 int w, int h, int col_end, int row_end,
2564 enum TxfmMode tx, int skip_inter)
2566 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2567 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2568 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2569 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2571 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2572 // edges. This means that for UV, we work on two subsampled blocks at
2573 // a time, and we only use the topleft block's mode information to set
2574 // things like block strength. Thus, for any block size smaller than
2575 // 16x16, ignore the odd portion of the block.
2576 if (tx == TX_4X4 && is_uv) {
// non-skipped 4x4 inter blocks: filter every internal 4px edge
2591 if (tx == TX_4X4 && !skip_inter) {
// t: bit of the leftmost column; m_col: mask of all w columns
2592 int t = 1 << col_and_7, m_col = (t << w) - t, y;
2593 int m_col_odd = (t << (w - 1)) - t;
2595 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
2597 int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2599 for (y = row_and_7; y < h + row_and_7; y++) {
2600 int col_mask_id = 2 - !(y & 7);
2602 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2603 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2604 // for odd lines, if the odd col is not being filtered,
2605 // skip odd row also:
2612 // if a/c are even row/col and b/d are odd, and d is skipped,
2613 // e.g. right edge of size-66x66.webm, then skip b also (bug)
2614 if ((col_end & 1) && (y & 1)) {
2615 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
2617 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
// luma path: 8px-wide filter at sb-internal 8px boundaries (bit 0x11)
2621 int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2623 for (y = row_and_7; y < h + row_and_7; y++) {
2624 int col_mask_id = 2 - !(y & 3);
2626 lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
2627 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2628 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
2629 lflvl->mask[is_uv][0][y][3] |= m_col;
2630 lflvl->mask[is_uv][1][y][3] |= m_col;
// tx >= 8x8: edges every step1d blocks, with filter width from tx size
2634 int y, t = 1 << col_and_7, m_col = (t << w) - t;
2637 int mask_id = (tx == TX_8X8);
2638 int l2 = tx + is_uv - 1, step1d = 1 << l2;
// masks[l2] keeps only every step1d-th column bit
2639 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2640 int m_row = m_col & masks[l2];
2642 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
2643 // 8wd loopfilter to prevent going off the visible edge.
2644 if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2645 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2646 int m_row_8 = m_row - m_row_16;
2648 for (y = row_and_7; y < h + row_and_7; y++) {
2649 lflvl->mask[is_uv][0][y][0] |= m_row_16;
2650 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2653 for (y = row_and_7; y < h + row_and_7; y++)
2654 lflvl->mask[is_uv][0][y][mask_id] |= m_row;
// same odd-edge special case for the horizontal (row) edges
2657 if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2658 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2659 lflvl->mask[is_uv][1][y][0] |= m_col;
2660 if (y - row_and_7 == h - 1)
2661 lflvl->mask[is_uv][1][y][1] |= m_col;
2663 for (y = row_and_7; y < h + row_and_7; y += step1d)
2664 lflvl->mask[is_uv][1][y][mask_id] |= m_col;
2666 } else if (tx != TX_4X4) {
// skipped block with tx >= 8x8: only the block's outer edges are filtered
2669 mask_id = (tx == TX_8X8) || (is_uv && h == 1);
2670 lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
2671 mask_id = (tx == TX_8X8) || (is_uv && w == 1);
2672 for (y = row_and_7; y < h + row_and_7; y++)
2673 lflvl->mask[is_uv][0][y][mask_id] |= t;
// skipped 4x4 UV block: outer edges only, 8- or 4-wide by position
2675 int t8 = t & 0x01, t4 = t - t8;
2677 for (y = row_and_7; y < h + row_and_7; y++) {
2678 lflvl->mask[is_uv][0][y][2] |= t4;
2679 lflvl->mask[is_uv][0][y][1] |= t8;
2681 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
// skipped 4x4 luma block: outer edges only
2683 int t8 = t & 0x11, t4 = t - t8;
2685 for (y = row_and_7; y < h + row_and_7; y++) {
2686 lflvl->mask[is_uv][0][y][2] |= t4;
2687 lflvl->mask[is_uv][0][y][1] |= t8;
2689 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
/* Decode and reconstruct one block at (row, col): set up MV clamping
 * bounds, pick a working destination (emulated when the block overhangs
 * the frame buffer), run intra_recon()/inter_recon(), copy any emulated
 * pixels back, then record loopfilter levels and edge masks. The
 * coefficient pointers (block/uvblock/eob/uveob) are advanced past this
 * block's data. */
2694 static void decode_b(AVCodecContext *ctx, int row, int col,
2695 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2696 enum BlockLevel bl, enum BlockPartition bp)
2698 VP9Context *s = ctx->priv_data;
2700 enum BlockSize bs = bl * 3 + bp;
2701 int y, w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
2703 AVFrame *f = s->frames[CUR_FRAME].tf.f;
// MV clamp range: 128 (in 1/8 px) beyond the block's frame-relative edges
2709 s->min_mv.x = -(128 + col * 64);
2710 s->min_mv.y = -(128 + row * 64);
2711 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
2712 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
// chroma tx is one step smaller when the (subsampled) block can't hold b->tx
2718 b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
// skipped block: clear the nonzero-coefficient contexts for this area
2725 memset(&s->above_y_nnz_ctx[col * 2], 0, w4 * 2);
2726 memset(&s->left_y_nnz_ctx[(row & 7) << 1], 0, h4 * 2);
2727 for (pl = 0; pl < 2; pl++) {
2728 memset(&s->above_uv_nnz_ctx[pl][col], 0, w4);
2729 memset(&s->left_uv_nnz_ctx[pl][row & 7], 0, h4);
// advance coefficient buffers past this block
2734 s->block += w4 * h4 * 64;
2735 s->uvblock[0] += w4 * h4 * 16;
2736 s->uvblock[1] += w4 * h4 * 16;
2737 s->eob += 4 * w4 * h4;
2738 s->uveob[0] += w4 * h4;
2739 s->uveob[1] += w4 * h4;
2745 // emulated overhangs if the stride of the target buffer can't hold. This
2746 // allows to support emu-edge and so on even if we have large block
2748 emu[0] = (col + w4) * 8 > f->linesize[0] ||
2749 (row + h4) > s->rows + 2 * !(ctx->flags & CODEC_FLAG_EMU_EDGE);
2750 emu[1] = (col + w4) * 4 > f->linesize[1] ||
2751 (row + h4) > s->rows + 2 * !(ctx->flags & CODEC_FLAG_EMU_EDGE);
// reconstruct into temporary buffers when emulation is needed
2753 s->dst[0] = s->tmp_y;
2756 s->dst[0] = f->data[0] + yoff;
2757 s->y_stride = f->linesize[0];
2760 s->dst[1] = s->tmp_uv[0];
2761 s->dst[2] = s->tmp_uv[1];
2764 s->dst[1] = f->data[1] + uvoff;
2765 s->dst[2] = f->data[2] + uvoff;
2766 s->uv_stride = f->linesize[1];
2769 intra_recon(ctx, yoff, uvoff);
// copy the visible part of the emulated luma block back into the frame,
// using copy (mc[n][0][0][0][0]) calls of decreasing power-of-two width
2774 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
2776 for (n = 0; o < w; n++) {
2781 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
2782 s->tmp_y + o, 64, h, 0, 0);
// same copy-back for both chroma planes
2788 int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
2790 for (n = 1; o < w; n++) {
2795 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
2796 s->tmp_uv[0] + o, 32, h, 0, 0);
2797 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
2798 s->tmp_uv[1] + o, 32, h, 0, 0);
2804 // pick filter level and find edges to apply filter to
2805 if (s->filter.level &&
2806 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
2807 [b->mode[3] != ZEROMV]) > 0) {
2808 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
2809 int skip_inter = !b->intra && b->skip;
// store the filter level for every 8x8 unit this block covers
2811 for (y = 0; y < h4; y++)
2812 memset(&lflvl->level[((row & 7) + y) * 8 + (col & 7)], lvl, w4);
2813 mask_edges(lflvl, 0, row & 7, col & 7, x_end, y_end, 0, 0, b->tx, skip_inter);
2814 mask_edges(lflvl, 1, row & 7, col & 7, x_end, y_end,
2815 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
2816 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
2817 b->uvtx, skip_inter);
// lazily fill the limit LUTs for this level (0 means "not computed yet")
2819 if (!s->filter.lim_lut[lvl]) {
2820 int sharp = s->filter.sharpness;
2824 limit >>= (sharp + 3) >> 2;
2825 limit = FFMIN(limit, 9 - sharp);
2827 limit = FFMAX(limit, 1);
2829 s->filter.lim_lut[lvl] = limit;
2830 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
// advance coefficient buffers past this block (non-skip path)
2836 s->block += w4 * h4 * 64;
2837 s->uvblock[0] += w4 * h4 * 16;
2838 s->uvblock[1] += w4 * h4 * 16;
2839 s->eob += 4 * w4 * h4;
2840 s->uveob[0] += w4 * h4;
2841 s->uveob[1] += w4 * h4;
/* Recursively decode a superblock partition: read the partition symbol for
 * this level from the range coder (context c comes from the above/left
 * partition history), then either decode a block or recurse into the four
 * quadrants. Near the right/bottom frame edge some partition choices are
 * implied and fewer symbols are coded. hbs is half the block size in 8px
 * units at this level. */
2845 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
2846 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
2848 VP9Context *s = ctx->priv_data;
2849 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
2850 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
// keyframes use the static default partition probabilities
2851 const uint8_t *p = s->keyframe ? vp9_default_kf_partition_probs[bl][c] :
2852 s->prob.p.partition[bl][c];
2853 enum BlockPartition bp;
2854 ptrdiff_t hbs = 4 >> bl;
2855 AVFrame *f = s->frames[CUR_FRAME].tf.f;
2856 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
2859 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
2860 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// whole partition fits inside the frame: all four choices are possible
2861 } else if (col + hbs < s->cols) { // FIXME why not <=?
2862 if (row + hbs < s->rows) { // FIXME why not <=?
2863 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
2865 case PARTITION_NONE:
2866 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
2869 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
2870 yoff += hbs * 8 * y_stride;
2871 uvoff += hbs * 4 * uv_stride;
2872 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
2875 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
2878 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
2880 case PARTITION_SPLIT:
2881 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
2882 decode_sb(ctx, row, col + hbs, lflvl,
2883 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
2884 yoff += hbs * 8 * y_stride;
2885 uvoff += hbs * 4 * uv_stride;
2886 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
2887 decode_sb(ctx, row + hbs, col + hbs, lflvl,
2888 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
// bottom half is outside the frame: only SPLIT or H remain, one branch bit
2893 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
2894 bp = PARTITION_SPLIT;
2895 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
2896 decode_sb(ctx, row, col + hbs, lflvl,
2897 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
2900 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// right half is outside the frame: only SPLIT or V remain
2902 } else if (row + hbs < s->rows) { // FIXME why not <=?
2903 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
2904 bp = PARTITION_SPLIT;
2905 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
2906 yoff += hbs * 8 * y_stride;
2907 uvoff += hbs * 4 * uv_stride;
2908 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
2911 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// both halves outside: SPLIT is implied, no symbol is read
2914 bp = PARTITION_SPLIT;
2915 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
// count the chosen partition for backward probability adaptation
2917 s->counts.partition[bl][c][bp]++;
/* Second-pass variant of decode_sb(): instead of reading partition symbols
 * from the bitstream, it replays the block structure stored in s->b
 * (bl/bp recorded during the first pass) and re-runs decode_b() for
 * reconstruction. */
2920 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
2921 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
2923 VP9Context *s = ctx->priv_data;
2925 ptrdiff_t hbs = 4 >> bl;
2926 AVFrame *f = s->frames[CUR_FRAME].tf.f;
2927 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
2930 av_assert2(b->bl == BL_8X8);
2931 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
// stored block is at this level: decode it, plus its H/V sibling if any
2932 } else if (s->b->bl == bl) {
2933 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
2934 if (b->bp == PARTITION_H && row + hbs < s->rows) {
2935 yoff += hbs * 8 * y_stride;
2936 uvoff += hbs * 4 * uv_stride;
2937 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
2938 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
2941 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// stored block is deeper: recurse into the quadrants that are in-frame
2944 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
2945 if (col + hbs < s->cols) { // FIXME why not <=?
2946 if (row + hbs < s->rows) {
2947 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
2948 uvoff + 4 * hbs, bl + 1);
2949 yoff += hbs * 8 * y_stride;
2950 uvoff += hbs * 4 * uv_stride;
2951 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
2952 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
2953 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
2957 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
2959 } else if (row + hbs < s->rows) {
2960 yoff += hbs * 8 * y_stride;
2961 uvoff += hbs * 4 * uv_stride;
2962 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
/* Apply the deblocking loopfilter to one 64x64 superblock, driven by the
 * level array and edge masks that decode_b()/mask_edges() filled in.
 * Order: vertical (between-column) edges then horizontal (between-row)
 * edges, luma plane first, then both chroma planes. Each mask byte holds
 * one bit per 8px column; mask index [0..3] selects the 16/8/4/inner-4
 * filter width, and adjacent rows are paired into mix2 calls when both
 * need filtering. */
2967 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
2968 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
2970 VP9Context *s = ctx->priv_data;
2971 AVFrame *f = s->frames[CUR_FRAME].tf.f;
2972 uint8_t *dst = f->data[0] + yoff, *lvl = lflvl->level;
2973 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
2976 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
2977 // if you think of them as acting on a 8x8 block max, we can interleave
2978 // each v/h within the single x loop, but that only works if we work on
2979 // 8 pixel blocks, and we won't always do that (we want at least 16px
2980 // to use SSE2 optimizations, perhaps 32 for AVX2)
2982 // filter edges between columns, Y plane (e.g. block1 | block2)
2983 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
// hmask1/hmask2: masks for the two 8px rows handled per iteration
2984 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
2985 uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
2986 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
2987 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
2988 unsigned hm = hm1 | hm2 | hm13 | hm23;
2990 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
// L: filter level for this 8x8 unit; H/E/I: threshold parameters
2992 int L = *l, H = L >> 4;
2993 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2996 if (hmask1[0] & x) {
// both stacked rows want the 16-wide filter: do them in one call
2997 if (hmask2[0] & x) {
2998 av_assert2(l[8] == L);
2999 s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
3001 s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
3003 } else if (hm2 & x) {
// pack the second row's thresholds into the high byte for mix2
3006 E |= s->filter.mblim_lut[L] << 8;
3007 I |= s->filter.lim_lut[L] << 8;
3008 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3010 [0](ptr, ls_y, E, I, H);
3012 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3013 [0](ptr, ls_y, E, I, H);
3016 } else if (hm2 & x) {
3017 int L = l[8], H = L >> 4;
3018 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3021 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3022 [0](ptr + 8 * ls_y, ls_y, E, I, H);
// inner 4px edges (mask index 3) at x+4 inside the 8px column
3026 int L = *l, H = L >> 4;
3027 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3032 E |= s->filter.mblim_lut[L] << 8;
3033 I |= s->filter.lim_lut[L] << 8;
3034 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
3036 s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
3038 } else if (hm23 & x) {
3039 int L = l[8], H = L >> 4;
3040 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3042 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
3048 // filter edges between rows, Y plane (e.g. ------)
3050 dst = f->data[0] + yoff;
3052 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
3053 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
3054 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
// two adjacent 8px columns are handled per iteration (x and x<<1)
3056 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
3059 int L = *l, H = L >> 4;
3060 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3063 if (vmask[0] & (x << 1)) {
3064 av_assert2(l[1] == L);
3065 s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
3067 s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
3069 } else if (vm & (x << 1)) {
3072 E |= s->filter.mblim_lut[L] << 8;
3073 I |= s->filter.lim_lut[L] << 8;
3074 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3075 [!!(vmask[1] & (x << 1))]
3076 [1](ptr, ls_y, E, I, H);
3078 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3079 [1](ptr, ls_y, E, I, H);
3081 } else if (vm & (x << 1)) {
3082 int L = l[1], H = L >> 4;
3083 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3085 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
3086 [1](ptr + 8, ls_y, E, I, H);
// inner 4px horizontal edges, 4 lines below the 8px boundary
3090 int L = *l, H = L >> 4;
3091 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3093 if (vm3 & (x << 1)) {
3096 E |= s->filter.mblim_lut[L] << 8;
3097 I |= s->filter.lim_lut[L] << 8;
3098 s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
3100 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
3102 } else if (vm3 & (x << 1)) {
3103 int L = l[1], H = L >> 4;
3104 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3106 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
3111 // same principle but for U/V planes
3112 for (p = 0; p < 2; p++) {
3114 dst = f->data[1 + p] + uvoff;
// chroma is half resolution: rows advance by 4 mask entries at a time
3115 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
3116 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
3117 uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
3118 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
3119 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
3121 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
3124 int L = *l, H = L >> 4;
3125 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3127 if (hmask1[0] & x) {
3128 if (hmask2[0] & x) {
3129 av_assert2(l[16] == L);
3130 s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
3132 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
3134 } else if (hm2 & x) {
3137 E |= s->filter.mblim_lut[L] << 8;
3138 I |= s->filter.lim_lut[L] << 8;
3139 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3141 [0](ptr, ls_uv, E, I, H);
3143 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3144 [0](ptr, ls_uv, E, I, H);
3146 } else if (hm2 & x) {
3147 int L = l[16], H = L >> 4;
3148 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3150 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3151 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
// chroma horizontal (between-row) edges
3159 dst = f->data[1 + p] + uvoff;
3160 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
3161 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
3162 unsigned vm = vmask[0] | vmask[1] | vmask[2];
3164 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
3167 int L = *l, H = L >> 4;
3168 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3171 if (vmask[0] & (x << 2)) {
3172 av_assert2(l[2] == L);
3173 s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
3175 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
3177 } else if (vm & (x << 2)) {
3180 E |= s->filter.mblim_lut[L] << 8;
3181 I |= s->filter.lim_lut[L] << 8;
3182 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3183 [!!(vmask[1] & (x << 2))]
3184 [1](ptr, ls_uv, E, I, H);
3186 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3187 [1](ptr, ls_uv, E, I, H);
3189 } else if (vm & (x << 2)) {
3190 int L = l[2], H = L >> 4;
3191 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3193 s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
3194 [1](ptr + 8, ls_uv, E, I, H);
/* Compute the range [*start, *end) (in 8-unit steps, i.e. << 3) covered by
 * tile number idx when n superblock units are divided into 2^log2_n tiles.
 * Both endpoints are clamped to n before scaling. */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first = (idx * n) >> log2_n;
    int last = ((idx + 1) * n) >> log2_n;

    if (first > n)
        first = n;
    if (last > n)
        last = n;
    *start = first << 3;
    *end = last << 3;
}
/* Backward adaptation of one binary probability *p from the counts
 * (ct0, ct1) gathered while decoding the frame: blend the stored
 * probability toward the observed frequency of branch 0, weighted by how
 * many samples were seen (capped at max_count) and by update_factor/256. */
3212 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3213 int max_count, int update_factor)
3215 unsigned ct = ct0 + ct1, p2, p1;
// p2: observed probability of branch 0 in 8-bit fixed point, rounded,
// clipped to the valid range [1, 255]; p1 is presumably loaded from *p
// in the (not shown) lines above — TODO confirm against full source
3221 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3222 p2 = av_clip(p2, 1, 255);
// scale the blend weight by the (capped) sample count
3223 ct = FFMIN(ct, max_count);
3224 update_factor = FASTDIV(update_factor * ct, max_count);
3226 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3227 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
3230 static void adapt_probs(VP9Context *s)
3233 prob_context *p = &s->prob_ctx[s->framectxid].p;
3234 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
3237 for (i = 0; i < 4; i++)
3238 for (j = 0; j < 2; j++)
3239 for (k = 0; k < 2; k++)
3240 for (l = 0; l < 6; l++)
3241 for (m = 0; m < 6; m++) {
3242 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3243 unsigned *e = s->counts.eob[i][j][k][l][m];
3244 unsigned *c = s->counts.coef[i][j][k][l][m];
3246 if (l == 0 && m >= 3) // dc only has 3 pt
3249 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3250 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3251 adapt_prob(&pp[2], c[1], c[2], 24, uf);
3254 if (s->keyframe || s->intraonly) {
3255 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3256 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3257 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3258 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
3263 for (i = 0; i < 3; i++)
3264 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
3267 for (i = 0; i < 4; i++)
3268 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
3271 if (s->comppredmode == PRED_SWITCHABLE) {
3272 for (i = 0; i < 5; i++)
3273 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
3277 if (s->comppredmode != PRED_SINGLEREF) {
3278 for (i = 0; i < 5; i++)
3279 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3280 s->counts.comp_ref[i][1], 20, 128);
3283 if (s->comppredmode != PRED_COMPREF) {
3284 for (i = 0; i < 5; i++) {
3285 uint8_t *pp = p->single_ref[i];
3286 unsigned (*c)[2] = s->counts.single_ref[i];
3288 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3289 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3293 // block partitioning
3294 for (i = 0; i < 4; i++)
3295 for (j = 0; j < 4; j++) {
3296 uint8_t *pp = p->partition[i][j];
3297 unsigned *c = s->counts.partition[i][j];
3299 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3300 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3301 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3305 if (s->txfmmode == TX_SWITCHABLE) {
3306 for (i = 0; i < 2; i++) {
3307 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3309 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3310 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3311 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3312 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3313 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3314 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3318 // interpolation filter
3319 if (s->filtermode == FILTER_SWITCHABLE) {
3320 for (i = 0; i < 4; i++) {
3321 uint8_t *pp = p->filter[i];
3322 unsigned *c = s->counts.filter[i];
3324 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3325 adapt_prob(&pp[1], c[1], c[2], 20, 128);
3330 for (i = 0; i < 7; i++) {
3331 uint8_t *pp = p->mv_mode[i];
3332 unsigned *c = s->counts.mv_mode[i];
3334 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3335 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3336 adapt_prob(&pp[2], c[1], c[3], 20, 128);
3341 uint8_t *pp = p->mv_joint;
3342 unsigned *c = s->counts.mv_joint;
3344 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3345 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3346 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3350 for (i = 0; i < 2; i++) {
3352 unsigned *c, (*c2)[2], sum;
3354 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3355 s->counts.mv_comp[i].sign[1], 20, 128);
3357 pp = p->mv_comp[i].classes;
3358 c = s->counts.mv_comp[i].classes;
3359 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3360 adapt_prob(&pp[0], c[0], sum, 20, 128);
3362 adapt_prob(&pp[1], c[1], sum, 20, 128);
3364 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3365 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3367 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3368 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3370 adapt_prob(&pp[6], c[6], sum, 20, 128);
3371 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3372 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3373 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3375 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3376 s->counts.mv_comp[i].class0[1], 20, 128);
3377 pp = p->mv_comp[i].bits;
3378 c2 = s->counts.mv_comp[i].bits;
3379 for (j = 0; j < 10; j++)
3380 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3382 for (j = 0; j < 2; j++) {
3383 pp = p->mv_comp[i].class0_fp[j];
3384 c = s->counts.mv_comp[i].class0_fp[j];
3385 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3386 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3387 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3389 pp = p->mv_comp[i].fp;
3390 c = s->counts.mv_comp[i].fp;
3391 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3392 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3393 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3395 if (s->highprecisionmvs) {
3396 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3397 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3398 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3399 s->counts.mv_comp[i].hp[1], 20, 128);
3404 for (i = 0; i < 4; i++) {
3405 uint8_t *pp = p->y_mode[i];
3406 unsigned *c = s->counts.y_mode[i], sum, s2;
3408 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3409 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3410 sum -= c[TM_VP8_PRED];
3411 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3412 sum -= c[VERT_PRED];
3413 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3414 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3416 adapt_prob(&pp[3], s2, sum, 20, 128);
3418 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3419 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3420 sum -= c[DIAG_DOWN_LEFT_PRED];
3421 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3422 sum -= c[VERT_LEFT_PRED];
3423 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3424 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3428 for (i = 0; i < 10; i++) {
3429 uint8_t *pp = p->uv_mode[i];
3430 unsigned *c = s->counts.uv_mode[i], sum, s2;
3432 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3433 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3434 sum -= c[TM_VP8_PRED];
3435 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3436 sum -= c[VERT_PRED];
3437 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3438 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3440 adapt_prob(&pp[3], s2, sum, 20, 128);
3442 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3443 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3444 sum -= c[DIAG_DOWN_LEFT_PRED];
3445 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3446 sum -= c[VERT_LEFT_PRED];
3447 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3448 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
/*
 * AVCodec.close callback: frees every long-lived allocation owned by the
 * decoder context. Safe to call on a partially initialized context (used
 * as the failure path of init_frames()).
 * NOTE(review): several lines of this function are elided in this view;
 * comments describe only the statements shown.
 */
3452 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3454 VP9Context *s = ctx->priv_data;
/* Release the two internal frames (CUR_FRAME / LAST_FRAME): drop any live
 * buffer first, then free the AVFrame shell itself. */
3457 for (i = 0; i < 2; i++) {
3458 if (s->frames[i].tf.f->data[0])
3459 vp9_unref_frame(ctx, &s->frames[i]);
3460 av_frame_free(&s->frames[i].tf.f);
/* Release all 8 reference slots — both the active set (refs) and the
 * pending set built during decode (next_refs). */
3462 for (i = 0; i < 8; i++) {
3463 if (s->refs[i].f->data[0])
3464 ff_thread_release_buffer(ctx, &s->refs[i]);
3465 av_frame_free(&s->refs[i].f);
3466 if (s->next_refs[i].f->data[0])
3467 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3468 av_frame_free(&s->next_refs[i].f);
/* Free per-frame context/scratch arrays (above_partition_ctx is the head
 * of one combined allocation; b_base/block_base are the block and
 * coefficient scratch buffers). */
3470 av_freep(&s->above_partition_ctx);
3473 av_freep(&s->b_base);
3474 av_freep(&s->block_base);
/*
 * AVCodec.decode callback: parse the frame header, then decode one VP9
 * frame tile-by-tile (optionally in two passes for frame threading),
 * loop-filtering and reporting progress one superblock row at a time,
 * and finally rotate the reference-frame set.
 * NOTE(review): several lines (error paths, closing braces, else
 * branches) are elided in this view; comments describe only what is shown.
 */
3480 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3481 int *got_frame, AVPacket *pkt)
3483 const uint8_t *data = pkt->data;
3484 int size = pkt->size;
3485 VP9Context *s = ctx->priv_data;
3486 int res, tile_row, tile_col, i, ref, row, col;
3487 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
/* res == 0 from the header parser means "show an existing reference
 * frame" (show_existing_frame): output refs[ref] directly, no decode. */
3490 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3492 } else if (res == 0) {
3493 if (!s->refs[ref].f->data[0]) {
3494 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3495 return AVERROR_INVALIDDATA;
3497 if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
/* Rotate internal frames: current frame becomes LAST_FRAME (for mv
 * prediction on inter frames), then allocate a fresh CUR_FRAME. */
3505 if (s->frames[LAST_FRAME].tf.f->data[0])
3506 vp9_unref_frame(ctx, &s->frames[LAST_FRAME]);
3507 if (!s->keyframe && s->frames[CUR_FRAME].tf.f->data[0] &&
3508 (res = vp9_ref_frame(ctx, &s->frames[LAST_FRAME], &s->frames[CUR_FRAME])) < 0)
3510 if (s->frames[CUR_FRAME].tf.f->data[0])
3511 vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
3512 if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
3514 f = s->frames[CUR_FRAME].tf.f;
3515 f->key_frame = s->keyframe;
3516 f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3517 ls_y = f->linesize[0];
3518 ls_uv =f->linesize[1];
/* Build next_refs now (before decoding) so a failure mid-frame cannot
 * leave the reference set inconsistent: refreshed slots point at the new
 * frame, the rest keep the current reference. */
3521 for (i = 0; i < 8; i++) {
3522 if (s->next_refs[i].f->data[0])
3523 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3524 if (s->refreshrefmask & (1 << i)) {
3525 res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
3527 res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
3533 // main tile decode loop
/* Reset the "above" (per-column) entropy contexts for the whole frame. */
3534 memset(s->above_partition_ctx, 0, s->cols);
3535 memset(s->above_skip_ctx, 0, s->cols);
3536 if (s->keyframe || s->intraonly) {
3537 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
3539 memset(s->above_mode_ctx, NEARESTMV, s->cols);
3541 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
3542 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
3543 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
3544 memset(s->above_segpred_ctx, 0, s->cols);
/* Two-pass mode is used only under frame threading when this frame
 * adapts its entropy context (refreshctx) non-parallel: pass 1 parses,
 * pass 2 reconstructs. Otherwise a single pass does both. */
3545 s->pass = s->uses_2pass =
3546 ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
/* error_resilient/parallel mode: the saved context is the forward-updated
 * probabilities only (no backward adaptation), so it can be stored — and
 * setup finished for dependent threads — before any tile is decoded. */
3547 if (s->refreshctx && s->parallelmode) {
3550 for (i = 0; i < 4; i++)
3551 for (j = 0; j < 2; j++)
3552 for (k = 0; k < 2; k++)
3553 for (l = 0; l < 6; l++)
3554 for (m = 0; m < 6; m++)
3555 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
3556 s->prob.coef[i][j][k][l][m], 3);
3557 s->prob_ctx[s->framectxid].p = s->prob.p;
3558 ff_thread_finish_setup(ctx);
/* Rewind coefficient/eob scratch pointers to the start of their base
 * buffers for this pass. */
3564 s->block = s->block_base;
3565 s->uvblock[0] = s->uvblock_base[0];
3566 s->uvblock[1] = s->uvblock_base[1];
3567 s->eob = s->eob_base;
3568 s->uveob[0] = s->uveob_base[0];
3569 s->uveob[1] = s->uveob_base[1];
3571 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
3572 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
3573 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
/* Set up one range decoder per tile column; every tile except the very
 * last is preceded by a 32-bit big-endian size field. */
3575 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3578 if (tile_col == s->tiling.tile_cols - 1 &&
3579 tile_row == s->tiling.tile_rows - 1) {
3582 tile_size = AV_RB32(data);
3586 if (tile_size > size)
3587 return AVERROR_INVALIDDATA;
3588 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
3589 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) // marker bit
3590 return AVERROR_INVALIDDATA;
/* Iterate superblock rows (64x64 luma, hence 32 chroma rows at 4:2:0);
 * row/col are in units of 8 pixels. */
3596 for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
3597 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
3598 struct VP9Filter *lflvl_ptr = s->lflvl;
3599 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
3601 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3602 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
3603 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
/* Reset the "left" entropy contexts at each tile-column boundary. */
3606 memset(s->left_partition_ctx, 0, 8);
3607 memset(s->left_skip_ctx, 0, 8);
3608 if (s->keyframe || s->intraonly) {
3609 memset(s->left_mode_ctx, DC_PRED, 16);
3611 memset(s->left_mode_ctx, NEARESTMV, 8);
3613 memset(s->left_y_nnz_ctx, 0, 16);
3614 memset(s->left_uv_nnz_ctx, 0, 16);
3615 memset(s->left_segpred_ctx, 0, 8);
/* Swap in this tile column's saved range-decoder state... */
3617 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
3620 for (col = s->tiling.tile_col_start;
3621 col < s->tiling.tile_col_end;
3622 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3623 // FIXME integrate with lf code (i.e. zero after each
3624 // use, similar to invtxfm coefficients, or similar)
3626 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
/* Pass 2 reconstructs from data stored in pass 1 (decode_sb_mem);
 * otherwise decode the superblock from the bitstream directly. */
3630 decode_sb_mem(ctx, row, col, lflvl_ptr,
3631 yoff2, uvoff2, BL_64X64);
3633 decode_sb(ctx, row, col, lflvl_ptr,
3634 yoff2, uvoff2, BL_64X64);
/* ...and save it back so the next sb row of this tile resumes here. */
3638 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
3646 // backup pre-loopfilter reconstruction data for intra
3647 // prediction of next row of sb64s
3648 if (row + 8 < s->rows) {
3649 memcpy(s->intra_pred_data[0],
3650 f->data[0] + yoff + 63 * ls_y,
3652 memcpy(s->intra_pred_data[1],
3653 f->data[1] + uvoff + 31 * ls_uv,
3655 memcpy(s->intra_pred_data[2],
3656 f->data[2] + uvoff + 31 * ls_uv,
3660 // loopfilter one row
3661 if (s->filter.level) {
3664 lflvl_ptr = s->lflvl;
3665 for (col = 0; col < s->cols;
3666 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3667 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
3671 // FIXME maybe we can make this more finegrained by running the
3672 // loopfilter per-block instead of after each sbrow
3673 // In fact that would also make intra pred left preparation easier?
/* Tell waiting frame threads this sb row (row>>3) is fully decoded. */
3674 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
/* Non-parallel backward adaptation: after pass 1 (or the single pass),
 * adapt the probabilities from the frame's symbol counts, then unblock
 * dependent threads. */
3678 if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
3680 ff_thread_finish_setup(ctx);
3682 } while (s->pass++ == 1);
3683 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
/* Commit next_refs into the active reference set. */
3686 for (i = 0; i < 8; i++) {
3687 if (s->refs[i].f->data[0])
3688 ff_thread_release_buffer(ctx, &s->refs[i]);
3689 ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
/* Only visible frames are output; invisible frames just update refs. */
3692 if (!s->invisible) {
3693 if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
/*
 * AVCodec.flush callback: drop all decoded state (internal frames and
 * reference slots) so decoding can restart cleanly, e.g. after a seek.
 * Unlike vp9_decode_free(), the AVFrame shells themselves are kept.
 */
3701 static void vp9_decode_flush(AVCodecContext *ctx)
3703 VP9Context *s = ctx->priv_data;
3706 for (i = 0; i < 2; i++)
3707 vp9_unref_frame(ctx, &s->frames[i]);
3708 for (i = 0; i < 8; i++)
3709 ff_thread_release_buffer(ctx, &s->refs[i]);
/*
 * Allocate the AVFrame shells for the 2 internal frames and the 8
 * refs/next_refs slot pairs. On any allocation failure, the whole
 * context is torn down via vp9_decode_free() before returning ENOMEM,
 * so no partial state leaks. Shared by init and init_thread_copy.
 */
3712 static int init_frames(AVCodecContext *ctx)
3714 VP9Context *s = ctx->priv_data;
3717 for (i = 0; i < 2; i++) {
3718 s->frames[i].tf.f = av_frame_alloc();
3719 if (!s->frames[i].tf.f) {
3720 vp9_decode_free(ctx);
3721 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
3722 return AVERROR(ENOMEM);
3725 for (i = 0; i < 8; i++) {
3726 s->refs[i].f = av_frame_alloc();
3727 s->next_refs[i].f = av_frame_alloc();
3728 if (!s->refs[i].f || !s->next_refs[i].f) {
3729 vp9_decode_free(ctx);
3730 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
3731 return AVERROR(ENOMEM);
/*
 * AVCodec.init callback: fixed 8-bit 4:2:0 output, DSP/videodsp setup,
 * then frame-shell allocation via init_frames().
 */
3738 static av_cold int vp9_decode_init(AVCodecContext *ctx)
3740 VP9Context *s = ctx->priv_data;
/* Frame threading: progress is tracked per superblock row. */
3742 ctx->internal->allocate_progress = 1;
3743 ctx->pix_fmt = AV_PIX_FMT_YUV420P;
3744 ff_vp9dsp_init(&s->dsp);
3745 ff_videodsp_init(&s->vdsp, 8);
/* -1 = "uninitialized"; forces the loop-filter LUTs to be (re)built on
 * the first frame header. */
3746 s->filter.sharpness = -1;
3748 return init_frames(ctx);
/* Per-thread init for frame threading: each worker context only needs
 * its own frame shells; everything else is copied in
 * vp9_decode_update_thread_context(). */
3751 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
3753 return init_frames(avctx);
/*
 * Frame-threading state hand-off: copy from the previous thread's
 * context (src) the state the next frame depends on — internal frames,
 * the updated reference set (src's next_refs become dst's refs), and the
 * entropy/loop-filter/segmentation parameters.
 * NOTE(review): some lines (error returns, closing braces) are elided in
 * this view.
 */
3756 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
3759 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
3761 // FIXME scalability, size, etc.
3763 for (i = 0; i < 2; i++) {
3764 if (s->frames[i].tf.f->data[0])
3765 vp9_unref_frame(dst, &s->frames[i]);
3766 if (ssrc->frames[i].tf.f->data[0]) {
3767 if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
/* dst's refs[i] takes src's post-frame reference set (next_refs). */
3771 for (i = 0; i < 8; i++) {
3772 if (s->refs[i].f->data[0])
3773 ff_thread_release_buffer(dst, &s->refs[i]);
3774 if (ssrc->next_refs[i].f->data[0]) {
3775 if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
3780 s->invisible = ssrc->invisible;
3781 s->keyframe = ssrc->keyframe;
3782 s->uses_2pass = ssrc->uses_2pass;
3783 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
3784 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
/* Segmentation feature data only matters while segmentation is on. */
3785 if (ssrc->segmentation.enabled) {
3786 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
3787 sizeof(s->segmentation.feat));
3793 AVCodec ff_vp9_decoder = {
3795 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
3796 .type = AVMEDIA_TYPE_VIDEO,
3797 .id = AV_CODEC_ID_VP9,
3798 .priv_data_size = sizeof(VP9Context),
3799 .init = vp9_decode_init,
3800 .close = vp9_decode_free,
3801 .decode = vp9_decode_frame,
3802 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
3803 .flush = vp9_decode_flush,
3804 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
3805 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),