2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
35 #define VP9_SYNCCODE 0x498342
// Per-frame decoder state: the reference-counted picture plus side data
// (segmentation map and per-block motion vectors) that later frames read
// back for prediction.
// NOTE(review): interior lines are missing from this extract; the tail of
// this span (the mask[] field) belongs to a different struct (the loop
// filter state, struct VP9Filter) whose declaration lines are not visible.
72 typedef struct VP9Frame {
74 AVBufferRef *extradata;
75 uint8_t *segmentation_map;
76 struct VP9mvrefPair *mv;
// Loop-filter application mask fragment; dimensions documented inline below.
81 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
82 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
// Per-block (mode/partition) decode state filled while parsing one block:
// segment id, intra/inter decision, reference indices, prediction modes,
// the chosen interpolation filter, up to 4 sub-block MVs for 2 references,
// transform sizes and the partition type.
85 typedef struct VP9Block {
86 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
87 enum FilterMode filter;
88 VP56mv mv[4 /* b_idx */][2 /* ref */];
90 enum TxfmMode tx, uvtx;
92 enum BlockPartition bp;
// Main decoder context. NOTE(review): many interior lines (nested struct
// headers for filter/lf_delta/segmentation/tiling/prob/counts etc.) are
// missing from this extract, so several fields below visibly belong to
// anonymous sub-structs whose opening lines are not shown.
95 typedef struct VP9Context {
102 VP9Block *b_base, *b;
// two-pass decode bookkeeping (frame-threaded operation)
103 int pass, uses_2pass, last_uses_2pass;
// current block position in 8x8 units; row7/col7 are presumably the
// low 3 bits (position within a 64x64 superblock) — TODO confirm
104 int row, row7, col, col7;
106 ptrdiff_t y_stride, uv_stride;
// frame header fields
110 uint8_t keyframe, last_keyframe;
112 uint8_t use_last_frame_mvs;
118 uint8_t refreshrefmask;
119 uint8_t highprecisionmvs;
120 enum FilterMode filtermode;
121 uint8_t allowcompinter;
124 uint8_t parallelmode;
128 uint8_t varcompref[2];
129 ThreadFrame refs[8], next_refs[8];
// loop filter limit LUT (cached, invalidated when sharpness changes)
138 uint8_t mblim_lut[64];
// quantization deltas from the frame header
146 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
151 uint8_t absolute_vals;
157 uint8_t skip_enabled;
// tiling layout
166 unsigned log2_tile_cols, log2_tile_rows;
167 unsigned tile_cols, tile_rows;
168 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
// frame dimensions in superblocks (64x64) and 8x8 blocks
170 unsigned sb_cols, sb_rows, rows, cols;
// coefficient probabilities: the two coef[] declarations belong to two
// different sub-structs (saved context vs. working copy) — the enclosing
// struct headers are not visible in this extract
173 uint8_t coef[4][2][2][6][6][3];
177 uint8_t coef[4][2][2][6][6][11];
// symbol counts gathered during decode for backward adaptation
182 unsigned y_mode[4][10];
183 unsigned uv_mode[10][10];
184 unsigned filter[4][3];
185 unsigned mv_mode[7][4];
186 unsigned intra[4][2];
188 unsigned single_ref[5][2][2];
189 unsigned comp_ref[5][2];
190 unsigned tx32p[2][4];
191 unsigned tx16p[2][3];
194 unsigned mv_joint[4];
197 unsigned classes[11];
199 unsigned bits[10][2];
200 unsigned class0_fp[2][4];
202 unsigned class0_hp[2];
205 unsigned partition[4][4][4];
206 unsigned coef[4][2][2][6][6][3];
207 unsigned eob[4][2][2][6][6][2];
209 enum TxfmMode txfmmode;
210 enum CompPredMode comppredmode;
212 // contextual (left/above) cache
213 uint8_t left_partition_ctx[8], *above_partition_ctx;
214 uint8_t left_mode_ctx[16], *above_mode_ctx;
215 // FIXME maybe merge some of the below in a flags field?
216 uint8_t left_y_nnz_ctx[16], *above_y_nnz_ctx;
217 uint8_t left_uv_nnz_ctx[2][8], *above_uv_nnz_ctx[2];
218 uint8_t left_skip_ctx[8], *above_skip_ctx; // 1bit
219 uint8_t left_txfm_ctx[8], *above_txfm_ctx; // 2bit
220 uint8_t left_segpred_ctx[8], *above_segpred_ctx; // 1bit
221 uint8_t left_intra_ctx[8], *above_intra_ctx; // 1bit
222 uint8_t left_comp_ctx[8], *above_comp_ctx; // 1bit
223 uint8_t left_ref_ctx[8], *above_ref_ctx; // 2bit
224 uint8_t left_filter_ctx[8], *above_filter_ctx;
225 VP56mv left_mv_ctx[16][2], (*above_mv_ctx)[2];
// whole-frame cache: one allocation carved up by update_size()
228 uint8_t *intra_pred_data[3];
229 struct VP9Filter *lflvl;
230 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71*80];
232 // block reconstruction intermediates
233 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
234 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
235 struct { int x, y; } min_mv, max_mv;
236 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
237 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
// Block width/height lookup per block size, in two unit scales (presumably
// [0] = 4x4 units, [1] = 8x8 units — TODO confirm against N_BS_SIZES order).
240 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
242 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
243 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
245 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
246 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
// Allocate one VP9Frame: get the picture buffer via the frame-threading
// API, then one extradata buffer holding the segmentation map (1 byte per
// 8x8 block) followed by the VP9mvrefPair array.
// Returns 0 on success or a negative AVERROR code.
250 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
252 VP9Context *s = ctx->priv_data;
255 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
// 64 8x8 blocks per 64x64 superblock
257 sz = 64 * s->sb_cols * s->sb_rows;
258 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
// release the picture buffer so the frame is not left half-initialized
259 ff_thread_release_buffer(ctx, &f->tf);
260 return AVERROR(ENOMEM);
// carve the single allocation: map first, mv array right after it
263 f->segmentation_map = f->extradata->data;
264 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
266 // retain segmentation map if it doesn't update
267 if (s->segmentation.enabled && !s->segmentation.update_map &&
268 !s->keyframe && !s->intraonly) {
269 memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, sz);
// Release both halves of a VP9Frame: the threaded picture buffer and the
// refcounted extradata (segmentation map + mv side data).
275 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
277 ff_thread_release_buffer(ctx, &f->tf);
278 av_buffer_unref(&f->extradata);
// Create a new reference to src in dst (picture + extradata). On failure
// of the second ref, the first is undone via vp9_unref_frame so dst stays
// consistent. Returns 0 or a negative AVERROR code.
281 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
285 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
287 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
288 vp9_unref_frame(ctx, dst);
289 return AVERROR(ENOMEM);
// pointers alias src's buffer; the ref taken above keeps it alive
292 dst->segmentation_map = src->segmentation_map;
// (Re)allocate all per-frame-size buffers when the coded dimensions change:
// the above-row context arrays (one allocation carved by assign()), and the
// block/coefficient intermediates (sized per-superblock for frame-threaded
// 2-pass decoding, or one block's worth otherwise).
// Returns 0 or AVERROR(ENOMEM).
298 static int update_size(AVCodecContext *ctx, int w, int h)
300 VP9Context *s = ctx->priv_data;
303 av_assert0(w > 0 && h > 0);
// fast path: nothing to do if already sized and allocated
305 if (s->above_partition_ctx && w == ctx->width && h == ctx->height)
// derived sizes: superblocks are 64x64, block grid is 8x8
310 s->sb_cols = (w + 63) >> 6;
311 s->sb_rows = (h + 63) >> 6;
312 s->cols = (w + 7) >> 3;
313 s->rows = (h + 7) >> 3;
// carve `n * sb_cols` elements for each context array out of one buffer
315 #define assign(var, type, n) var = (type) p; p += s->sb_cols * n * sizeof(*var)
316 av_freep(&s->above_partition_ctx);
// 240 bytes/sb_col covers all the uint8_t arrays assigned below
317 p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
319 return AVERROR(ENOMEM);
320 assign(s->above_partition_ctx, uint8_t *, 8);
321 assign(s->above_skip_ctx, uint8_t *, 8);
322 assign(s->above_txfm_ctx, uint8_t *, 8);
323 assign(s->above_mode_ctx, uint8_t *, 16);
324 assign(s->above_y_nnz_ctx, uint8_t *, 16);
325 assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
326 assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
327 assign(s->intra_pred_data[0], uint8_t *, 64);
328 assign(s->intra_pred_data[1], uint8_t *, 32);
329 assign(s->intra_pred_data[2], uint8_t *, 32);
330 assign(s->above_segpred_ctx, uint8_t *, 8);
331 assign(s->above_intra_ctx, uint8_t *, 8);
332 assign(s->above_comp_ctx, uint8_t *, 8);
333 assign(s->above_ref_ctx, uint8_t *, 8);
334 assign(s->above_filter_ctx, uint8_t *, 8);
335 assign(s->lflvl, struct VP9Filter *, 1);
336 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
340 av_free(s->block_base);
// frame-threaded 2-pass mode keeps whole-frame intermediates...
341 if (ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode) {
342 int sbs = s->sb_cols * s->sb_rows;
344 s->b_base = av_malloc(sizeof(VP9Block) * s->cols * s->rows);
// 64*64 luma + 2 * 32*32 chroma coeffs (+eob) per superblock, int16_t
345 s->block_base = av_mallocz((64 * 64 + 128) * sbs * 3);
346 if (!s->b_base || !s->block_base)
347 return AVERROR(ENOMEM);
348 s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
349 s->uvblock_base[1] = s->uvblock_base[0] + sbs * 32 * 32;
350 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * 32 * 32);
351 s->uveob_base[0] = s->eob_base + 256 * sbs;
352 s->uveob_base[1] = s->uveob_base[0] + 64 * sbs;
// ...otherwise a single superblock's worth suffices (streaming decode)
354 s->b_base = av_malloc(sizeof(VP9Block));
355 s->block_base = av_mallocz((64 * 64 + 128) * 3);
356 if (!s->b_base || !s->block_base)
357 return AVERROR(ENOMEM);
358 s->uvblock_base[0] = s->block_base + 64 * 64;
359 s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
360 s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
361 s->uveob_base[0] = s->eob_base + 256;
362 s->uveob_base[1] = s->uveob_base[0] + 64;
368 // for some reason the sign bit is at the end, not the start, of a bit sequence
// Read an n-bit magnitude followed by a sign bit; returns the signed value.
369 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
371 int v = get_bits(gb, n);
372 return get_bits1(gb) ? -v : v;
// Inverse of the "recenter" mapping used by the subexponential probability
// code: undo the folding of a non-negative delta v around center m.
375 static av_always_inline int inv_recenter_nonneg(int v, int m)
377 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
380 // differential forward probability updates
// Decode a subexponentially-coded delta from the range coder and apply it
// to the current probability p (in [1,255]); returns the new probability.
381 static int update_prob(VP56RangeCoder *c, int p)
// maps the decoded VLC index back to the absolute delta; the first 20
// entries are the coarse "cheap" updates described in the comment below
383 static const int inv_map_table[254] = {
384 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
385 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
386 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
387 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
388 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
389 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
390 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
391 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
392 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
393 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
394 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
395 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
396 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
397 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
398 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
399 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
400 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
401 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
406 /* This code is trying to do a differential probability update. For a
407 * current probability A in the range [1, 255], the difference to a new
408 * probability of any value can be expressed differentially as 1-A,255-A
409 * where some part of this (absolute range) exists both in positive as
410 * well as the negative part, whereas another part only exists in one
411 * half. We're trying to code this shared part differentially, i.e.
412 * times two where the value of the lowest bit specifies the sign, and
413 * the single part is then coded on top of this. This absolute difference
414 * then again has a value of [0,254], but a bigger value in this range
415 * indicates that we're further away from the original value A, so we
416 * can code this as a VLC code, since higher values are increasingly
417 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
418 * updates vs. the 'fine, exact' updates further down the range, which
419 * adds one extra dimension to this differential update model. */
// 4-level VLC: each extra branch doubles the range of the coded index
421 if (!vp8_rac_get(c)) {
422 d = vp8_rac_get_uint(c, 4) + 0;
423 } else if (!vp8_rac_get(c)) {
424 d = vp8_rac_get_uint(c, 4) + 16;
425 } else if (!vp8_rac_get(c)) {
426 d = vp8_rac_get_uint(c, 5) + 32;
428 d = vp8_rac_get_uint(c, 7);
430 d = (d << 1) - 65 + vp8_rac_get(c);
// mirror around 128 so the delta stays inside [1,255]
434 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
435 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
// Parse the uncompressed VP9 frame header from `data`, then the arith-coded
// (compressed) header that follows it: loop filter, quantizer, segmentation,
// tiling, and all forward probability updates. On success returns the total
// number of header bytes consumed ((data2 - data) + size2); on error returns
// a negative AVERROR code. `*ref` receives the reference index for
// show-existing-frame packets.
// FIX(review): the coefficient-probability copy loop below used
// `if (m > 3 && l == 0)` while the matching update loop uses
// `if (m >= 3 && l == 0)`; the DC band (l == 0) only has 3 probability
// sets, so both loops should stop at m == 3. The stray extra iteration only
// copied an unused entry (benign), but the conditions are now consistent.
438 static int decode_frame_header(AVCodecContext *ctx,
439 const uint8_t *data, int size, int *ref)
441 VP9Context *s = ctx->priv_data;
442 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
444 const uint8_t *data2;
447 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
448 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
451 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
452 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
453 return AVERROR_INVALIDDATA;
455 s->profile = get_bits1(&s->gb);
456 if (get_bits1(&s->gb)) { // reserved bit
457 av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
458 return AVERROR_INVALIDDATA;
// show-existing-frame: only a 3-bit ref index follows
460 if (get_bits1(&s->gb)) {
461 *ref = get_bits(&s->gb, 3);
464 s->last_uses_2pass = s->uses_2pass;
465 s->last_keyframe = s->keyframe;
466 s->keyframe = !get_bits1(&s->gb);
467 last_invisible = s->invisible;
468 s->invisible = !get_bits1(&s->gb);
469 s->errorres = get_bits1(&s->gb);
470 s->use_last_frame_mvs = !s->errorres && !last_invisible;
// keyframe path: sync code, colorspace, full frame size
472 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
473 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
474 return AVERROR_INVALIDDATA;
476 s->colorspace = get_bits(&s->gb, 3);
477 if (s->colorspace == 7) { // RGB = profile 1
478 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
479 return AVERROR_INVALIDDATA;
481 s->fullrange = get_bits1(&s->gb);
482 // for profile 1, here follows the subsampling bits
// keyframes implicitly refresh all 8 reference slots
483 s->refreshrefmask = 0xff;
484 w = get_bits(&s->gb, 16) + 1;
485 h = get_bits(&s->gb, 16) + 1;
486 if (get_bits1(&s->gb)) // display size
487 skip_bits(&s->gb, 32);
// non-keyframe: intra-only and reset-context flags, then either the
// intra-only header (sync code + explicit size) or the inter header
489 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
490 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
492 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
493 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
494 return AVERROR_INVALIDDATA;
496 s->refreshrefmask = get_bits(&s->gb, 8);
497 w = get_bits(&s->gb, 16) + 1;
498 h = get_bits(&s->gb, 16) + 1;
499 if (get_bits1(&s->gb)) // display size
500 skip_bits(&s->gb, 32);
// inter frame: 3 reference slots with sign biases
502 s->refreshrefmask = get_bits(&s->gb, 8);
503 s->refidx[0] = get_bits(&s->gb, 3);
504 s->signbias[0] = get_bits1(&s->gb);
505 s->refidx[1] = get_bits(&s->gb, 3);
506 s->signbias[1] = get_bits1(&s->gb);
507 s->refidx[2] = get_bits(&s->gb, 3);
508 s->signbias[2] = get_bits1(&s->gb);
509 if (!s->refs[s->refidx[0]].f->data[0] ||
510 !s->refs[s->refidx[1]].f->data[0] ||
511 !s->refs[s->refidx[2]].f->data[0]) {
512 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
513 return AVERROR_INVALIDDATA;
// frame size either copied from one of the references or coded explicitly
515 if (get_bits1(&s->gb)) {
516 w = s->refs[s->refidx[0]].f->width;
517 h = s->refs[s->refidx[0]].f->height;
518 } else if (get_bits1(&s->gb)) {
519 w = s->refs[s->refidx[1]].f->width;
520 h = s->refs[s->refidx[1]].f->height;
521 } else if (get_bits1(&s->gb)) {
522 w = s->refs[s->refidx[2]].f->width;
523 h = s->refs[s->refidx[2]].f->height;
525 w = get_bits(&s->gb, 16) + 1;
526 h = get_bits(&s->gb, 16) + 1;
// previous-frame MVs are only usable if the size did not change
528 s->use_last_frame_mvs &= s->frames[LAST_FRAME].tf.f->width == w &&
529 s->frames[LAST_FRAME].tf.f->height == h;
530 if (get_bits1(&s->gb)) // display size
531 skip_bits(&s->gb, 32);
532 s->highprecisionmvs = get_bits1(&s->gb);
533 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
// compound prediction is allowed only when the refs disagree in sign bias;
// the fixed ref is the odd one out, varcompref[] are the other two
535 s->allowcompinter = s->signbias[0] != s->signbias[1] ||
536 s->signbias[0] != s->signbias[2];
537 if (s->allowcompinter) {
538 if (s->signbias[0] == s->signbias[1]) {
540 s->varcompref[0] = 0;
541 s->varcompref[1] = 1;
542 } else if (s->signbias[0] == s->signbias[2]) {
544 s->varcompref[0] = 0;
545 s->varcompref[1] = 2;
548 s->varcompref[0] = 1;
549 s->varcompref[1] = 2;
554 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
555 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
556 s->framectxid = c = get_bits(&s->gb, 2);
558 /* loopfilter header data */
559 s->filter.level = get_bits(&s->gb, 6);
560 sharp = get_bits(&s->gb, 3);
561 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
562 // the old cache values since they are still valid
563 if (s->filter.sharpness != sharp)
564 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
565 s->filter.sharpness = sharp;
566 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
567 if (get_bits1(&s->gb)) {
568 for (i = 0; i < 4; i++)
569 if (get_bits1(&s->gb))
570 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
571 for (i = 0; i < 2; i++)
572 if (get_bits1(&s->gb))
573 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
576 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
579 /* quantization header data */
580 s->yac_qi = get_bits(&s->gb, 8);
581 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
582 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
583 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
584 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
585 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
587 /* segmentation header info */
588 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
589 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
590 for (i = 0; i < 7; i++)
591 s->prob.seg[i] = get_bits1(&s->gb) ?
592 get_bits(&s->gb, 8) : 255;
593 if ((s->segmentation.temporal = get_bits1(&s->gb)))
594 for (i = 0; i < 3; i++)
595 s->prob.segpred[i] = get_bits1(&s->gb) ?
596 get_bits(&s->gb, 8) : 255;
// per-segment feature data (quantizer, loop filter, ref, skip)
599 if (get_bits1(&s->gb)) {
600 s->segmentation.absolute_vals = get_bits1(&s->gb);
601 for (i = 0; i < 8; i++) {
602 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
603 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
604 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
605 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
606 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
607 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
608 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
// segmentation disabled: neutralize segment 0 (the only one used then)
612 s->segmentation.feat[0].q_enabled = 0;
613 s->segmentation.feat[0].lf_enabled = 0;
614 s->segmentation.feat[0].skip_enabled = 0;
615 s->segmentation.feat[0].ref_enabled = 0;
618 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
619 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
620 int qyac, qydc, quvac, quvdc, lflvl, sh;
622 if (s->segmentation.feat[i].q_enabled) {
623 if (s->segmentation.absolute_vals)
624 qyac = s->segmentation.feat[i].q_val;
626 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
630 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
631 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
632 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
633 qyac = av_clip_uintp2(qyac, 8);
635 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
636 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
637 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
638 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];
// per-segment loop filter levels; ref/mode deltas are halved (sh) at
// high base levels
640 sh = s->filter.level >= 32;
641 if (s->segmentation.feat[i].lf_enabled) {
642 if (s->segmentation.absolute_vals)
643 lflvl = s->segmentation.feat[i].lf_val;
645 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
647 lflvl = s->filter.level;
649 s->segmentation.feat[i].lflvl[0][0] =
650 s->segmentation.feat[i].lflvl[0][1] =
651 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
652 for (j = 1; j < 4; j++) {
653 s->segmentation.feat[i].lflvl[j][0] =
654 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
655 s->lf_delta.mode[0]) << sh), 6);
656 s->segmentation.feat[i].lflvl[j][1] =
657 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
658 s->lf_delta.mode[1]) << sh), 6);
// (re)allocate size-dependent buffers before parsing tiling
663 if ((res = update_size(ctx, w, h)) < 0) {
664 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
// tiling: cols start at the minimum forced by the 4096-wide tile limit,
// then optional extra doubling bits up to the max (>=4 sb per tile)
667 for (s->tiling.log2_tile_cols = 0;
668 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
669 s->tiling.log2_tile_cols++) ;
670 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
671 max = FFMAX(0, max - 1);
672 while (max > s->tiling.log2_tile_cols) {
673 if (get_bits1(&s->gb))
674 s->tiling.log2_tile_cols++;
678 s->tiling.log2_tile_rows = decode012(&s->gb);
679 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
680 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
681 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
// one range coder per tile column
682 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
683 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
685 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
686 return AVERROR(ENOMEM);
// keyframes / error-resilient frames reset all 4 probability contexts
690 if (s->keyframe || s->errorres || s->intraonly) {
691 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
692 s->prob_ctx[3].p = vp9_default_probs;
693 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
694 sizeof(vp9_default_coef_probs));
695 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
696 sizeof(vp9_default_coef_probs));
697 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
698 sizeof(vp9_default_coef_probs));
699 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
700 sizeof(vp9_default_coef_probs));
703 // next 16 bits is size of the rest of the header (arith-coded)
704 size2 = get_bits(&s->gb, 16);
705 data2 = align_get_bits(&s->gb);
706 if (size2 > size - (data2 - data)) {
707 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
708 return AVERROR_INVALIDDATA;
710 ff_vp56_init_range_decoder(&s->c, data2, size2);
711 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
712 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
713 return AVERROR_INVALIDDATA;
// clear adaptation counters; intra frames only need coef/eob (which are
// adjacent in the struct, hence the combined-size memset)
716 if (s->keyframe || s->intraonly) {
717 memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
719 memset(&s->counts, 0, sizeof(s->counts));
721 // FIXME is it faster to not copy here, but do it down in the fw updates
722 // as explicit copies if the fw update is missing (and skip the copy upon
724 s->prob.p = s->prob_ctx[c].p;
// transform mode: forced TX_4X4 for lossless, otherwise coded
728 s->txfmmode = TX_4X4;
730 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
731 if (s->txfmmode == 3)
732 s->txfmmode += vp8_rac_get(&s->c);
734 if (s->txfmmode == TX_SWITCHABLE) {
735 for (i = 0; i < 2; i++)
736 if (vp56_rac_get_prob_branchy(&s->c, 252))
737 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
738 for (i = 0; i < 2; i++)
739 for (j = 0; j < 2; j++)
740 if (vp56_rac_get_prob_branchy(&s->c, 252))
741 s->prob.p.tx16p[i][j] =
742 update_prob(&s->c, s->prob.p.tx16p[i][j]);
743 for (i = 0; i < 2; i++)
744 for (j = 0; j < 3; j++)
745 if (vp56_rac_get_prob_branchy(&s->c, 252))
746 s->prob.p.tx32p[i][j] =
747 update_prob(&s->c, s->prob.p.tx32p[i][j]);
// coefficient probability updates, one set per tx size up to txfmmode
752 for (i = 0; i < 4; i++) {
753 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
754 if (vp8_rac_get(&s->c)) {
755 for (j = 0; j < 2; j++)
756 for (k = 0; k < 2; k++)
757 for (l = 0; l < 6; l++)
758 for (m = 0; m < 6; m++) {
759 uint8_t *p = s->prob.coef[i][j][k][l][m];
760 uint8_t *r = ref[j][k][l][m];
761 if (m >= 3 && l == 0) // dc only has 3 pt
763 for (n = 0; n < 3; n++) {
764 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
765 p[n] = update_prob(&s->c, r[n]);
// no update signalled: copy the saved context probabilities verbatim
773 for (j = 0; j < 2; j++)
774 for (k = 0; k < 2; k++)
775 for (l = 0; l < 6; l++)
776 for (m = 0; m < 6; m++) {
777 uint8_t *p = s->prob.coef[i][j][k][l][m];
778 uint8_t *r = ref[j][k][l][m];
779 if (m >= 3 && l == 0) // dc only has 3 pt
785 if (s->txfmmode == i)
// skip-flag probabilities
790 for (i = 0; i < 3; i++)
791 if (vp56_rac_get_prob_branchy(&s->c, 252))
792 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
// the rest of the forward updates only apply to inter frames
793 if (!s->keyframe && !s->intraonly) {
794 for (i = 0; i < 7; i++)
795 for (j = 0; j < 3; j++)
796 if (vp56_rac_get_prob_branchy(&s->c, 252))
797 s->prob.p.mv_mode[i][j] =
798 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
800 if (s->filtermode == FILTER_SWITCHABLE)
801 for (i = 0; i < 4; i++)
802 for (j = 0; j < 2; j++)
803 if (vp56_rac_get_prob_branchy(&s->c, 252))
804 s->prob.p.filter[i][j] =
805 update_prob(&s->c, s->prob.p.filter[i][j]);
807 for (i = 0; i < 4; i++)
808 if (vp56_rac_get_prob_branchy(&s->c, 252))
809 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
811 if (s->allowcompinter) {
812 s->comppredmode = vp8_rac_get(&s->c);
814 s->comppredmode += vp8_rac_get(&s->c);
815 if (s->comppredmode == PRED_SWITCHABLE)
816 for (i = 0; i < 5; i++)
817 if (vp56_rac_get_prob_branchy(&s->c, 252))
819 update_prob(&s->c, s->prob.p.comp[i]);
821 s->comppredmode = PRED_SINGLEREF;
824 if (s->comppredmode != PRED_COMPREF) {
825 for (i = 0; i < 5; i++) {
826 if (vp56_rac_get_prob_branchy(&s->c, 252))
827 s->prob.p.single_ref[i][0] =
828 update_prob(&s->c, s->prob.p.single_ref[i][0]);
829 if (vp56_rac_get_prob_branchy(&s->c, 252))
830 s->prob.p.single_ref[i][1] =
831 update_prob(&s->c, s->prob.p.single_ref[i][1]);
835 if (s->comppredmode != PRED_SINGLEREF) {
836 for (i = 0; i < 5; i++)
837 if (vp56_rac_get_prob_branchy(&s->c, 252))
838 s->prob.p.comp_ref[i] =
839 update_prob(&s->c, s->prob.p.comp_ref[i]);
842 for (i = 0; i < 4; i++)
843 for (j = 0; j < 9; j++)
844 if (vp56_rac_get_prob_branchy(&s->c, 252))
845 s->prob.p.y_mode[i][j] =
846 update_prob(&s->c, s->prob.p.y_mode[i][j]);
848 for (i = 0; i < 4; i++)
849 for (j = 0; j < 4; j++)
850 for (k = 0; k < 3; k++)
851 if (vp56_rac_get_prob_branchy(&s->c, 252))
852 s->prob.p.partition[3 - i][j][k] =
853 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
855 // mv fields don't use the update_prob subexp model for some reason
856 for (i = 0; i < 3; i++)
857 if (vp56_rac_get_prob_branchy(&s->c, 252))
858 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
860 for (i = 0; i < 2; i++) {
861 if (vp56_rac_get_prob_branchy(&s->c, 252))
862 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
864 for (j = 0; j < 10; j++)
865 if (vp56_rac_get_prob_branchy(&s->c, 252))
866 s->prob.p.mv_comp[i].classes[j] =
867 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
869 if (vp56_rac_get_prob_branchy(&s->c, 252))
870 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
872 for (j = 0; j < 10; j++)
873 if (vp56_rac_get_prob_branchy(&s->c, 252))
874 s->prob.p.mv_comp[i].bits[j] =
875 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
878 for (i = 0; i < 2; i++) {
879 for (j = 0; j < 2; j++)
880 for (k = 0; k < 3; k++)
881 if (vp56_rac_get_prob_branchy(&s->c, 252))
882 s->prob.p.mv_comp[i].class0_fp[j][k] =
883 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
885 for (j = 0; j < 3; j++)
886 if (vp56_rac_get_prob_branchy(&s->c, 252))
887 s->prob.p.mv_comp[i].fp[j] =
888 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// high-precision MV probabilities only when the header enabled them
891 if (s->highprecisionmvs) {
892 for (i = 0; i < 2; i++) {
893 if (vp56_rac_get_prob_branchy(&s->c, 252))
894 s->prob.p.mv_comp[i].class0_hp =
895 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
897 if (vp56_rac_get_prob_branchy(&s->c, 252))
898 s->prob.p.mv_comp[i].hp =
899 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// total header bytes consumed: uncompressed part + compressed part
904 return (data2 - data) + size2;
// Clamp a motion vector into the valid range for the current block
// (s->min_mv / s->max_mv, set up elsewhere per block position).
907 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
910 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
911 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
// Build the MV predictor for reference `ref`: scan sub-block MVs, spatial
// neighbours (mv_ref_blk_off), then the co-located MV in the previous
// frame — first requiring the same reference, then falling back to any
// reference (sign-flipped when the sign biases differ). The RETURN_* macros
// exit as soon as `z+1` distinct candidates have been seen.
// NOTE(review): interior lines (including the macro bodies' early-out
// logic) are missing from this extract.
914 static void find_ref_mvs(VP9Context *s,
915 VP56mv *pmv, int ref, int z, int idx, int sb)
// per-block-size neighbour offsets {col,row} to probe, nearest first
917 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
918 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
919 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
920 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
921 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
922 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
923 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
924 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
925 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
926 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
927 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
928 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
929 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
930 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
931 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
932 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
933 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
934 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
935 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
936 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
937 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
938 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
939 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
940 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
941 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
942 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
943 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
946 int row = s->row, col = s->col, row7 = s->row7, col7 = s->col7;
947 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
948 #define INVALID_MV 0x80008000U
// `mem` remembers the first candidate so duplicates can be rejected
949 uint32_t mem = INVALID_MV;
952 #define RETURN_DIRECT_MV(mv) \
954 uint32_t m = AV_RN32A(&mv); \
958 } else if (mem == INVALID_MV) { \
960 } else if (m != mem) { \
// sub-block (NEARMV/NEARESTMV within a split) candidates come first
967 if (sb == 2 || sb == 1) {
968 RETURN_DIRECT_MV(b->mv[0][z]);
969 } else if (sb == 3) {
970 RETURN_DIRECT_MV(b->mv[2][z]);
971 RETURN_DIRECT_MV(b->mv[1][z]);
972 RETURN_DIRECT_MV(b->mv[0][z]);
// same as RETURN_DIRECT_MV but clamps the candidate to the frame first
975 #define RETURN_MV(mv) \
980 clamp_mv(&tmp, &mv, s); \
981 m = AV_RN32A(&tmp); \
985 } else if (mem == INVALID_MV) { \
987 } else if (m != mem) { \
992 uint32_t m = AV_RN32A(&mv); \
994 clamp_mv(pmv, &mv, s); \
996 } else if (mem == INVALID_MV) { \
998 } else if (m != mem) { \
999 clamp_mv(pmv, &mv, s); \
// directly-above neighbour (uses the cached above_mv_ctx row)
1006 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1007 if (mv->ref[0] == ref) {
1008 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1009 } else if (mv->ref[1] == ref) {
1010 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
// left neighbour (only within the current tile column)
1013 if (col > s->tiling.tile_col_start) {
1014 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1015 if (mv->ref[0] == ref) {
1016 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1017 } else if (mv->ref[1] == ref) {
1018 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1026 // previously coded MVs in this neighbourhood, using same reference frame
1027 for (; i < 8; i++) {
1028 int c = p[i][0] + col, r = p[i][1] + row;
1030 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1031 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1033 if (mv->ref[0] == ref) {
1034 RETURN_MV(mv->mv[0]);
1035 } else if (mv->ref[1] == ref) {
1036 RETURN_MV(mv->mv[1]);
1041 // MV at this position in previous frame, using same reference frame
1042 if (s->use_last_frame_mvs) {
1043 struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
// frame-threading: make sure the previous frame decoded this row
1045 if (!s->last_uses_2pass)
1046 ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
1047 if (mv->ref[0] == ref) {
1048 RETURN_MV(mv->mv[0]);
1049 } else if (mv->ref[1] == ref) {
1050 RETURN_MV(mv->mv[1]);
// like RETURN_MV but negates the MV when the sign biases differ
1054 #define RETURN_SCALE_MV(mv, scale) \
1057 VP56mv mv_temp = { -mv.x, -mv.y }; \
1058 RETURN_MV(mv_temp); \
1064 // previously coded MVs in this neighbourhood, using different reference frame
1065 for (i = 0; i < 8; i++) {
1066 int c = p[i][0] + col, r = p[i][1] + row;
1068 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1069 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1071 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1072 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1074 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1075 // BUG - libvpx has this condition regardless of whether
1076 // we used the first ref MV and pre-scaling
1077 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1078 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1083 // MV at this position in previous frame, using different reference frame
1084 if (s->use_last_frame_mvs) {
1085 struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
1087 // no need to await_progress, because we already did that above
1088 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1089 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1091 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1092 // BUG - libvpx has this condition regardless of whether
1093 // we used the first ref MV and pre-scaling
1094 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1095 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1102 #undef RETURN_SCALE_MV
// Decode one MV component (idx: 0 = vertical, 1 = horizontal) from the
// range coder: sign, magnitude class, then either class0 (fine: integer +
// fractional + optional hp bit) or the general path (class bits +
// fractional + hp). Updates s->counts for backward adaptation and returns
// the signed component value.
// NOTE(review): interior lines are missing from this extract, including the
// final assembly of `n` on the general path.
1105 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1107 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1108 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1109 s->prob.p.mv_comp[idx].classes);
1111 s->counts.mv_comp[idx].sign[sign]++;
1112 s->counts.mv_comp[idx].classes[c]++;
// general path: c magnitude bits, then fractional and hp bits
1116 for (n = 0, m = 0; m < c; m++) {
1117 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1119 s->counts.mv_comp[idx].bits[m][bit]++;
1122 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1124 s->counts.mv_comp[idx].fp[bit]++;
1126 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1127 s->counts.mv_comp[idx].hp[bit]++;
1131 // bug in libvpx - we count for bw entropy purposes even if the
1133 s->counts.mv_comp[idx].hp[1]++;
// class0 path: single integer bit + fractional + optional hp bit
1137 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1138 s->counts.mv_comp[idx].class0[n]++;
1139 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1140 s->prob.p.mv_comp[idx].class0_fp[n]);
1141 s->counts.mv_comp[idx].class0_fp[n][bit]++;
1142 n = (n << 3) | (bit << 1);
1144 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1145 s->counts.mv_comp[idx].class0_hp[bit]++;
1149 // bug in libvpx - we count for bw entropy purposes even if the
1151 s->counts.mv_comp[idx].class0_hp[1]++;
// magnitude is stored as n, actual value is n+1 with sign applied
1155 return sign ? -(n + 1) : (n + 1);
// Fill in the motion vector pair mv[0]/mv[1] (one per reference frame) for
// sub-block sb of the current block (sb == -1 means the whole, unsplit
// block). ZEROMV clears both vectors; otherwise each used reference gets a
// prediction from find_ref_mvs(), and for NEWMV a jointly coded delta
// (vp9_mv_joint_tree + read_mv_component) is added on top of it.
1158 static void fill_mv(VP9Context *s,
1159 VP56mv *mv, int mode, int sb)
1163 if (mode == ZEROMV) {
1164 memset(mv, 0, sizeof(*mv) * 2);
// --- first reference ---
1168 // FIXME cache this value and reuse for other subblocks
1169 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1170 mode == NEWMV ? -1 : sb);
// high precision is only used when enabled in the frame header AND the
// predicted vector is small (both components < 64); hp is the flag that
// read_mv_component() receives below
1171 // FIXME maybe move this code into find_ref_mvs()
1172 if ((mode == NEWMV || sb == -1) &&
1173 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1187 if (mode == NEWMV) {
// the joint signals which components carry a coded delta:
// y uses component index 0, x uses component index 1
1188 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1189 s->prob.p.mv_joint);
1191 s->counts.mv_joint[j]++;
1192 if (j >= MV_JOINT_V)
1193 mv[0].y += read_mv_component(s, 0, hp);
1195 mv[0].x += read_mv_component(s, 1, hp);
// --- second reference (compound prediction): same procedure ---
1199 // FIXME cache this value and reuse for other subblocks
1200 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1201 mode == NEWMV ? -1 : sb);
1202 if ((mode == NEWMV || sb == -1) &&
1203 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1217 if (mode == NEWMV) {
1218 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1219 s->prob.p.mv_joint);
1221 s->counts.mv_joint[j]++;
1222 if (j >= MV_JOINT_V)
1223 mv[1].y += read_mv_component(s, 0, hp);
1225 mv[1].x += read_mv_component(s, 1, hp);
// Decode all mode information for the current block (s->b at s->row/s->col):
// segment id, skip flag, intra/inter flag, transform size, intra prediction
// modes or reference frames + inter modes + motion vectors, and finally
// update the above/left context arrays and the per-4x4 MV/ref storage that
// later blocks and the next frame use for prediction.
1231 static void decode_mode(AVCodecContext *ctx)
// partition context values written to the above/left arrays, per block size
1233 static const uint8_t left_ctx[N_BS_SIZES] = {
1234 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1236 static const uint8_t above_ctx[N_BS_SIZES] = {
1237 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
// largest transform size allowed for each block size
1239 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1240 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1241 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1243 VP9Context *s = ctx->priv_data;
1245 int row = s->row, col = s->col, row7 = s->row7;
1246 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
// block extent in 8x8 units, clipped against the frame edge
1247 int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
1248 int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
1249 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
// --- segment id: explicit, temporally predicted from the last frame's
// segmentation map, or freshly coded ---
1251 if (!s->segmentation.enabled) {
1253 } else if (s->keyframe || s->intraonly) {
1254 b->seg_id = s->segmentation.update_map ?
1255 vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg) : 0;
1256 } else if (!s->segmentation.update_map ||
1257 (s->segmentation.temporal &&
1258 vp56_rac_get_prob_branchy(&s->c,
1259 s->prob.segpred[s->above_segpred_ctx[col] +
1260 s->left_segpred_ctx[row7]]))) {
1262 uint8_t *refsegmap = s->frames[LAST_FRAME].segmentation_map;
// in 1-pass frame-threaded mode the reference map may still be written;
// wait until the co-located rows are done
1264 if (!s->last_uses_2pass)
1265 ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
1266 for (y = 0; y < h4; y++)
1267 for (x = 0; x < w4; x++)
1268 pred = FFMIN(pred, refsegmap[(y + row) * 8 * s->sb_cols + x + col]);
1269 av_assert1(pred < 8);
1272 memset(&s->above_segpred_ctx[col], 1, w4);
1273 memset(&s->left_segpred_ctx[row7], 1, h4);
1275 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1278 memset(&s->above_segpred_ctx[col], 0, w4);
1279 memset(&s->left_segpred_ctx[row7], 0, h4);
// record the decoded segment id in this frame's map (also used by the
// next frame's temporal segment prediction)
1281 if ((s->segmentation.enabled && s->segmentation.update_map) || s->keyframe) {
1282 uint8_t *segmap = s->frames[CUR_FRAME].segmentation_map;
1284 for (y = 0; y < h4; y++)
1285 memset(&segmap[(y + row) * 8 * s->sb_cols + col], b->seg_id, w4);
// --- skip flag: either forced by the segment feature, or coded with an
// above+left context ---
1288 b->skip = s->segmentation.enabled &&
1289 s->segmentation.feat[b->seg_id].skip_enabled;
1291 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1292 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1293 s->counts.skip[c][b->skip]++;
// --- intra/inter flag: keyframes are all-intra, a segment ref feature can
// force it, otherwise it is coded with an above/left intra context ---
1296 if (s->keyframe || s->intraonly) {
1298 } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1299 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1303 if (have_a && have_l) {
1304 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1307 c = have_a ? 2 * s->above_intra_ctx[col] :
1308 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1310 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1311 s->counts.intra[c][bit]++;
// --- transform size: coded only in TX_SWITCHABLE mode (and not for
// skipped inter blocks); context derived from above/left tx sizes,
// with skipped neighbours counting as max_tx ---
1315 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1319 c = (s->above_skip_ctx[col] ? max_tx :
1320 s->above_txfm_ctx[col]) +
1321 (s->left_skip_ctx[row7] ? max_tx :
1322 s->left_txfm_ctx[row7]) > max_tx;
1324 c = s->above_skip_ctx[col] ? 1 :
1325 (s->above_txfm_ctx[col] * 2 > max_tx);
1327 } else if (have_l) {
1328 c = s->left_skip_ctx[row7] ? 1 :
1329 (s->left_txfm_ctx[row7] * 2 > max_tx);
// unary-coded tx size, capped by max_tx (number of coded bits depends on
// the largest allowed size)
1335 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1337 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1339 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1341 s->counts.tx32p[c][b->tx]++;
1344 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1346 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1347 s->counts.tx16p[c][b->tx]++;
1350 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1351 s->counts.tx8p[c][b->tx]++;
1358 b->tx = FFMIN(max_tx, s->txfmmode);
// --- intra modes on keyframes/intra-only frames: fixed default probability
// tables keyed on the above (a[]) and left (l[]) neighbour modes; sub-8x8
// blocks code up to four modes, one per 4x4 sub-block ---
1361 if (s->keyframe || s->intraonly) {
1362 uint8_t *a = &s->above_mode_ctx[col * 2];
1363 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1366 if (b->bs > BS_8x8) {
1367 // FIXME the memory storage intermediates here aren't really
1368 // necessary, they're just there to make the code slightly
1370 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1371 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1372 if (b->bs != BS_8x4) {
1373 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1374 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1375 l[0] = a[1] = b->mode[1];
1377 l[0] = a[1] = b->mode[1] = b->mode[0];
1379 if (b->bs != BS_4x8) {
1380 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1381 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1382 if (b->bs != BS_8x4) {
1383 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1384 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1385 l[1] = a[1] = b->mode[3];
1387 l[1] = a[1] = b->mode[3] = b->mode[2];
1390 b->mode[2] = b->mode[0];
1391 l[1] = a[1] = b->mode[3] = b->mode[1];
1394 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1395 vp9_default_kf_ymode_probs[*a][*l]);
1396 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1397 // FIXME this can probably be optimized
1398 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1399 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1401 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1402 vp9_default_kf_uvmode_probs[b->mode[3]]);
// --- intra modes on inter frames: adaptive probabilities with counts ---
1403 } else if (b->intra) {
1405 if (b->bs > BS_8x8) {
1406 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1407 s->prob.p.y_mode[0]);
1408 s->counts.y_mode[0][b->mode[0]]++;
1409 if (b->bs != BS_8x4) {
1410 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1411 s->prob.p.y_mode[0]);
1412 s->counts.y_mode[0][b->mode[1]]++;
1414 b->mode[1] = b->mode[0];
1416 if (b->bs != BS_4x8) {
1417 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1418 s->prob.p.y_mode[0]);
1419 s->counts.y_mode[0][b->mode[2]]++;
1420 if (b->bs != BS_8x4) {
1421 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1422 s->prob.p.y_mode[0]);
1423 s->counts.y_mode[0][b->mode[3]]++;
1425 b->mode[3] = b->mode[2];
1428 b->mode[2] = b->mode[0];
1429 b->mode[3] = b->mode[1];
// >=8x8 blocks code a single y mode, with probabilities grouped by size
1432 static const uint8_t size_group[10] = {
1433 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1435 int sz = size_group[b->bs];
1437 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1438 s->prob.p.y_mode[sz]);
1439 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1440 s->counts.y_mode[sz][b->mode[3]]++;
1442 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1443 s->prob.p.uv_mode[b->mode[3]]);
1444 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
// --- inter path: reference frame(s), interpolation filter, modes + MVs ---
// context for the inter-mode symbol, indexed by the above/left neighbour
// mode-context values
1446 static const uint8_t inter_mode_ctx_lut[14][14] = {
1447 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1448 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1449 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1450 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1451 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1452 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1453 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1454 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1455 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1456 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1457 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1458 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1459 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1460 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
// segment ref feature forces the reference frame (single prediction)
1463 if (s->segmentation.feat[b->seg_id].ref_enabled) {
1464 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1466 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1468 // read comp_pred flag
// in PRED_SWITCHABLE mode the compound flag is coded with a context
// built from the above/left neighbours' compound/intra/ref state
1469 if (s->comppredmode != PRED_SWITCHABLE) {
1470 b->comp = s->comppredmode == PRED_COMPREF;
1474 // FIXME add intra as ref=0xff (or -1) to make these easier?
1477 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1479 } else if (s->above_comp_ctx[col]) {
1480 c = 2 + (s->left_intra_ctx[row7] ||
1481 s->left_ref_ctx[row7] == s->fixcompref);
1482 } else if (s->left_comp_ctx[row7]) {
1483 c = 2 + (s->above_intra_ctx[col] ||
1484 s->above_ref_ctx[col] == s->fixcompref);
1486 c = (!s->above_intra_ctx[col] &&
1487 s->above_ref_ctx[col] == s->fixcompref) ^
1488 (!s->left_intra_ctx[row7] &&
// NOTE(review): row & 7 here vs row7 elsewhere — equivalent if
// row7 == (row & 7) as the naming suggests; confirm against init code
1489 s->left_ref_ctx[row & 7] == s->fixcompref);
1492 c = s->above_comp_ctx[col] ? 3 :
1493 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1495 } else if (have_l) {
1496 c = s->left_comp_ctx[row7] ? 3 :
1497 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1501 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1502 s->counts.comp[c][b->comp]++;
1505 // read actual references
1506 // FIXME probably cache a few variables here to prevent repetitive
1507 // memory accesses below
1508 if (b->comp) /* two references */ {
// compound prediction: one reference is fixed (fixcompref), only the
// variable one (varcompref[0/1]) is coded; its slot depends on signbias
1509 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1511 b->ref[fix_idx] = s->fixcompref;
1512 // FIXME can this codeblob be replaced by some sort of LUT?
1515 if (s->above_intra_ctx[col]) {
1516 if (s->left_intra_ctx[row7]) {
1519 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1521 } else if (s->left_intra_ctx[row7]) {
1522 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1524 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1526 if (refl == refa && refa == s->varcompref[1]) {
1528 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1529 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1530 (refl == s->fixcompref && refa == s->varcompref[0])) {
1533 c = (refa == refl) ? 3 : 1;
1535 } else if (!s->left_comp_ctx[row7]) {
1536 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1539 c = (refl == s->varcompref[1] &&
1540 refa != s->varcompref[1]) ? 2 : 4;
1542 } else if (!s->above_comp_ctx[col]) {
1543 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1546 c = (refa == s->varcompref[1] &&
1547 refl != s->varcompref[1]) ? 2 : 4;
1550 c = (refl == refa) ? 4 : 2;
1554 if (s->above_intra_ctx[col]) {
1556 } else if (s->above_comp_ctx[col]) {
1557 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1559 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1562 } else if (have_l) {
1563 if (s->left_intra_ctx[row7]) {
1565 } else if (s->left_comp_ctx[row7]) {
1566 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1568 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1573 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1574 b->ref[var_idx] = s->varcompref[bit];
1575 s->counts.comp_ref[c][bit]++;
1576 } else /* single reference */ {
// single prediction: first bit selects LAST vs GOLDEN/ALTREF, second
// bit (below) picks between GOLDEN and ALTREF; each bit has its own
// neighbour-derived context
1579 if (have_a && !s->above_intra_ctx[col]) {
1580 if (have_l && !s->left_intra_ctx[row7]) {
1581 if (s->left_comp_ctx[row7]) {
1582 if (s->above_comp_ctx[col]) {
1583 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1584 !s->above_ref_ctx[col]);
1586 c = (3 * !s->above_ref_ctx[col]) +
1587 (!s->fixcompref || !s->left_ref_ctx[row7]);
1589 } else if (s->above_comp_ctx[col]) {
1590 c = (3 * !s->left_ref_ctx[row7]) +
1591 (!s->fixcompref || !s->above_ref_ctx[col]);
1593 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1595 } else if (s->above_intra_ctx[col]) {
1597 } else if (s->above_comp_ctx[col]) {
1598 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1600 c = 4 * (!s->above_ref_ctx[col]);
1602 } else if (have_l && !s->left_intra_ctx[row7]) {
1603 if (s->left_intra_ctx[row7]) {
1605 } else if (s->left_comp_ctx[row7]) {
1606 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1608 c = 4 * (!s->left_ref_ctx[row7]);
1613 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1614 s->counts.single_ref[c][0][bit]++;
1618 // FIXME can this codeblob be replaced by some sort of LUT?
1621 if (s->left_intra_ctx[row7]) {
1622 if (s->above_intra_ctx[col]) {
1624 } else if (s->above_comp_ctx[col]) {
1625 c = 1 + 2 * (s->fixcompref == 1 ||
1626 s->above_ref_ctx[col] == 1);
1627 } else if (!s->above_ref_ctx[col]) {
1630 c = 4 * (s->above_ref_ctx[col] == 1);
1632 } else if (s->above_intra_ctx[col]) {
1633 if (s->left_intra_ctx[row7]) {
1635 } else if (s->left_comp_ctx[row7]) {
1636 c = 1 + 2 * (s->fixcompref == 1 ||
1637 s->left_ref_ctx[row7] == 1);
1638 } else if (!s->left_ref_ctx[row7]) {
1641 c = 4 * (s->left_ref_ctx[row7] == 1);
1643 } else if (s->above_comp_ctx[col]) {
1644 if (s->left_comp_ctx[row7]) {
1645 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1646 c = 3 * (s->fixcompref == 1 ||
1647 s->left_ref_ctx[row7] == 1);
1651 } else if (!s->left_ref_ctx[row7]) {
1652 c = 1 + 2 * (s->fixcompref == 1 ||
1653 s->above_ref_ctx[col] == 1);
1655 c = 3 * (s->left_ref_ctx[row7] == 1) +
1656 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1658 } else if (s->left_comp_ctx[row7]) {
1659 if (!s->above_ref_ctx[col]) {
1660 c = 1 + 2 * (s->fixcompref == 1 ||
1661 s->left_ref_ctx[row7] == 1);
1663 c = 3 * (s->above_ref_ctx[col] == 1) +
1664 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1666 } else if (!s->above_ref_ctx[col]) {
1667 if (!s->left_ref_ctx[row7]) {
1670 c = 4 * (s->left_ref_ctx[row7] == 1);
1672 } else if (!s->left_ref_ctx[row7]) {
1673 c = 4 * (s->above_ref_ctx[col] == 1);
1675 c = 2 * (s->left_ref_ctx[row7] == 1) +
1676 2 * (s->above_ref_ctx[col] == 1);
1679 if (s->above_intra_ctx[col] ||
1680 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1682 } else if (s->above_comp_ctx[col]) {
1683 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1685 c = 4 * (s->above_ref_ctx[col] == 1);
1688 } else if (have_l) {
1689 if (s->left_intra_ctx[row7] ||
1690 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1692 } else if (s->left_comp_ctx[row7]) {
1693 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1695 c = 4 * (s->left_ref_ctx[row7] == 1);
1700 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1701 s->counts.single_ref[c][1][bit]++;
1702 b->ref[0] = 1 + bit;
// --- inter mode(s): one mode for >=8x8 blocks; segment skip forces ZEROMV
1707 if (b->bs <= BS_8x8) {
1708 if (s->segmentation.feat[b->seg_id].skip_enabled) {
1709 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
// per-block-size offset into the above/left mode-context arrays
1711 static const uint8_t off[10] = {
1712 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1715 // FIXME this needs to use the LUT tables from find_ref_mvs
1716 // because not all are -1,0/0,-1
1717 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1718 [s->left_mode_ctx[row7 + off[b->bs]]];
1720 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1721 s->prob.p.mv_mode[c]);
1722 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
// inter modes start at enum value 10, hence the -10 for the counter index
1723 s->counts.mv_mode[c][b->mode[0] - 10]++;
// --- interpolation filter: coded only in FILTER_SWITCHABLE mode, context
// from the neighbours' filters when they are inter-coded ---
1727 if (s->filtermode == FILTER_SWITCHABLE) {
1730 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1731 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1732 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1733 s->left_filter_ctx[row7] : 3;
1735 c = s->above_filter_ctx[col];
1737 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1738 c = s->left_filter_ctx[row7];
1743 b->filter = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1744 s->prob.p.filter[c]);
1745 s->counts.filter[c][b->filter]++;
1747 b->filter = s->filtermode;
// --- motion vectors: sub-8x8 blocks code up to four mode/MV pairs (one
// per 4x4 sub-block), larger blocks a single pair replicated to all four
1750 if (b->bs > BS_8x8) {
1751 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1753 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1754 s->prob.p.mv_mode[c]);
1755 s->counts.mv_mode[c][b->mode[0] - 10]++;
1756 fill_mv(s, b->mv[0], b->mode[0], 0);
1758 if (b->bs != BS_8x4) {
1759 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1760 s->prob.p.mv_mode[c]);
1761 s->counts.mv_mode[c][b->mode[1] - 10]++;
1762 fill_mv(s, b->mv[1], b->mode[1], 1);
1764 b->mode[1] = b->mode[0];
1765 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1766 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1769 if (b->bs != BS_4x8) {
1770 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1771 s->prob.p.mv_mode[c]);
1772 s->counts.mv_mode[c][b->mode[2] - 10]++;
1773 fill_mv(s, b->mv[2], b->mode[2], 2);
1775 if (b->bs != BS_8x4) {
1776 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1777 s->prob.p.mv_mode[c]);
1778 s->counts.mv_mode[c][b->mode[3] - 10]++;
1779 fill_mv(s, b->mv[3], b->mode[3], 3);
1781 b->mode[3] = b->mode[2];
1782 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1783 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1786 b->mode[2] = b->mode[0];
1787 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1788 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1789 b->mode[3] = b->mode[1];
1790 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1791 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
1794 fill_mv(s, b->mv[0], b->mode[0], -1);
1795 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1796 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1797 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
1798 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1799 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1800 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
// --- propagate the decoded modes into the above/left context arrays so
// neighbouring blocks can use them for their own context derivation ---
1804 // FIXME this can probably be optimized
1805 memset(&s->above_skip_ctx[col], b->skip, w4);
1806 memset(&s->left_skip_ctx[row7], b->skip, h4);
1807 memset(&s->above_txfm_ctx[col], b->tx, w4);
1808 memset(&s->left_txfm_ctx[row7], b->tx, h4);
1809 memset(&s->above_partition_ctx[col], above_ctx[b->bs], w4);
1810 memset(&s->left_partition_ctx[row7], left_ctx[b->bs], h4);
1811 if (!s->keyframe && !s->intraonly) {
1812 memset(&s->above_intra_ctx[col], b->intra, w4);
1813 memset(&s->left_intra_ctx[row7], b->intra, h4);
1814 memset(&s->above_comp_ctx[col], b->comp, w4);
1815 memset(&s->left_comp_ctx[row7], b->comp, h4);
1816 memset(&s->above_mode_ctx[col], b->mode[3], w4);
1817 memset(&s->left_mode_ctx[row7], b->mode[3], h4);
1818 if (s->filtermode == FILTER_SWITCHABLE && !b->intra ) {
1819 memset(&s->above_filter_ctx[col], b->filter, w4);
1820 memset(&s->left_filter_ctx[row7], b->filter, h4);
// map the coded filter index to the internal filter order
1821 b->filter = vp9_filter_lut[b->filter];
// store edge MVs (two per 8x8 unit) for the neighbours' MV prediction;
// sub-8x8 blocks expose their right/bottom sub-block MVs
1823 if (b->bs > BS_8x8) {
1824 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1826 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
1827 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
1828 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
1829 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
1830 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
1831 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
1832 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
1833 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
1835 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1837 for (n = 0; n < w4 * 2; n++) {
1838 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
1839 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
1841 for (n = 0; n < h4 * 2; n++) {
1842 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
1843 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
1847 if (!b->intra) { // FIXME write 0xff or -1 if intra, so we can use this
1848 // as a direct check in above branches
1849 int vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
1851 memset(&s->above_ref_ctx[col], vref, w4);
1852 memset(&s->left_ref_ctx[row7], vref, h4);
// --- fill the frame-global per-4x4 MV/ref storage (used by find_ref_mvs
// in this frame and as use_last_frame_mvs source for the next one) ---
1857 for (y = 0; y < h4; y++) {
1858 int x, o = (row + y) * s->sb_cols * 8 + col;
1859 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
1862 for (x = 0; x < w4; x++) {
1866 } else if (b->comp) {
1867 for (x = 0; x < w4; x++) {
1868 mv[x].ref[0] = b->ref[0];
1869 mv[x].ref[1] = b->ref[1];
1870 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
1871 AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
1874 for (x = 0; x < w4; x++) {
1875 mv[x].ref[0] = b->ref[0];
1877 AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
1883 // FIXME remove tx argument, and merge cnt/eob arguments?
// Decode all coefficients of a single transform block into coef[].
// c: range coder; n_coeffs: number of coefficients for this tx size;
// p: probability model per [band][nonzero-context][token]; cnt/eob:
// adaptation counters updated alongside; nnz: initial nonzero context from
// the above+left neighbours; scan/nb: scan order and the two neighbour
// positions used to derive the next context; band_counts: coefficients per
// probability band; qmul: {DC, AC} dequant factors (halved for TX_32X32).
// The caller uses the return value as the block's end-of-block count.
1884 static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
1885 enum TxfmMode tx, unsigned (*cnt)[6][3],
1886 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
1887 int nnz, const int16_t *scan, const int16_t (*nb)[2],
1888 const int16_t *band_counts, const int16_t *qmul)
1890 int i = 0, band = 0, band_left = band_counts[band];
1891 uint8_t *tp = p[0][nnz];
// cache[] holds a per-position token magnitude class, consulted via nb[]
// to form the nonzero context of subsequent positions
1892 uint8_t cache[1024];
1897 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
1898 eob[band][nnz][val]++;
1903 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
1904 cnt[band][nnz][0]++;
1906 band_left = band_counts[++band];
// next context = rounded average of the two neighbour token classes
1908 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
1910 if (++i == n_coeffs)
1911 break; //invalid input; blocks should end with EOB
1916 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
1917 cnt[band][nnz][1]++;
1921 // fill in p[3-10] (model fill) - only once per frame for each pos
// larger-token probabilities are derived from tp[2] via the pareto model
1923 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
1925 cnt[band][nnz][2]++;
1926 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
1927 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
1928 cache[rc] = val = 2;
1930 val = 3 + vp56_rac_get_prob(c, tp[5]);
1933 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
1935 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
// category tokens: value base plus extra bits with fixed probabilities
1936 val = 5 + vp56_rac_get_prob(c, 159);
1938 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
1939 val += vp56_rac_get_prob(c, 145);
1943 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
1944 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
1945 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
1946 val += (vp56_rac_get_prob(c, 148) << 1);
1947 val += vp56_rac_get_prob(c, 140);
1949 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
1950 val += (vp56_rac_get_prob(c, 155) << 2);
1951 val += (vp56_rac_get_prob(c, 140) << 1);
1952 val += vp56_rac_get_prob(c, 135);
1954 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
1955 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
1956 val += (vp56_rac_get_prob(c, 157) << 3);
1957 val += (vp56_rac_get_prob(c, 141) << 2);
1958 val += (vp56_rac_get_prob(c, 134) << 1);
1959 val += vp56_rac_get_prob(c, 130);
// cat6: 14 extra bits, the largest token category
1961 val = 67 + (vp56_rac_get_prob(c, 254) << 13);
1962 val += (vp56_rac_get_prob(c, 254) << 12);
1963 val += (vp56_rac_get_prob(c, 254) << 11);
1964 val += (vp56_rac_get_prob(c, 252) << 10);
1965 val += (vp56_rac_get_prob(c, 249) << 9);
1966 val += (vp56_rac_get_prob(c, 243) << 8);
1967 val += (vp56_rac_get_prob(c, 230) << 7);
1968 val += (vp56_rac_get_prob(c, 196) << 6);
1969 val += (vp56_rac_get_prob(c, 177) << 5);
1970 val += (vp56_rac_get_prob(c, 153) << 4);
1971 val += (vp56_rac_get_prob(c, 140) << 3);
1972 val += (vp56_rac_get_prob(c, 133) << 2);
1973 val += (vp56_rac_get_prob(c, 130) << 1);
1974 val += vp56_rac_get_prob(c, 129);
1979 band_left = band_counts[++band];
// sign bit, then dequantize; qmul[0] applies to the DC (i == 0) only.
// TX_32X32 stores coefficients at half scale
1980 if (tx == TX_32X32) // FIXME slow
1981 coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
1983 coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
1984 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
1986 } while (++i < n_coeffs);
// Decode the residual coefficients for the current block: luma first, then
// both chroma planes. The block is walked in transform-block steps
// (step1d/uvstep1d in 4x4 units), each transform block is handed to
// decode_coeffs_b(), and the above/left nonzero-context arrays and the
// per-transform-block EOB storage (s->eob / s->uveob) are updated.
1991 static void decode_coeffs(AVCodecContext *ctx)
1993 VP9Context *s = ctx->priv_data;
1995 int row = s->row, col = s->col;
// luma probability model / counters, split by intra vs inter
1996 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
1997 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
1998 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
// block size in 4x4 units, and the decode extent clipped to the frame edge
1999 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
2000 int end_x = FFMIN(2 * (s->cols - col), w4);
2001 int end_y = FFMIN(2 * (s->rows - row), h4);
2002 int n, pl, x, y, step1d = 1 << b->tx, step = 1 << (b->tx * 2);
2003 int uvstep1d = 1 << b->uvtx, uvstep = 1 << (b->uvtx * 2), res;
2004 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
// lossless mode selects the WHT scan tables (offset by 4)
2005 int tx = 4 * s->lossless + b->tx;
2006 const int16_t * const *yscans = vp9_scans[tx];
2007 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
// chroma always uses the DCT_DCT scan
2008 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
2009 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
2010 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
2011 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
// number of coefficients in each probability band, per tx size
2012 static const int16_t band_counts[4][8] = {
2013 { 1, 2, 3, 4, 3, 16 - 13 },
2014 { 1, 2, 3, 4, 11, 64 - 21 },
2015 { 1, 2, 3, 4, 11, 256 - 21 },
2016 { 1, 2, 3, 4, 11, 1024 - 21 },
2018 const int16_t *y_band_counts = band_counts[b->tx];
2019 const int16_t *uv_band_counts = band_counts[b->uvtx];
// --- luma: merge the per-4x4 nnz contexts down to one per transform block
2022 if (b->tx > TX_4X4) { // FIXME slow
2023 for (y = 0; y < end_y; y += step1d)
2024 for (x = 1; x < step1d; x++)
2026 for (x = 0; x < end_x; x += step1d)
2027 for (y = 1; y < step1d; y++)
2030 for (n = 0, y = 0; y < end_y; y += step1d) {
2031 for (x = 0; x < end_x; x += step1d, n += step) {
// intra 4x4 sub-blocks can have per-sub-block modes, hence the index
2032 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[b->tx == TX_4X4 &&
2035 int nnz = a[x] + l[y];
2036 res = decode_coeffs_b(&s->c, s->block + 16 * n, 16 * step,
2037 b->tx, c, e, p, nnz, yscans[txtp],
2038 ynbs[txtp], y_band_counts, qmul[0]);
2039 a[x] = l[y] = !!res;
// EOB counts > 255 are possible for large tx, store 16 bits then
2040 if (b->tx > TX_8X8) {
2041 AV_WN16A(&s->eob[n], res);
// spread the merged context back over the covered 4x4 positions
2047 if (b->tx > TX_4X4) { // FIXME slow
2048 for (y = 0; y < end_y; y += step1d)
2049 memset(&l[y + 1], l[y], FFMIN(end_y - y - 1, step1d - 1));
2050 for (x = 0; x < end_x; x += step1d)
2051 memset(&a[x + 1], a[x], FFMIN(end_x - x - 1, step1d - 1));
// --- chroma: switch to the uv model and repeat for both planes ---
2054 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
2055 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
2056 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
2061 for (pl = 0; pl < 2; pl++) {
2062 a = &s->above_uv_nnz_ctx[pl][col];
2063 l = &s->left_uv_nnz_ctx[pl][row & 7];
2064 if (b->uvtx > TX_4X4) { // FIXME slow
2065 for (y = 0; y < end_y; y += uvstep1d)
2066 for (x = 1; x < uvstep1d; x++)
2068 for (x = 0; x < end_x; x += uvstep1d)
2069 for (y = 1; y < uvstep1d; y++)
2072 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2073 for (x = 0; x < end_x; x += uvstep1d, n += uvstep) {
2074 int nnz = a[x] + l[y];
2075 res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n,
2076 16 * uvstep, b->uvtx, c, e, p, nnz,
2077 uvscan, uvnb, uv_band_counts, qmul[1]);
2078 a[x] = l[y] = !!res;
2079 if (b->uvtx > TX_8X8) {
2080 AV_WN16A(&s->uveob[pl][n], res);
2082 s->uveob[pl][n] = res;
2086 if (b->uvtx > TX_4X4) { // FIXME slow
2087 for (y = 0; y < end_y; y += uvstep1d)
2088 memset(&l[y + 1], l[y], FFMIN(end_y - y - 1, uvstep1d - 1));
2089 for (x = 0; x < end_x; x += uvstep1d)
2090 memset(&a[x + 1], a[x], FFMIN(end_x - x - 1, uvstep1d - 1));
// Prepare intra prediction for one transform block: substitute the coded
// mode by an edge-aware variant (e.g. DC_PRED -> LEFT_DC/TOP_DC/DC_128 when
// neighbours are missing) and fill the top edge buffer *a and the left edge
// buffer l with neighbour pixels, extending or falling back to the
// constants 127/128/129 where pixels are unavailable. dst_edge/stride_edge
// address the frame buffer (used at block edges), dst_inner/stride_inner
// the per-tile working buffer; x/y are the transform-block position inside
// the block, w its width in transform blocks. Returns the adjusted mode.
2095 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2096 uint8_t *dst_edge, ptrdiff_t stride_edge,
2097 uint8_t *dst_inner, ptrdiff_t stride_inner,
2098 uint8_t *l, int col, int x, int w,
2099 int row, int y, enum TxfmMode tx,
2102 int have_top = row > 0 || y > 0;
2103 int have_left = col > s->tiling.tile_col_start || x > 0;
2104 int have_right = x < w - 1;
// mode substitution table for missing left/top neighbours
2105 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2106 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2107 { DC_127_PRED, VERT_PRED } },
2108 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2109 { HOR_PRED, HOR_PRED } },
2110 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2111 { LEFT_DC_PRED, DC_PRED } },
2112 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2113 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2114 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2115 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2116 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2117 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2118 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2119 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2120 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2121 { DC_127_PRED, VERT_LEFT_PRED } },
2122 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2123 { HOR_UP_PRED, HOR_UP_PRED } },
2124 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2125 { HOR_PRED, TM_VP8_PRED } },
// which edge pixels each (possibly substituted) mode actually needs
2127 static const struct {
2128 uint8_t needs_left:1;
2129 uint8_t needs_top:1;
2130 uint8_t needs_topleft:1;
2131 uint8_t needs_topright:1;
2132 } edges[N_INTRA_PRED_MODES] = {
2133 [VERT_PRED] = { .needs_top = 1 },
2134 [HOR_PRED] = { .needs_left = 1 },
2135 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2136 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2137 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2138 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2139 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2140 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2141 [HOR_UP_PRED] = { .needs_left = 1 },
2142 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2143 [LEFT_DC_PRED] = { .needs_left = 1 },
2144 [TOP_DC_PRED] = { .needs_top = 1 },
2145 [DC_128_PRED] = { 0 },
2146 [DC_127_PRED] = { 0 },
2147 [DC_129_PRED] = { 0 }
2150 av_assert2(mode >= 0 && mode < 10);
2151 mode = mode_conv[mode][have_left][have_top];
2152 if (edges[mode].needs_top) {
2153 uint8_t *top, *topleft;
// pixels needed vs pixels actually available up to the frame edge
// (chroma, !p == 0, has half resolution)
2154 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
2155 int n_px_need_tr = 0;
2157 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2160 // if top of sb64-row, use s->intra_pred_data[] instead of
2161 // dst[-stride] for intra prediction (it contains pre- instead of
2162 // post-loopfilter data)
2164 top = !(row & 7) && !y ?
2165 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2166 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2168 topleft = !(row & 7) && !y ?
2169 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2170 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2171 &dst_inner[-stride_inner];
// fast path: all needed pixels are directly available
2175 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2176 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2177 n_px_need + n_px_need_tr <= n_px_have) {
// copy what is available and extend the last pixel to the right
2181 if (n_px_need <= n_px_have) {
2182 memcpy(*a, top, n_px_need);
2184 memcpy(*a, top, n_px_have);
2185 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2186 n_px_need - n_px_have);
// no top neighbour at all: constant 127
2189 memset(*a, 127, n_px_need);
2191 if (edges[mode].needs_topleft) {
2192 if (have_left && have_top) {
2193 (*a)[-1] = topleft[-1];
2195 (*a)[-1] = have_top ? 129 : 127;
2198 if (tx == TX_4X4 && edges[mode].needs_topright) {
2199 if (have_top && have_right &&
2200 n_px_need + n_px_need_tr <= n_px_have) {
2201 memcpy(&(*a)[4], &top[4], 4);
2203 memset(&(*a)[4], (*a)[3], 4);
// left edge: gather the left-neighbour column, extending downwards or
// falling back to constant 129 when no left neighbour exists
2208 if (edges[mode].needs_left) {
2210 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
2211 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2212 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2214 if (n_px_need <= n_px_have) {
2215 for (i = 0; i < n_px_need; i++)
2216 l[i] = dst[i * stride - 1];
2218 for (i = 0; i < n_px_have; i++)
2219 l[i] = dst[i * stride - 1];
2220 memset(&l[i], l[i - 1], n_px_need - n_px_have);
2223 memset(l, 129, 4 << tx);
// Reconstruct one intra-coded block: for the luma plane and then both
// chroma planes, walk the block in transform-sized sub-blocks, run the
// edge-pixel fixup (check_intra_mode), the intra predictor, and the
// inverse transform + add (gated on the per-sub-block eob).
// NOTE(review): listing is sampled; statements between numbered lines
// (e.g. the eob>0 guard around itxfm_add) are not shown here.
2230 static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2232 VP9Context *s = ctx->priv_data;
2234 int row = s->row, col = s->col;
// w4/h4: block size in units of 4px; end_x/end_y clip iteration to the
// visible part of the frame (s->cols/s->rows are in 8px units).
2235 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2236 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2237 int end_x = FFMIN(2 * (s->cols - col), w4);
2238 int end_y = FFMIN(2 * (s->rows - row), h4);
// lossless selects the WHT variants of the itxfm_add table (offset +4)
2239 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2240 int uvstep1d = 1 << b->uvtx, p;
// dst: possibly-temporary working surface; dst_r: the real frame buffer,
// used as the source of neighboring edge pixels for prediction
2241 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2243 for (n = 0, y = 0; y < end_y; y += step1d) {
2244 uint8_t *ptr = dst, *ptr_r = dst_r;
2245 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2246 ptr_r += 4 * step1d, n += step) {
// sub-8x8 blocks carry one mode per 4x4 sub-block, otherwise mode[0]
2247 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
// a_buf holds the "above" edge pixels with slack on both sides;
// a points 16 bytes in so a[-1] (topleft) is addressable
2249 LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
2250 uint8_t *a = &a_buf[16], l[32];
2251 enum TxfmType txtp = vp9_intra_txfm_type[mode];
// eob for >8x8 transforms is stored 16-bit aligned
2252 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
// fix up unavailable edges (frame border, missing neighbors); may
// also remap the prediction mode accordingly
2254 mode = check_intra_mode(s, mode, &a, ptr_r,
2255 s->frames[CUR_FRAME].tf.f->linesize[0],
2256 ptr, s->y_stride, l,
2257 col, x, w4, row, y, b->tx, 0);
2258 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2260 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2261 s->block + 16 * n, eob);
2263 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2264 dst += 4 * step1d * s->y_stride;
// Chroma: same loop structure with uvtx-sized steps; prediction mode is
// the single b->uvmode, and the inverse transform is always DCT_DCT.
2272 step = 1 << (b->uvtx * 2);
2273 for (p = 0; p < 2; p++) {
2274 dst = s->dst[1 + p];
2275 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2276 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2277 uint8_t *ptr = dst, *ptr_r = dst_r;
2278 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2279 ptr_r += 4 * uvstep1d, n += step) {
2280 int mode = b->uvmode;
2281 LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
2282 uint8_t *a = &a_buf[16], l[32];
2283 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2285 mode = check_intra_mode(s, mode, &a, ptr_r,
2286 s->frames[CUR_FRAME].tf.f->linesize[1],
2287 ptr, s->uv_stride, l,
2288 col, x, w4, row, y, b->uvtx, p + 1);
2289 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
2291 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2292 s->uvblock[p] + 16 * n, eob);
2294 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2295 dst += 4 * uvstep1d * s->uv_stride;
// Motion compensation for one luma block from a single reference
// direction. Waits (frame-threading) until the needed rows of the
// reference frame are decoded, then either reads directly from the
// reference or, when the 8-tap filter footprint would cross the picture
// edge, bounces through emulated_edge_mc into edge_emu_buffer (stride 80).
// (y, x): block position in pixels; (bw, bh): block size; (w, h): frame size.
2300 static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2301 uint8_t *dst, ptrdiff_t dst_stride,
2302 const uint8_t *ref, ptrdiff_t ref_stride,
2303 ThreadFrame *ref_frame,
2304 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2305 int bw, int bh, int w, int h)
2307 int mx = mv->x, my = mv->y, th;
// NOTE(review): the statements scaling x/y/mx/my by the MV precision
// are in the elided lines (2308-2310) of this listing.
2311 ref += y * ref_stride + x;
2314 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2315 // we use +7 because the last 7 pixels of each sbrow can be changed in
2316 // the longest loopfilter of the next sbrow
// >>6: convert the last needed pixel row to sb64-row progress units
2317 th = (y + bh + 4 * !!my + 7) >> 6;
2318 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
// subpel filtering reads 3 pixels before and 4 after in each filtered
// dimension (hence the !!mx/!!my * 3 / * 4 terms)
2319 if (x < !!mx * 3 || y < !!my * 3 ||
2320 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2321 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2322 ref - !!my * 3 * ref_stride - !!mx * 3,
2324 bw + !!mx * 7, bh + !!my * 7,
2325 x - !!mx * 3, y - !!my * 3, w, h);
2326 ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
// mx/my doubled: luma subpel is finer-grained than the stored chroma units
2329 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
// Chroma counterpart of mc_luma_dir: motion-compensates both the U and V
// planes of one block from a single reference direction, with the same
// frame-thread progress wait and edge emulation fallback. U and V may
// have different source strides, hence the duplicated edge handling.
2332 static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2333 uint8_t *dst_u, uint8_t *dst_v,
2334 ptrdiff_t dst_stride,
2335 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2336 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2337 ThreadFrame *ref_frame,
2338 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2339 int bw, int bh, int w, int h)
2341 int mx = mv->x, my = mv->y, th;
// NOTE(review): the coordinate/MV scaling statements (2342-2344) are in
// lines elided from this listing.
2345 ref_u += y * src_stride_u + x;
2346 ref_v += y * src_stride_v + x;
2349 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2350 // we use +7 because the last 7 pixels of each sbrow can be changed in
2351 // the longest loopfilter of the next sbrow
// >>5 (not >>6 as in luma): chroma rows are subsampled by 2
2352 th = (y + bh + 4 * !!my + 7) >> 5;
2353 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2354 if (x < !!mx * 3 || y < !!my * 3 ||
2355 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2356 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2357 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2359 bw + !!mx * 7, bh + !!my * 7,
2360 x - !!mx * 3, y - !!my * 3, w, h);
// edge_emu_buffer is reused for U then V; stride 80 as in luma
2361 ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2362 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2364 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2365 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2367 bw + !!mx * 7, bh + !!my * 7,
2368 x - !!mx * 3, y - !!my * 3, w, h);
2369 ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2370 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
// fast path: both planes fully inside the reference picture
2372 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2373 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
// Reconstruct one inter-coded block: motion-compensate luma (with special
// sub-8x8 handling for 8x4 / 4x8 / 4x4 partitions, each with its own MV),
// then chroma (averaging the four sub-MVs for sub-8x8 blocks), then add
// the residual via the inverse transform unless the block is skipped.
// The second reference (ref2/tref2/compound prediction) paths use the
// [1] entry of the mc function table (averaging into dst).
// NOTE(review): listing is sampled; the b->comp guards around the
// second-reference calls are in elided lines.
2377 static void inter_recon(AVCodecContext *ctx)
// log2 block width table indexed by [is_uv][block size]
2379 static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
2380 { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
2381 { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
2383 VP9Context *s = ctx->priv_data;
2385 int row = s->row, col = s->col;
2386 ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]], *tref2;
2387 AVFrame *ref1 = tref1->f, *ref2;
2388 int w1 = ref1->width, h1 = ref1->height, w2, h2;
2389 ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
2392 tref2 = &s->refs[s->refidx[b->ref[1]]];
// ---- luma MC ----
2399 if (b->bs > BS_8x8) {
// 8x4: two 8x4 halves stacked vertically, MVs in mv[0] and mv[2]
2400 if (b->bs == BS_8x4) {
2401 mc_luma_dir(s, s->dsp.mc[3][b->filter][0], s->dst[0], ls_y,
2402 ref1->data[0], ref1->linesize[0], tref1,
2403 row << 3, col << 3, &b->mv[0][0], 8, 4, w1, h1);
2404 mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
2405 s->dst[0] + 4 * ls_y, ls_y,
2406 ref1->data[0], ref1->linesize[0], tref1,
2407 (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1);
// second reference (compound): average into the same destinations
2410 mc_luma_dir(s, s->dsp.mc[3][b->filter][1], s->dst[0], ls_y,
2411 ref2->data[0], ref2->linesize[0], tref2,
2412 row << 3, col << 3, &b->mv[0][1], 8, 4, w2, h2);
2413 mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
2414 s->dst[0] + 4 * ls_y, ls_y,
2415 ref2->data[0], ref2->linesize[0], tref2,
2416 (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2);
// 4x8: two 4x8 halves side by side, MVs in mv[0] and mv[1]
2418 } else if (b->bs == BS_4x8) {
2419 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2420 ref1->data[0], ref1->linesize[0], tref1,
2421 row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1);
2422 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2423 ref1->data[0], ref1->linesize[0], tref1,
2424 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1);
2427 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2428 ref2->data[0], ref2->linesize[0], tref2,
2429 row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2);
2430 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2431 ref2->data[0], ref2->linesize[0], tref2,
2432 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2);
2435 av_assert2(b->bs == BS_4x4);
2437 // FIXME if two horizontally adjacent blocks have the same MV,
2438 // do a w8 instead of a w4 call
// 4x4: four quadrants, one MV each (mv[0..3])
2439 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2440 ref1->data[0], ref1->linesize[0], tref1,
2441 row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1);
2442 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2443 ref1->data[0], ref1->linesize[0], tref1,
2444 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1);
2445 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2446 s->dst[0] + 4 * ls_y, ls_y,
2447 ref1->data[0], ref1->linesize[0], tref1,
2448 (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1);
2449 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2450 s->dst[0] + 4 * ls_y + 4, ls_y,
2451 ref1->data[0], ref1->linesize[0], tref1,
2452 (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1);
2455 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2456 ref2->data[0], ref2->linesize[0], tref2,
2457 row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2);
2458 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2459 ref2->data[0], ref2->linesize[0], tref2,
2460 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2);
2461 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2462 s->dst[0] + 4 * ls_y, ls_y,
2463 ref2->data[0], ref2->linesize[0], tref2,
2464 (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2);
2465 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2466 s->dst[0] + 4 * ls_y + 4, ls_y,
2467 ref2->data[0], ref2->linesize[0], tref2,
2468 (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2);
// >= 8x8: a single MC call per reference, size from bwh_tab
2472 int bwl = bwlog_tab[0][b->bs];
2473 int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
2475 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], s->dst[0], ls_y,
2476 ref1->data[0], ref1->linesize[0], tref1,
2477 row << 3, col << 3, &b->mv[0][0],bw, bh, w1, h1);
2480 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], s->dst[0], ls_y,
2481 ref2->data[0], ref2->linesize[0], tref2,
2482 row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2);
// ---- chroma MC ----
2487 int bwl = bwlog_tab[1][b->bs];
2488 int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
// sub-8x8 blocks use the rounded average of the four luma sub-MVs
// as the single chroma MV
2497 if (b->bs > BS_8x8) {
2498 mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
2499 mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
2504 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
2505 s->dst[1], s->dst[2], ls_uv,
2506 ref1->data[1], ref1->linesize[1],
2507 ref1->data[2], ref1->linesize[2], tref1,
2508 row << 2, col << 2, &mvuv, bw, bh, w1, h1);
2511 if (b->bs > BS_8x8) {
2512 mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
2513 mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
2517 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
2518 s->dst[1], s->dst[2], ls_uv,
2519 ref2->data[1], ref2->linesize[1],
2520 ref2->data[2], ref2->linesize[2], tref2,
2521 row << 2, col << 2, &mvuv, bw, bh, w2, h2);
// ---- residual add (only when the block is not skipped) ----
2526 /* mostly copied intra_reconn() */
2528 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2529 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2530 int end_x = FFMIN(2 * (s->cols - col), w4);
2531 int end_y = FFMIN(2 * (s->rows - row), h4);
2532 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2533 int uvstep1d = 1 << b->uvtx, p;
2534 uint8_t *dst = s->dst[0];
2537 for (n = 0, y = 0; y < end_y; y += step1d) {
2539 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2540 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
// inter residual always uses DCT_DCT (no intra txfm type table)
2543 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2544 s->block + 16 * n, eob);
2546 dst += 4 * s->y_stride * step1d;
2554 step = 1 << (b->uvtx * 2);
2555 for (p = 0; p < 2; p++) {
2556 dst = s->dst[p + 1];
2557 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2559 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2560 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2563 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2564 s->uvblock[p] + 16 * n, eob);
2566 dst += 4 * uvstep1d * s->uv_stride;
// Build the loopfilter edge bitmasks for one block into lflvl->mask.
// mask[is_uv][0=row edges, 1=col edges][y][filter-width id] accumulates
// one bit per 8px column; t is the bit for this block's first column and
// m_col the run of bits covering its width. The filter-width ids are
// 0=16px, 1=8px, 2=4px, 3=inner-4px (see VP9Filter in the header).
// (row_and_7, col_and_7): block position within its 64x64 superblock;
// w, h: block size in 8px units; col_end/row_end: odd-edge clipping info.
2572 static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
2573 int row_and_7, int col_and_7,
2574 int w, int h, int col_end, int row_end,
2575 enum TxfmMode tx, int skip_inter)
2577 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2578 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2579 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2580 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2582 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2583 // edges. This means that for UV, we work on two subsampled blocks at
2584 // a time, and we only use the topleft block's mode information to set
2585 // things like block strength. Thus, for any block size smaller than
2586 // 16x16, ignore the odd portion of the block.
2587 if (tx == TX_4X4 && is_uv) {
// NOTE(review): the odd-row/col rounding of w/h/row_and_7/col_and_7 for
// this UV tx4x4 case is in lines elided from this listing.
// ---- non-skipped 4x4 transforms: filter every internal 4px edge ----
2602 if (tx == TX_4X4 && !skip_inter) {
2603 int t = 1 << col_and_7, m_col = (t << w) - t, y;
2604 int m_col_odd = (t << (w - 1)) - t;
2606 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
// UV branch: 8px-edge bits sit only at block start (mask 0x01 pattern)
2608 int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2610 for (y = row_and_7; y < h + row_and_7; y++) {
2611 int col_mask_id = 2 - !(y & 7);
2613 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2614 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2615 // for odd lines, if the odd col is not being filtered,
2616 // skip odd row also:
2623 // if a/c are even row/col and b/d are odd, and d is skipped,
2624 // e.g. right edge of size-66x66.webm, then skip b also (bug)
2625 if ((col_end & 1) && (y & 1)) {
2626 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
2628 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
// luma branch: 8px-edge bits repeat every other column (mask 0x11)
2632 int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2634 for (y = row_and_7; y < h + row_and_7; y++) {
2635 int col_mask_id = 2 - !(y & 3);
2637 lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
2638 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2639 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
2640 lflvl->mask[is_uv][0][y][3] |= m_col;
2641 lflvl->mask[is_uv][1][y][3] |= m_col;
// ---- larger transforms (or skipped): only block-boundary edges ----
2645 int y, t = 1 << col_and_7, m_col = (t << w) - t;
2648 int mask_id = (tx == TX_8X8);
2649 int l2 = tx + is_uv - 1, step1d = 1 << l2;
// per-l2 bit patterns selecting every 1st/2nd/4th/8th column
2650 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2651 int m_row = m_col & masks[l2];
2653 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
2654 // 8wd loopfilter to prevent going off the visible edge.
// (w ^ (w - 1)) == 1 tests that w is odd
2655 if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2656 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2657 int m_row_8 = m_row - m_row_16;
2659 for (y = row_and_7; y < h + row_and_7; y++) {
2660 lflvl->mask[is_uv][0][y][0] |= m_row_16;
2661 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2664 for (y = row_and_7; y < h + row_and_7; y++)
2665 lflvl->mask[is_uv][0][y][mask_id] |= m_row;
2668 if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2669 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2670 lflvl->mask[is_uv][1][y][0] |= m_col;
2671 if (y - row_and_7 == h - 1)
2672 lflvl->mask[is_uv][1][y][1] |= m_col;
2674 for (y = row_and_7; y < h + row_and_7; y += step1d)
2675 lflvl->mask[is_uv][1][y][mask_id] |= m_col;
// skipped inter blocks with tx > 4x4: only outer block edges
2677 } else if (tx != TX_4X4) {
2680 mask_id = (tx == TX_8X8) || (is_uv && h == 1);
2681 lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
2682 mask_id = (tx == TX_8X8) || (is_uv && w == 1);
2683 for (y = row_and_7; y < h + row_and_7; y++)
2684 lflvl->mask[is_uv][0][y][mask_id] |= t;
// skipped 4x4, UV: outer edges with the UV 8px-bit pattern
2686 int t8 = t & 0x01, t4 = t - t8;
2688 for (y = row_and_7; y < h + row_and_7; y++) {
2689 lflvl->mask[is_uv][0][y][2] |= t4;
2690 lflvl->mask[is_uv][0][y][1] |= t8;
2692 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
// skipped 4x4, luma
2694 int t8 = t & 0x11, t4 = t - t8;
2696 for (y = row_and_7; y < h + row_and_7; y++) {
2697 lflvl->mask[is_uv][0][y][2] |= t4;
2698 lflvl->mask[is_uv][0][y][1] |= t8;
2700 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
// Decode and reconstruct a single block at (row, col): parse its mode
// info, reset nnz contexts for skipped blocks, reconstruct (intra or
// inter, possibly via temporary buffers when the block overhangs the
// frame edge), and record per-block loopfilter levels/masks in lflvl.
// Coefficient buffer pointers (s->block, s->eob, ...) are advanced by the
// block's area so the next block reads its own coefficients.
// NOTE(review): listing is sampled; the pass-selection branches that
// decide parse-only vs. reconstruct-only are in elided lines.
2705 static void decode_b(AVCodecContext *ctx, int row, int col,
2706 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2707 enum BlockLevel bl, enum BlockPartition bp)
2709 VP9Context *s = ctx->priv_data;
2711 enum BlockSize bs = bl * 3 + bp;
2712 int y, w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
2714 AVFrame *f = s->frames[CUR_FRAME].tf.f;
// legal MV range for this block (1/8-pel units, 128 = 16px margin)
2720 s->min_mv.x = -(128 + col * 64);
2721 s->min_mv.y = -(128 + row * 64);
2722 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
2723 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
// chroma tx size is one step smaller when the block is only one
// luma-tx wide or tall
2729 b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
// skipped block: clear the above/left non-zero-coefficient contexts
2736 memset(&s->above_y_nnz_ctx[col * 2], 0, w4 * 2);
2737 memset(&s->left_y_nnz_ctx[(row & 7) << 1], 0, h4 * 2);
2738 for (pl = 0; pl < 2; pl++) {
2739 memset(&s->above_uv_nnz_ctx[pl][col], 0, w4);
2740 memset(&s->left_uv_nnz_ctx[pl][row & 7], 0, h4);
// advance coefficient/eob pointers past this block (parse-only path)
2745 s->block += w4 * h4 * 64;
2746 s->uvblock[0] += w4 * h4 * 16;
2747 s->uvblock[1] += w4 * h4 * 16;
2748 s->eob += 4 * w4 * h4;
2749 s->uveob[0] += w4 * h4;
2750 s->uveob[1] += w4 * h4;
2756 // emulated overhangs if the stride of the target buffer can't hold. This
2757 // allows to support emu-edge and so on even if we have large block
// detect blocks overhanging the right/bottom frame edge; those are
// reconstructed into s->tmp_y / s->tmp_uv and copied back below
2759 emu[0] = (col + w4) * 8 > f->linesize[0] ||
2760 (row + h4) > s->rows;
2761 emu[1] = (col + w4) * 4 > f->linesize[1] ||
2762 (row + h4) > s->rows;
2764 s->dst[0] = s->tmp_y;
2767 s->dst[0] = f->data[0] + yoff;
2768 s->y_stride = f->linesize[0];
2771 s->dst[1] = s->tmp_uv[0];
2772 s->dst[2] = s->tmp_uv[1];
2775 s->dst[1] = f->data[1] + uvoff;
2776 s->dst[2] = f->data[2] + uvoff;
2777 s->uv_stride = f->linesize[1];
2780 intra_recon(ctx, yoff, uvoff);
// copy the emulated luma buffer back to the frame in power-of-two
// wide chunks via the unfiltered copy functions (mc[n][0][0][0][0])
2785 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
2787 for (n = 0; o < w; n++) {
2792 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
2793 s->tmp_y + o, 64, h, 0, 0);
// same copy-back for both emulated chroma planes
2799 int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
2801 for (n = 1; o < w; n++) {
2806 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
2807 s->tmp_uv[0] + o, 32, h, 0, 0);
2808 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
2809 s->tmp_uv[1] + o, 32, h, 0, 0);
2815 // pick filter level and find edges to apply filter to
2816 if (s->filter.level &&
2817 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
2818 [b->mode[3] != ZEROMV]) > 0) {
2819 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
2820 int skip_inter = !b->intra && b->skip;
2822 for (y = 0; y < h4; y++)
2823 memset(&lflvl->level[((row & 7) + y) * 8 + (col & 7)], lvl, w4);
2824 mask_edges(lflvl, 0, row & 7, col & 7, x_end, y_end, 0, 0, b->tx, skip_inter);
2825 mask_edges(lflvl, 1, row & 7, col & 7, x_end, y_end,
2826 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
2827 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
2828 b->uvtx, skip_inter);
// lazily fill the limit/mblim LUT entries for this filter level
2830 if (!s->filter.lim_lut[lvl]) {
2831 int sharp = s->filter.sharpness;
2835 limit >>= (sharp + 3) >> 2;
2836 limit = FFMIN(limit, 9 - sharp);
2838 limit = FFMAX(limit, 1);
2840 s->filter.lim_lut[lvl] = limit;
2841 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
// advance coefficient/eob pointers past this block (reconstruct path)
2847 s->block += w4 * h4 * 64;
2848 s->uvblock[0] += w4 * h4 * 16;
2849 s->uvblock[1] += w4 * h4 * 16;
2850 s->eob += 4 * w4 * h4;
2851 s->uveob[0] += w4 * h4;
2852 s->uveob[1] += w4 * h4;
// Recursively decode one superblock partition at level bl: read the
// partition type from the arithmetic coder (context from above/left
// partition state), then either decode a leaf block or recurse into the
// four sub-partitions. Blocks whose right/bottom half falls outside the
// frame can only split in the direction that stays inside, so the
// partition symbol collapses to single prob_branchy reads there.
2856 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
2857 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
2859 VP9Context *s = ctx->priv_data;
// partition context: one bit each from the above and left neighbors
2860 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
2861 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
2862 const uint8_t *p = s->keyframe ? vp9_default_kf_partition_probs[bl][c] :
2863 s->prob.p.partition[bl][c];
2864 enum BlockPartition bp;
// half-block size at this level, in 8px block units (4, 2, 1)
2865 ptrdiff_t hbs = 4 >> bl;
2866 AVFrame *f = s->frames[CUR_FRAME].tf.f;
2867 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
2870 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
2871 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
2872 } else if (col + hbs < s->cols) { // FIXME why not <=?
2873 if (row + hbs < s->rows) { // FIXME why not <=?
// fully inside the frame: all four partition types possible
2874 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
2876 case PARTITION_NONE:
2877 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
2880 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
2881 yoff += hbs * 8 * y_stride;
2882 uvoff += hbs * 4 * uv_stride;
2883 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
2886 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
2889 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
2891 case PARTITION_SPLIT:
2892 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
2893 decode_sb(ctx, row, col + hbs, lflvl,
2894 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
2895 yoff += hbs * 8 * y_stride;
2896 uvoff += hbs * 4 * uv_stride;
2897 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
2898 decode_sb(ctx, row + hbs, col + hbs, lflvl,
2899 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
// bottom half is off-frame: only SPLIT or H are valid
2904 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
2905 bp = PARTITION_SPLIT;
2906 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
2907 decode_sb(ctx, row, col + hbs, lflvl,
2908 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
2911 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// right half is off-frame: only SPLIT or V are valid
2913 } else if (row + hbs < s->rows) { // FIXME why not <=?
2914 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
2915 bp = PARTITION_SPLIT;
2916 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
2917 yoff += hbs * 8 * y_stride;
2918 uvoff += hbs * 4 * uv_stride;
2919 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
2922 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// both halves off-frame: SPLIT is forced
2925 bp = PARTITION_SPLIT;
2926 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
// update frame-level partition counts for backward adaptation
2928 s->counts.partition[bl][c][bp]++;
// Second-pass variant of decode_sb: replays the partition structure
// recorded in the per-block array (b->bl / b->bp) during the first pass
// instead of reading partition symbols from the bitstream, calling
// decode_b for reconstruction at the stored leaves.
2931 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
2932 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
2934 VP9Context *s = ctx->priv_data;
2936 ptrdiff_t hbs = 4 >> bl;
2937 AVFrame *f = s->frames[CUR_FRAME].tf.f;
2938 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
// leaf at the smallest level: must be an 8x8-level block
2941 av_assert2(b->bl == BL_8X8);
2942 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
2943 } else if (s->b->bl == bl) {
// stored leaf at this level: NONE, or H/V when the second half fits
2944 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
2945 if (b->bp == PARTITION_H && row + hbs < s->rows) {
2946 yoff += hbs * 8 * y_stride;
2947 uvoff += hbs * 4 * uv_stride;
2948 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
2949 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
2952 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// stored split: recurse into whichever quadrants are inside the frame
2955 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
2956 if (col + hbs < s->cols) { // FIXME why not <=?
2957 if (row + hbs < s->rows) {
2958 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
2959 uvoff + 4 * hbs, bl + 1);
2960 yoff += hbs * 8 * y_stride;
2961 uvoff += hbs * 4 * uv_stride;
2962 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
2963 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
2964 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
2968 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
2970 } else if (row + hbs < s->rows) {
2971 yoff += hbs * 8 * y_stride;
2972 uvoff += hbs * 4 * uv_stride;
2973 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
// Apply the deblocking loopfilter to one 64x64 superblock using the
// per-edge bitmasks built by mask_edges and the per-8x8 filter levels in
// lflvl->level. Four sweeps: Y vertical (column) edges, Y horizontal
// (row) edges, then the same pair for each chroma plane. For each edge
// bit the filter level L selects E (mblim) / I (lim) from the LUTs and
// H = L >> 4 is the hev threshold; paired adjacent edges with the same
// width are merged into loop_filter_16 / loop_filter_mix2 calls.
2978 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
2979 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
2981 VP9Context *s = ctx->priv_data;
2982 AVFrame *f = s->frames[CUR_FRAME].tf.f;
2983 uint8_t *dst = f->data[0] + yoff, *lvl = lflvl->level;
2984 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
2987 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
2988 // if you think of them as acting on a 8x8 block max, we can interleave
2989 // each v/h within the single x loop, but that only works if we work on
2990 // 8 pixel blocks, and we won't always do that (we want at least 16px
2991 // to use SSE2 optimizations, perhaps 32 for AVX2)
2993 // filter edges between columns, Y plane (e.g. block1 | block2)
// process two 8px rows per iteration so vertically-adjacent edges can
// be merged into one 16-high filter call
2994 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
2995 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
2996 uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
// hm*: union of all edge widths pending in this row pair;
// *13/*23 are the inner-4px (index 3) masks handled separately
2997 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
2998 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
2999 unsigned hm = hm1 | hm2 | hm13 | hm23;
// x walks one bit (= one 8px column) per iteration; loop ends when no
// mask bit at or above x remains
3001 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
3003 int L = *l, H = L >> 4;
3004 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3007 if (hmask1[0] & x) {
3008 if (hmask2[0] & x) {
// both rows want a 16px edge with equal level: one 16-high call
3009 av_assert2(l[8] == L);
3010 s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
3012 s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
3014 } else if (hm2 & x) {
// two different-width edges stacked: pack both levels into E/I
// (low/high byte) for the mix2 filter
3017 E |= s->filter.mblim_lut[L] << 8;
3018 I |= s->filter.lim_lut[L] << 8;
3019 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3021 [0](ptr, ls_y, E, I, H);
3023 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3024 [0](ptr, ls_y, E, I, H);
3027 } else if (hm2 & x) {
// edge only in the lower 8px row
3028 int L = l[8], H = L >> 4;
3029 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3032 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3033 [0](ptr + 8 * ls_y, ls_y, E, I, H);
// inner-4px edges (offset +4 inside the 8px column)
3037 int L = *l, H = L >> 4;
3038 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3043 E |= s->filter.mblim_lut[L] << 8;
3044 I |= s->filter.lim_lut[L] << 8;
3045 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
3047 s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
3049 } else if (hm23 & x) {
3050 int L = l[8], H = L >> 4;
3051 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3053 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
3059 // filter edges between rows, Y plane (e.g. ------)
3061 dst = f->data[0] + yoff;
3063 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
3064 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
3065 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
// horizontally-adjacent edges merge: step two bits (16px) per iteration
3067 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
3070 int L = *l, H = L >> 4;
3071 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3074 if (vmask[0] & (x << 1)) {
3075 av_assert2(l[1] == L);
3076 s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
3078 s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
3080 } else if (vm & (x << 1)) {
3083 E |= s->filter.mblim_lut[L] << 8;
3084 I |= s->filter.lim_lut[L] << 8;
3085 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3086 [!!(vmask[1] & (x << 1))]
3087 [1](ptr, ls_y, E, I, H);
3089 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3090 [1](ptr, ls_y, E, I, H);
3092 } else if (vm & (x << 1)) {
3093 int L = l[1], H = L >> 4;
3094 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3096 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
3097 [1](ptr + 8, ls_y, E, I, H);
// inner-4px horizontal edges (offset +4 rows)
3101 int L = *l, H = L >> 4;
3102 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3104 if (vm3 & (x << 1)) {
3107 E |= s->filter.mblim_lut[L] << 8;
3108 I |= s->filter.lim_lut[L] << 8;
3109 s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
3111 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
3113 } else if (vm3 & (x << 1)) {
3114 int L = l[1], H = L >> 4;
3115 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3117 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
3122 // same principle but for U/V planes
3123 for (p = 0; p < 2; p++) {
3125 dst = f->data[1 + p] + uvoff;
// chroma: 4 rows of 8 subsampled pixels; level array stride doubles
// (lvl += 32) since levels are stored at luma granularity
3126 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
3127 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
3128 uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
3129 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
3130 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
3132 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
3135 int L = *l, H = L >> 4;
3136 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3138 if (hmask1[0] & x) {
3139 if (hmask2[0] & x) {
3140 av_assert2(l[16] == L);
3141 s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
3143 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
3145 } else if (hm2 & x) {
3148 E |= s->filter.mblim_lut[L] << 8;
3149 I |= s->filter.lim_lut[L] << 8;
3150 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3152 [0](ptr, ls_uv, E, I, H);
3154 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3155 [0](ptr, ls_uv, E, I, H);
3157 } else if (hm2 & x) {
3158 int L = l[16], H = L >> 4;
3159 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3161 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3162 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
3170 dst = f->data[1 + p] + uvoff;
3171 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
3172 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
3173 unsigned vm = vmask[0] | vmask[1] | vmask[2];
// chroma horizontal edges: step four mask bits (16 luma px) at a time
3175 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
3178 int L = *l, H = L >> 4;
3179 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3182 if (vmask[0] & (x << 2)) {
3183 av_assert2(l[2] == L);
3184 s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
3186 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
3188 } else if (vm & (x << 2)) {
3191 E |= s->filter.mblim_lut[L] << 8;
3192 I |= s->filter.lim_lut[L] << 8;
3193 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3194 [!!(vmask[1] & (x << 2))]
3195 [1](ptr, ls_uv, E, I, H);
3197 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3198 [1](ptr, ls_uv, E, I, H);
3200 } else if (vm & (x << 2)) {
3201 int L = l[2], H = L >> 4;
3202 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3204 s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
3205 [1](ptr + 8, ls_uv, E, I, H);
// Compute the [start, end) range of tile number idx when n units are
// divided into 1 << log2_n tiles. The division is done in superblock
// units and the result is scaled by << 3 into 8px-block units; FFMIN
// clamps the last tile to the frame size.
3215 static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
3217 int sb_start = ( idx * n) >> log2_n;
3218 int sb_end = ((idx + 1) * n) >> log2_n;
3219 *start = FFMIN(sb_start, n) << 3;
3220 *end = FFMIN(sb_end, n) << 3;
// Backward-adapt one binary probability *p towards the probability
// implied by the observed counts (ct0 "zero" events, ct1 "one" events).
// The blend weight is update_factor scaled by how many events were seen
// relative to max_count, so sparse statistics move the probability less.
// NOTE(review): the early return for ct == 0 and the load of the old
// probability into p1 are in lines elided from this listing.
3223 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3224 int max_count, int update_factor)
3226 unsigned ct = ct0 + ct1, p2, p1;
// p2: empirical probability in 1/256 units, rounded, clipped to [1,255]
3232 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3233 p2 = av_clip(p2, 1, 255);
3234 ct = FFMIN(ct, max_count);
3235 update_factor = FASTDIV(update_factor * ct, max_count);
3237 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3238 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
3241 static void adapt_probs(VP9Context *s)
3244 prob_context *p = &s->prob_ctx[s->framectxid].p;
3245 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
3248 for (i = 0; i < 4; i++)
3249 for (j = 0; j < 2; j++)
3250 for (k = 0; k < 2; k++)
3251 for (l = 0; l < 6; l++)
3252 for (m = 0; m < 6; m++) {
3253 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3254 unsigned *e = s->counts.eob[i][j][k][l][m];
3255 unsigned *c = s->counts.coef[i][j][k][l][m];
3257 if (l == 0 && m >= 3) // dc only has 3 pt
3260 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3261 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3262 adapt_prob(&pp[2], c[1], c[2], 24, uf);
3265 if (s->keyframe || s->intraonly) {
3266 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3267 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3268 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3269 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
3274 for (i = 0; i < 3; i++)
3275 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
3278 for (i = 0; i < 4; i++)
3279 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
3282 if (s->comppredmode == PRED_SWITCHABLE) {
3283 for (i = 0; i < 5; i++)
3284 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
3288 if (s->comppredmode != PRED_SINGLEREF) {
3289 for (i = 0; i < 5; i++)
3290 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3291 s->counts.comp_ref[i][1], 20, 128);
3294 if (s->comppredmode != PRED_COMPREF) {
3295 for (i = 0; i < 5; i++) {
3296 uint8_t *pp = p->single_ref[i];
3297 unsigned (*c)[2] = s->counts.single_ref[i];
3299 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3300 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3304 // block partitioning
3305 for (i = 0; i < 4; i++)
3306 for (j = 0; j < 4; j++) {
3307 uint8_t *pp = p->partition[i][j];
3308 unsigned *c = s->counts.partition[i][j];
3310 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3311 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3312 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3316 if (s->txfmmode == TX_SWITCHABLE) {
3317 for (i = 0; i < 2; i++) {
3318 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3320 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3321 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3322 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3323 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3324 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3325 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3329 // interpolation filter
3330 if (s->filtermode == FILTER_SWITCHABLE) {
3331 for (i = 0; i < 4; i++) {
3332 uint8_t *pp = p->filter[i];
3333 unsigned *c = s->counts.filter[i];
3335 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3336 adapt_prob(&pp[1], c[1], c[2], 20, 128);
3341 for (i = 0; i < 7; i++) {
3342 uint8_t *pp = p->mv_mode[i];
3343 unsigned *c = s->counts.mv_mode[i];
3345 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3346 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3347 adapt_prob(&pp[2], c[1], c[3], 20, 128);
3352 uint8_t *pp = p->mv_joint;
3353 unsigned *c = s->counts.mv_joint;
3355 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3356 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3357 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3361 for (i = 0; i < 2; i++) {
3363 unsigned *c, (*c2)[2], sum;
3365 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3366 s->counts.mv_comp[i].sign[1], 20, 128);
3368 pp = p->mv_comp[i].classes;
3369 c = s->counts.mv_comp[i].classes;
3370 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3371 adapt_prob(&pp[0], c[0], sum, 20, 128);
3373 adapt_prob(&pp[1], c[1], sum, 20, 128);
3375 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3376 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3378 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3379 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3381 adapt_prob(&pp[6], c[6], sum, 20, 128);
3382 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3383 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3384 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3386 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3387 s->counts.mv_comp[i].class0[1], 20, 128);
3388 pp = p->mv_comp[i].bits;
3389 c2 = s->counts.mv_comp[i].bits;
3390 for (j = 0; j < 10; j++)
3391 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3393 for (j = 0; j < 2; j++) {
3394 pp = p->mv_comp[i].class0_fp[j];
3395 c = s->counts.mv_comp[i].class0_fp[j];
3396 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3397 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3398 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3400 pp = p->mv_comp[i].fp;
3401 c = s->counts.mv_comp[i].fp;
3402 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3403 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3404 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3406 if (s->highprecisionmvs) {
3407 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3408 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3409 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3410 s->counts.mv_comp[i].hp[1], 20, 128);
3415 for (i = 0; i < 4; i++) {
3416 uint8_t *pp = p->y_mode[i];
3417 unsigned *c = s->counts.y_mode[i], sum, s2;
3419 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3420 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3421 sum -= c[TM_VP8_PRED];
3422 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3423 sum -= c[VERT_PRED];
3424 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3425 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3427 adapt_prob(&pp[3], s2, sum, 20, 128);
3429 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3430 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3431 sum -= c[DIAG_DOWN_LEFT_PRED];
3432 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3433 sum -= c[VERT_LEFT_PRED];
3434 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3435 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3439 for (i = 0; i < 10; i++) {
3440 uint8_t *pp = p->uv_mode[i];
3441 unsigned *c = s->counts.uv_mode[i], sum, s2;
3443 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3444 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3445 sum -= c[TM_VP8_PRED];
3446 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3447 sum -= c[VERT_PRED];
3448 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3449 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3451 adapt_prob(&pp[3], s2, sum, 20, 128);
3453 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3454 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3455 sum -= c[DIAG_DOWN_LEFT_PRED];
3456 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3457 sum -= c[VERT_LEFT_PRED];
3458 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3459 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
/**
 * Free the decoder's per-resolution scratch buffers.
 *
 * av_freep() frees each buffer and resets the pointer to NULL, so this is
 * safe to call repeatedly (e.g. on size change before reallocation).
 * NOTE(review): other above_*/left_* context arrays appear to be carved out
 * of these base allocations elsewhere in the file — confirm before adding
 * frees here.
 */
3463 static void free_buffers(VP9Context *s)
3465 av_freep(&s->above_partition_ctx);
3466 av_freep(&s->b_base);
3467 av_freep(&s->block_base);
/**
 * Codec close callback: release every frame object owned by the context.
 *
 * For each frame the backing buffer is released first (only if data[0] is
 * set, i.e. the frame actually holds a buffer), then the AVFrame shell
 * itself is freed with av_frame_free().
 */
3470 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3472 VP9Context *s = ctx->priv_data;
// The two internal frames (CUR_FRAME / LAST_FRAME).
3475 for (i = 0; i < 2; i++) {
3476 if (s->frames[i].tf.f->data[0])
3477 vp9_unref_frame(ctx, &s->frames[i]);
3478 av_frame_free(&s->frames[i].tf.f);
// The 8 reference slots plus their pending ("next") replacements.
3480 for (i = 0; i < 8; i++) {
3481 if (s->refs[i].f->data[0])
3482 ff_thread_release_buffer(ctx, &s->refs[i]);
3483 av_frame_free(&s->refs[i].f);
3484 if (s->next_refs[i].f->data[0])
3485 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3486 av_frame_free(&s->next_refs[i].f);
/**
 * Main decode callback: parse one packet into (at most) one output frame.
 *
 * Flow: parse the uncompressed frame header; handle the "show existing
 * frame" short-circuit; rotate CUR->LAST frame buffers and allocate the new
 * current frame; pre-reference the post-decode reference set (next_refs);
 * reset the above-row contexts; then run the (possibly two-pass) tile
 * decode + loopfilter loop; finally commit next_refs into refs and output
 * the frame unless it is invisible.
 */
3496 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3497 int *got_frame, AVPacket *pkt)
3499 const uint8_t *data = pkt->data;
3500 int size = pkt->size;
3501 VP9Context *s = ctx->priv_data;
3502 int res, tile_row, tile_col, i, ref, row, col;
3503 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
// res == 0 from the header parser means "show existing frame": the packet
// just re-displays reference slot 'ref' and carries no coded frame data.
3506 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3508 } else if (res == 0) {
3509 if (!s->refs[ref].f->data[0]) {
3510 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3511 return AVERROR_INVALIDDATA;
3513 if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
// Rotate frame buffers: previous current frame becomes LAST_FRAME (used
// for MV prediction on inter frames), then allocate a fresh CUR_FRAME.
3521 if (s->frames[LAST_FRAME].tf.f->data[0])
3522 vp9_unref_frame(ctx, &s->frames[LAST_FRAME]);
3523 if (!s->keyframe && s->frames[CUR_FRAME].tf.f->data[0] &&
3524 (res = vp9_ref_frame(ctx, &s->frames[LAST_FRAME], &s->frames[CUR_FRAME])) < 0)
3526 if (s->frames[CUR_FRAME].tf.f->data[0])
3527 vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
3528 if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
3530 f = s->frames[CUR_FRAME].tf.f;
3531 f->key_frame = s->keyframe;
3532 f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3533 ls_y = f->linesize[0];
3534 ls_uv =f->linesize[1];
// Build the post-decode reference set up front: slots flagged in
// refreshrefmask will point at the new frame, others keep the old ref.
3537 for (i = 0; i < 8; i++) {
3538 if (s->next_refs[i].f->data[0])
3539 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3540 if (s->refreshrefmask & (1 << i)) {
3541 res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
3543 res = ff_thread_ref_frame(&s->refs[i]);
3549 // main tile decode loop
// Reset the per-column ("above") entropy contexts for the whole frame.
3550 memset(s->above_partition_ctx, 0, s->cols);
3551 memset(s->above_skip_ctx, 0, s->cols);
3552 if (s->keyframe || s->intraonly) {
3553 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
3555 memset(s->above_mode_ctx, NEARESTMV, s->cols);
3557 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
3558 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
3559 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
3560 memset(s->above_segpred_ctx, 0, s->cols);
// Two-pass decoding is used with frame threading when this frame updates
// the entropy context non-parallel: pass 1 parses, pass 2 reconstructs.
3561 s->pass = s->uses_2pass =
3562 ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
// In parallelmode the adapted probabilities are committed immediately
// (coef copies limited to 3 bytes: the 3 model probs per node), so other
// threads can proceed without waiting for frame completion.
3563 if (s->refreshctx && s->parallelmode) {
3566 for (i = 0; i < 4; i++) {
3567 for (j = 0; j < 2; j++)
3568 for (k = 0; k < 2; k++)
3569 for (l = 0; l < 6; l++)
3570 for (m = 0; m < 6; m++)
3571 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
3572 s->prob.coef[i][j][k][l][m], 3);
3573 if (s->txfmmode == i)
3576 s->prob_ctx[s->framectxid].p = s->prob.p;
3577 ff_thread_finish_setup(ctx);
// Rewind the coefficient/EOB buffers at the start of each pass.
3583 s->block = s->block_base;
3584 s->uvblock[0] = s->uvblock_base[0];
3585 s->uvblock[1] = s->uvblock_base[1];
3586 s->eob = s->eob_base;
3587 s->uveob[0] = s->uveob_base[0];
3588 s->uveob[1] = s->uveob_base[1];
// First tile loop: set up one range decoder per tile column.
3590 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
3591 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
3592 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
3594 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
// The last tile has no explicit size field; it uses the rest of the data.
3597 if (tile_col == s->tiling.tile_cols - 1 &&
3598 tile_row == s->tiling.tile_rows - 1) {
3601 tile_size = AV_RB32(data);
3605 if (tile_size > size) {
3606 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3607 return AVERROR_INVALIDDATA;
3609 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
3610 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
3611 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3612 return AVERROR_INVALIDDATA;
// Decode superblock rows: 8 8x8 units (64 luma / 32 chroma pixels) tall.
3619 for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
3620 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
3621 struct VP9Filter *lflvl_ptr = s->lflvl;
3622 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
3624 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3625 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
3626 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
// Reset the per-row ("left") contexts at the start of each tile row.
3629 memset(s->left_partition_ctx, 0, 8);
3630 memset(s->left_skip_ctx, 0, 8);
3631 if (s->keyframe || s->intraonly) {
3632 memset(s->left_mode_ctx, DC_PRED, 16);
3634 memset(s->left_mode_ctx, NEARESTMV, 8);
3636 memset(s->left_y_nnz_ctx, 0, 16);
3637 memset(s->left_uv_nnz_ctx, 0, 16);
3638 memset(s->left_segpred_ctx, 0, 8);
// Swap in this tile column's saved range-decoder state.
3640 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
3643 for (col = s->tiling.tile_col_start;
3644 col < s->tiling.tile_col_end;
3645 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3646 // FIXME integrate with lf code (i.e. zero after each
3647 // use, similar to invtxfm coefficients, or similar)
3649 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
// Pass 2 replays stored symbols (decode_sb_mem); pass 0/1 parses fresh.
3653 decode_sb_mem(ctx, row, col, lflvl_ptr,
3654 yoff2, uvoff2, BL_64X64);
3656 decode_sb(ctx, row, col, lflvl_ptr,
3657 yoff2, uvoff2, BL_64X64);
// Save the range-decoder state back for the next superblock row.
3661 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
3669 // backup pre-loopfilter reconstruction data for intra
3670 // prediction of next row of sb64s
3671 if (row + 8 < s->rows) {
3672 memcpy(s->intra_pred_data[0],
3673 f->data[0] + yoff + 63 * ls_y,
3675 memcpy(s->intra_pred_data[1],
3676 f->data[1] + uvoff + 31 * ls_uv,
3678 memcpy(s->intra_pred_data[2],
3679 f->data[2] + uvoff + 31 * ls_uv,
3683 // loopfilter one row
3684 if (s->filter.level) {
3687 lflvl_ptr = s->lflvl;
3688 for (col = 0; col < s->cols;
3689 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3690 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
3694 // FIXME maybe we can make this more finegrained by running the
3695 // loopfilter per-block instead of after each sbrow
3696 // In fact that would also make intra pred left preparation easier?
// Let consumer threads know this superblock row is fully reconstructed.
3697 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
// Non-parallel context refresh: adapt probabilities after the full parse.
3701 if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
3703 ff_thread_finish_setup(ctx);
3705 } while (s->pass++ == 1);
3706 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
// Commit the prepared next_refs into the active reference set.
3709 for (i = 0; i < 8; i++) {
3710 if (s->refs[i].f->data[0])
3711 ff_thread_release_buffer(ctx, &s->refs[i]);
3712 ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
// Invisible (alt-ref) frames are decoded but not output.
3715 if (!s->invisible) {
3716 if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
/**
 * Flush callback (e.g. on seek): drop the internal frames and all
 * reference-slot buffers. The AVFrame shells stay allocated; only the
 * underlying buffers are released.
 */
3724 static void vp9_decode_flush(AVCodecContext *ctx)
3726 VP9Context *s = ctx->priv_data;
3729 for (i = 0; i < 2; i++)
3730 vp9_unref_frame(ctx, &s->frames[i]);
3731 for (i = 0; i < 8; i++)
3732 ff_thread_release_buffer(ctx, &s->refs[i]);
/**
 * Allocate the AVFrame shells for the 2 internal frames and the 8
 * reference / next-reference slot pairs.
 *
 * On any allocation failure the whole context is torn down via
 * vp9_decode_free() (which tolerates the partially-initialized state)
 * and AVERROR(ENOMEM) is returned.
 */
3735 static int init_frames(AVCodecContext *ctx)
3737 VP9Context *s = ctx->priv_data;
3740 for (i = 0; i < 2; i++) {
3741 s->frames[i].tf.f = av_frame_alloc();
3742 if (!s->frames[i].tf.f) {
3743 vp9_decode_free(ctx);
3744 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
3745 return AVERROR(ENOMEM);
3748 for (i = 0; i < 8; i++) {
3749 s->refs[i].f = av_frame_alloc();
3750 s->next_refs[i].f = av_frame_alloc();
3751 if (!s->refs[i].f || !s->next_refs[i].f) {
3752 vp9_decode_free(ctx);
3753 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
3754 return AVERROR(ENOMEM);
/**
 * Codec init callback: set the fixed output pixel format, initialize the
 * DSP function tables, and allocate the frame shells.
 *
 * allocate_progress enables per-row progress tracking for frame threading;
 * sharpness = -1 marks the loopfilter limit LUT as not yet computed.
 */
3761 static av_cold int vp9_decode_init(AVCodecContext *ctx)
3763 VP9Context *s = ctx->priv_data;
3765 ctx->internal->allocate_progress = 1;
3766 ctx->pix_fmt = AV_PIX_FMT_YUV420P;
3767 ff_vp9dsp_init(&s->dsp);
3768 ff_videodsp_init(&s->vdsp, 8);
3769 s->filter.sharpness = -1;
3771 return init_frames(ctx);
/**
 * Per-thread init for frame threading: each worker context only needs its
 * own frame shells; all other state is copied in update_thread_context().
 */
3774 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
3776 return init_frames(avctx);
/**
 * Frame-threading sync: copy decoding state from the source thread's
 * context into this thread's context before it decodes the next frame.
 *
 * Copies frame references, the committed reference set (note: the source's
 * next_refs become this thread's refs), and the entropy/loopfilter/
 * segmentation state needed for inter-frame prediction.
 */
3779 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
3782 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
3784 // detect size changes in other threads
3785 if (s->above_partition_ctx &&
3786 (!ssrc->above_partition_ctx || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
3790 for (i = 0; i < 2; i++) {
3791 if (s->frames[i].tf.f->data[0])
3792 vp9_unref_frame(dst, &s->frames[i]);
3793 if (ssrc->frames[i].tf.f->data[0]) {
3794 if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
// The source thread's post-decode reference set is this thread's
// pre-decode reference set.
3798 for (i = 0; i < 8; i++) {
3799 if (s->refs[i].f->data[0])
3800 ff_thread_release_buffer(dst, &s->refs[i]);
3801 if (ssrc->next_refs[i].f->data[0]) {
3802 if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
3807 s->invisible = ssrc->invisible;
3808 s->keyframe = ssrc->keyframe;
3809 s->uses_2pass = ssrc->uses_2pass;
3810 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
3811 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
// Segmentation feature data persists across frames only while enabled.
3812 if (ssrc->segmentation.enabled) {
3813 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
3814 sizeof(s->segmentation.feat));
3820 AVCodec ff_vp9_decoder = {
3822 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
3823 .type = AVMEDIA_TYPE_VIDEO,
3824 .id = AV_CODEC_ID_VP9,
3825 .priv_data_size = sizeof(VP9Context),
3826 .init = vp9_decode_init,
3827 .close = vp9_decode_free,
3828 .decode = vp9_decode_frame,
3829 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
3830 .flush = vp9_decode_flush,
3831 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
3832 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),