2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
32 #include "libavutil/avassert.h"
34 #define VP9_SYNCCODE 0x498342
73 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
74 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
77 typedef struct VP9Block {
78 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
79 enum FilterMode filter;
80 VP56mv mv[4 /* b_idx */][2 /* ref */];
82 enum TxfmMode tx, uvtx;
84 int row, row7, col, col7;
86 ptrdiff_t y_stride, uv_stride;
89 typedef struct VP9Context {
100 uint8_t keyframe, last_keyframe;
101 uint8_t invisible, last_invisible;
102 uint8_t use_last_frame_mvs;
108 uint8_t refreshrefmask;
109 uint8_t highprecisionmvs;
110 enum FilterMode filtermode;
111 uint8_t allowcompinter;
114 uint8_t parallelmode;
118 uint8_t varcompref[2];
119 AVFrame *refs[8], *f, *fb[10];
125 uint8_t mblim_lut[64];
133 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
138 uint8_t absolute_vals;
144 uint8_t skip_enabled;
153 unsigned log2_tile_cols, log2_tile_rows;
154 unsigned tile_cols, tile_rows;
155 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
157 unsigned sb_cols, sb_rows, rows, cols;
160 uint8_t coef[4][2][2][6][6][3];
164 uint8_t coef[4][2][2][6][6][11];
169 unsigned y_mode[4][10];
170 unsigned uv_mode[10][10];
171 unsigned filter[4][3];
172 unsigned mv_mode[7][4];
173 unsigned intra[4][2];
175 unsigned single_ref[5][2][2];
176 unsigned comp_ref[5][2];
177 unsigned tx32p[2][4];
178 unsigned tx16p[2][3];
181 unsigned mv_joint[4];
184 unsigned classes[11];
186 unsigned bits[10][2];
187 unsigned class0_fp[2][4];
189 unsigned class0_hp[2];
192 unsigned partition[4][4][4];
193 unsigned coef[4][2][2][6][6][3];
194 unsigned eob[4][2][2][6][6][2];
196 enum TxfmMode txfmmode;
197 enum CompPredMode comppredmode;
199 // contextual (left/above) cache
200 uint8_t left_partition_ctx[8], *above_partition_ctx;
201 uint8_t left_mode_ctx[16], *above_mode_ctx;
202 // FIXME maybe merge some of the below in a flags field?
203 uint8_t left_y_nnz_ctx[16], *above_y_nnz_ctx;
204 uint8_t left_uv_nnz_ctx[2][8], *above_uv_nnz_ctx[2];
205 uint8_t left_skip_ctx[8], *above_skip_ctx; // 1bit
206 uint8_t left_txfm_ctx[8], *above_txfm_ctx; // 2bit
207 uint8_t left_segpred_ctx[8], *above_segpred_ctx; // 1bit
208 uint8_t left_intra_ctx[8], *above_intra_ctx; // 1bit
209 uint8_t left_comp_ctx[8], *above_comp_ctx; // 1bit
210 uint8_t left_ref_ctx[8], *above_ref_ctx; // 2bit
211 uint8_t left_filter_ctx[8], *above_filter_ctx;
212 VP56mv left_mv_ctx[16][2], (*above_mv_ctx)[2];
215 uint8_t *intra_pred_data[3];
216 uint8_t *segmentation_map;
217 struct VP9mvrefPair *mv[2];
218 struct VP9Filter *lflvl;
219 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71*80];
221 // block reconstruction intermediates
222 DECLARE_ALIGNED(32, int16_t, block)[4096];
223 DECLARE_ALIGNED(32, int16_t, uvblock)[2][1024];
225 uint8_t uveob[2][64];
226 VP56mv min_mv, max_mv;
227 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
228 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
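/* Block dimensions per block size, indexed as bwh_tab[unit][bs][0 = width,
 * 1 = height]: unit 0 counts 4-pixel steps, unit 1 counts 8-pixel steps
 * (sub-8x8 sizes round up to one 8-pixel step). This reading is derived from
 * how the table is used in decode_mode() and decode_coeffs() below. */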
231 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
233 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
234 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
236 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
237 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
241 static int update_size(AVCodecContext *ctx, int w, int h)
243 VP9Context *s = ctx->priv_data;
246 if (s->above_partition_ctx && w == ctx->width && h == ctx->height)
251 s->sb_cols = (w + 63) >> 6;
252 s->sb_rows = (h + 63) >> 6;
253 s->cols = (w + 7) >> 3;
254 s->rows = (h + 7) >> 3;
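/* The per-column context buffers below share a single allocation: assign()
 * carves out a typed array of (sb_cols * n) entries and advances the cursor
 * p past it. */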
256 #define assign(var, type, n) var = (type) p; p += s->sb_cols * n * sizeof(*var)
257 av_free(s->above_partition_ctx);
258 p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx) +
259 64 * s->sb_rows * (1 + sizeof(*s->mv[0]) * 2)));
261 return AVERROR(ENOMEM);
262 assign(s->above_partition_ctx, uint8_t *, 8);
263 assign(s->above_skip_ctx, uint8_t *, 8);
264 assign(s->above_txfm_ctx, uint8_t *, 8);
265 assign(s->above_mode_ctx, uint8_t *, 16);
266 assign(s->above_y_nnz_ctx, uint8_t *, 16);
267 assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
268 assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
269 assign(s->intra_pred_data[0], uint8_t *, 64);
270 assign(s->intra_pred_data[1], uint8_t *, 32);
271 assign(s->intra_pred_data[2], uint8_t *, 32);
272 assign(s->above_segpred_ctx, uint8_t *, 8);
273 assign(s->above_intra_ctx, uint8_t *, 8);
274 assign(s->above_comp_ctx, uint8_t *, 8);
275 assign(s->above_ref_ctx, uint8_t *, 8);
276 assign(s->above_filter_ctx, uint8_t *, 8);
277 assign(s->lflvl, struct VP9Filter *, 1);
278 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
279 assign(s->segmentation_map, uint8_t *, 64 * s->sb_rows);
280 assign(s->mv[0], struct VP9mvrefPair *, 64 * s->sb_rows);
281 assign(s->mv[1], struct VP9mvrefPair *, 64 * s->sb_rows);
287 // for some reason the sign bit is at the end, not the start, of a bit sequence
288 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
290 int v = get_bits(gb, n);
291 return get_bits1(gb) ? -v : v;
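/* Illustrative example: with n = 4, magnitude bits 0101 (= 5) followed by a
 * sign bit of 1 decode to -5, while a sign bit of 0 gives +5. */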
294 static av_always_inline int inv_recenter_nonneg(int v, int m)
296 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
299 // differential forward probability updates
300 static int update_prob(VP56RangeCoder *c, int p)
302 static const int inv_map_table[254] = {
303 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
304 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
305 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
306 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
307 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
308 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
309 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
310 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
311 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
312 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
313 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
314 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
315 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
316 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
317 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
318 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
319 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
320 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
325 /* This code is trying to do a differential probability update. For a
326 * current probability A in the range [1, 255], the difference to any new
327 * probability lies in the range [1-A, 255-A]. Part of that absolute range
328 * can be reached from both the positive and the negative side, whereas the
329 * rest can only be reached from one side. The shared part is coded
330 * differentially, i.e.
331 * times two where the value of the lowest bit specifies the sign, and
332 * the single part is then coded on top of this. This absolute difference
333 * then again has a value of [0,254], but a bigger value in this range
334 * indicates that we're further away from the original value A, so we
335 * can code this as a VLC code, since higher values are increasingly
336 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
337 * updates vs. the 'fine, exact' updates further down the range, which
338 * adds one extra dimension to this differential update model. */
340 if (!vp8_rac_get(c)) {
341 d = vp8_rac_get_uint(c, 4) + 0;
342 } else if (!vp8_rac_get(c)) {
343 d = vp8_rac_get_uint(c, 4) + 16;
344 } else if (!vp8_rac_get(c)) {
345 d = vp8_rac_get_uint(c, 5) + 32;
347 d = vp8_rac_get_uint(c, 7);
349 d = (d << 1) - 65 + vp8_rac_get(c);
353 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
354 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
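/* Worked example (illustrative values): for p = 30 and a decoded delta index
 * d = 1, inv_map_table[1] = 20; since p <= 128 the result is
 * 1 + inv_recenter_nonneg(20, 29) = 1 + (29 + 10) = 40. */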
357 static int decode_frame_header(AVCodecContext *ctx,
358 const uint8_t *data, int size, int *ref)
360 VP9Context *s = ctx->priv_data;
361 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
362 const uint8_t *data2;
365 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
366 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
369 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
370 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
371 return AVERROR_INVALIDDATA;
373 s->profile = get_bits1(&s->gb);
374 if (get_bits1(&s->gb)) { // reserved bit
375 av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
376 return AVERROR_INVALIDDATA;
378 if (get_bits1(&s->gb)) {
379 *ref = get_bits(&s->gb, 3);
382 s->last_keyframe = s->keyframe;
383 s->keyframe = !get_bits1(&s->gb);
384 s->last_invisible = s->invisible;
385 s->invisible = !get_bits1(&s->gb);
386 s->errorres = get_bits1(&s->gb);
387 // FIXME disable this upon resolution change
388 s->use_last_frame_mvs = !s->errorres && !s->last_invisible;
390 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
391 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
392 return AVERROR_INVALIDDATA;
394 s->colorspace = get_bits(&s->gb, 3);
395 if (s->colorspace == 7) { // RGB = profile 1
396 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
397 return AVERROR_INVALIDDATA;
399 s->fullrange = get_bits1(&s->gb);
400 // for profile 1, here follows the subsampling bits
401 s->refreshrefmask = 0xff;
402 w = get_bits(&s->gb, 16) + 1;
403 h = get_bits(&s->gb, 16) + 1;
404 if (get_bits1(&s->gb)) // display size
405 skip_bits(&s->gb, 32);
407 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
408 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
410 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
411 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
412 return AVERROR_INVALIDDATA;
414 s->refreshrefmask = get_bits(&s->gb, 8);
415 w = get_bits(&s->gb, 16) + 1;
416 h = get_bits(&s->gb, 16) + 1;
417 if (get_bits1(&s->gb)) // display size
418 skip_bits(&s->gb, 32);
420 s->refreshrefmask = get_bits(&s->gb, 8);
421 s->refidx[0] = get_bits(&s->gb, 3);
422 s->signbias[0] = get_bits1(&s->gb);
423 s->refidx[1] = get_bits(&s->gb, 3);
424 s->signbias[1] = get_bits1(&s->gb);
425 s->refidx[2] = get_bits(&s->gb, 3);
426 s->signbias[2] = get_bits1(&s->gb);
427 if (!s->refs[s->refidx[0]] || !s->refs[s->refidx[1]] ||
428 !s->refs[s->refidx[2]]) {
429 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
430 return AVERROR_INVALIDDATA;
432 if (get_bits1(&s->gb)) {
433 w = s->refs[s->refidx[0]]->width;
434 h = s->refs[s->refidx[0]]->height;
435 } else if (get_bits1(&s->gb)) {
436 w = s->refs[s->refidx[1]]->width;
437 h = s->refs[s->refidx[1]]->height;
438 } else if (get_bits1(&s->gb)) {
439 w = s->refs[s->refidx[2]]->width;
440 h = s->refs[s->refidx[2]]->height;
442 w = get_bits(&s->gb, 16) + 1;
443 h = get_bits(&s->gb, 16) + 1;
445 if (get_bits1(&s->gb)) // display size
446 skip_bits(&s->gb, 32);
447 s->highprecisionmvs = get_bits1(&s->gb);
448 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
450 s->allowcompinter = s->signbias[0] != s->signbias[1] ||
451 s->signbias[0] != s->signbias[2];
452 if (s->allowcompinter) {
453 if (s->signbias[0] == s->signbias[1]) {
455 s->varcompref[0] = 0;
456 s->varcompref[1] = 1;
457 } else if (s->signbias[0] == s->signbias[2]) {
459 s->varcompref[0] = 0;
460 s->varcompref[1] = 2;
463 s->varcompref[0] = 1;
464 s->varcompref[1] = 2;
469 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
470 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
471 s->framectxid = c = get_bits(&s->gb, 2);
473 /* loopfilter header data */
474 s->filter.level = get_bits(&s->gb, 6);
475 sharp = get_bits(&s->gb, 3);
476 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
477 // the old cache values since they are still valid
478 if (s->filter.sharpness != sharp)
479 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
480 s->filter.sharpness = sharp;
481 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
482 if (get_bits1(&s->gb)) {
483 for (i = 0; i < 4; i++)
484 if (get_bits1(&s->gb))
485 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
486 for (i = 0; i < 2; i++)
487 if (get_bits1(&s->gb))
488 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
491 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
494 /* quantization header data */
495 s->yac_qi = get_bits(&s->gb, 8);
496 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
497 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
498 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
499 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
500 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
502 /* segmentation header info */
503 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
504 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
505 for (i = 0; i < 7; i++)
506 s->prob.seg[i] = get_bits1(&s->gb) ?
507 get_bits(&s->gb, 8) : 255;
508 if ((s->segmentation.temporal = get_bits1(&s->gb)))
509 for (i = 0; i < 3; i++)
510 s->prob.segpred[i] = get_bits1(&s->gb) ?
511 get_bits(&s->gb, 8) : 255;
514 if (get_bits1(&s->gb)) {
515 s->segmentation.absolute_vals = get_bits1(&s->gb);
516 for (i = 0; i < 8; i++) {
517 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
518 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
519 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
520 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
521 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
522 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
523 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
527 s->segmentation.feat[0].q_enabled = 0;
528 s->segmentation.feat[0].lf_enabled = 0;
529 s->segmentation.feat[0].skip_enabled = 0;
530 s->segmentation.feat[0].ref_enabled = 0;
533 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
534 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
535 int qyac, qydc, quvac, quvdc, lflvl, sh;
537 if (s->segmentation.feat[i].q_enabled) {
538 if (s->segmentation.absolute_vals)
539 qyac = s->segmentation.feat[i].q_val;
541 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
545 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
546 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
547 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
548 qyac = av_clip_uintp2(qyac, 8);
550 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
551 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
552 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
553 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];
555 sh = s->filter.level >= 32;
556 if (s->segmentation.feat[i].lf_enabled) {
557 if (s->segmentation.absolute_vals)
558 lflvl = s->segmentation.feat[i].lf_val;
560 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
562 lflvl = s->filter.level;
564 s->segmentation.feat[i].lflvl[0][0] =
565 s->segmentation.feat[i].lflvl[0][1] =
566 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
567 for (j = 1; j < 4; j++) {
568 s->segmentation.feat[i].lflvl[j][0] =
569 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
570 s->lf_delta.mode[0]) << sh), 6);
571 s->segmentation.feat[i].lflvl[j][1] =
572 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
573 s->lf_delta.mode[1]) << sh), 6);
578 if ((res = update_size(ctx, w, h)) < 0) {
579 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
582 for (s->tiling.log2_tile_cols = 0;
583 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
584 s->tiling.log2_tile_cols++) ;
585 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
586 max = FFMAX(0, max - 1);
587 while (max > s->tiling.log2_tile_cols) {
588 if (get_bits1(&s->gb))
589 s->tiling.log2_tile_cols++;
593 s->tiling.log2_tile_rows = decode012(&s->gb);
594 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
595 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
596 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
597 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
598 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
600 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
601 return AVERROR(ENOMEM);
605 if (s->keyframe || s->errorres || s->intraonly) {
606 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
607 s->prob_ctx[3].p = vp9_default_probs;
608 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
609 sizeof(vp9_default_coef_probs));
610 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
611 sizeof(vp9_default_coef_probs));
612 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
613 sizeof(vp9_default_coef_probs));
614 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
615 sizeof(vp9_default_coef_probs));
618 // next 16 bits is size of the rest of the header (arith-coded)
619 size2 = get_bits(&s->gb, 16);
620 data2 = align_get_bits(&s->gb);
621 if (size2 > size - (data2 - data)) {
622 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
623 return AVERROR_INVALIDDATA;
625 ff_vp56_init_range_decoder(&s->c, data2, size2);
626 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
627 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
628 return AVERROR_INVALIDDATA;
631 if (s->keyframe || s->intraonly) {
632 memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
634 memset(&s->counts, 0, sizeof(s->counts));
636 // FIXME is it faster to not copy here, but do it down in the fw updates
637 // as explicit copies if the fw update is missing (and skip the copy upon
639 s->prob.p = s->prob_ctx[c].p;
643 s->txfmmode = TX_4X4;
645 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
646 if (s->txfmmode == 3)
647 s->txfmmode += vp8_rac_get(&s->c);
649 if (s->txfmmode == TX_SWITCHABLE) {
650 for (i = 0; i < 2; i++)
651 if (vp56_rac_get_prob_branchy(&s->c, 252))
652 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
653 for (i = 0; i < 2; i++)
654 for (j = 0; j < 2; j++)
655 if (vp56_rac_get_prob_branchy(&s->c, 252))
656 s->prob.p.tx16p[i][j] =
657 update_prob(&s->c, s->prob.p.tx16p[i][j]);
658 for (i = 0; i < 2; i++)
659 for (j = 0; j < 3; j++)
660 if (vp56_rac_get_prob_branchy(&s->c, 252))
661 s->prob.p.tx32p[i][j] =
662 update_prob(&s->c, s->prob.p.tx32p[i][j]);
667 for (i = 0; i < 4; i++) {
668 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
669 if (vp8_rac_get(&s->c)) {
670 for (j = 0; j < 2; j++)
671 for (k = 0; k < 2; k++)
672 for (l = 0; l < 6; l++)
673 for (m = 0; m < 6; m++) {
674 uint8_t *p = s->prob.coef[i][j][k][l][m];
675 uint8_t *r = ref[j][k][l][m];
676 if (m >= 3 && l == 0) // dc only has 3 pt
678 for (n = 0; n < 3; n++) {
679 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
680 p[n] = update_prob(&s->c, r[n]);
688 for (j = 0; j < 2; j++)
689 for (k = 0; k < 2; k++)
690 for (l = 0; l < 6; l++)
691 for (m = 0; m < 6; m++) {
692 uint8_t *p = s->prob.coef[i][j][k][l][m];
693 uint8_t *r = ref[j][k][l][m];
694 if (m > 3 && l == 0) // dc only has 3 pt
700 if (s->txfmmode == i)
705 for (i = 0; i < 3; i++)
706 if (vp56_rac_get_prob_branchy(&s->c, 252))
707 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
708 if (!s->keyframe && !s->intraonly) {
709 for (i = 0; i < 7; i++)
710 for (j = 0; j < 3; j++)
711 if (vp56_rac_get_prob_branchy(&s->c, 252))
712 s->prob.p.mv_mode[i][j] =
713 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
715 if (s->filtermode == FILTER_SWITCHABLE)
716 for (i = 0; i < 4; i++)
717 for (j = 0; j < 2; j++)
718 if (vp56_rac_get_prob_branchy(&s->c, 252))
719 s->prob.p.filter[i][j] =
720 update_prob(&s->c, s->prob.p.filter[i][j]);
722 for (i = 0; i < 4; i++)
723 if (vp56_rac_get_prob_branchy(&s->c, 252))
724 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
726 if (s->allowcompinter) {
727 s->comppredmode = vp8_rac_get(&s->c);
729 s->comppredmode += vp8_rac_get(&s->c);
730 if (s->comppredmode == PRED_SWITCHABLE)
731 for (i = 0; i < 5; i++)
732 if (vp56_rac_get_prob_branchy(&s->c, 252))
734 update_prob(&s->c, s->prob.p.comp[i]);
736 s->comppredmode = PRED_SINGLEREF;
739 if (s->comppredmode != PRED_COMPREF) {
740 for (i = 0; i < 5; i++) {
741 if (vp56_rac_get_prob_branchy(&s->c, 252))
742 s->prob.p.single_ref[i][0] =
743 update_prob(&s->c, s->prob.p.single_ref[i][0]);
744 if (vp56_rac_get_prob_branchy(&s->c, 252))
745 s->prob.p.single_ref[i][1] =
746 update_prob(&s->c, s->prob.p.single_ref[i][1]);
750 if (s->comppredmode != PRED_SINGLEREF) {
751 for (i = 0; i < 5; i++)
752 if (vp56_rac_get_prob_branchy(&s->c, 252))
753 s->prob.p.comp_ref[i] =
754 update_prob(&s->c, s->prob.p.comp_ref[i]);
757 for (i = 0; i < 4; i++)
758 for (j = 0; j < 9; j++)
759 if (vp56_rac_get_prob_branchy(&s->c, 252))
760 s->prob.p.y_mode[i][j] =
761 update_prob(&s->c, s->prob.p.y_mode[i][j]);
763 for (i = 0; i < 4; i++)
764 for (j = 0; j < 4; j++)
765 for (k = 0; k < 3; k++)
766 if (vp56_rac_get_prob_branchy(&s->c, 252))
767 s->prob.p.partition[3 - i][j][k] =
768 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
770 // mv fields don't use the update_prob subexp model for some reason
771 for (i = 0; i < 3; i++)
772 if (vp56_rac_get_prob_branchy(&s->c, 252))
773 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
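/* When the update flag fires, a raw 7-bit value v is read and the new
 * probability is (v << 1) | 1, i.e. always odd and in [1, 255]; the same
 * encoding is used for all MV probabilities below. */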
775 for (i = 0; i < 2; i++) {
776 if (vp56_rac_get_prob_branchy(&s->c, 252))
777 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
779 for (j = 0; j < 10; j++)
780 if (vp56_rac_get_prob_branchy(&s->c, 252))
781 s->prob.p.mv_comp[i].classes[j] =
782 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
784 if (vp56_rac_get_prob_branchy(&s->c, 252))
785 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
787 for (j = 0; j < 10; j++)
788 if (vp56_rac_get_prob_branchy(&s->c, 252))
789 s->prob.p.mv_comp[i].bits[j] =
790 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
793 for (i = 0; i < 2; i++) {
794 for (j = 0; j < 2; j++)
795 for (k = 0; k < 3; k++)
796 if (vp56_rac_get_prob_branchy(&s->c, 252))
797 s->prob.p.mv_comp[i].class0_fp[j][k] =
798 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
800 for (j = 0; j < 3; j++)
801 if (vp56_rac_get_prob_branchy(&s->c, 252))
802 s->prob.p.mv_comp[i].fp[j] =
803 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
806 if (s->highprecisionmvs) {
807 for (i = 0; i < 2; i++) {
808 if (vp56_rac_get_prob_branchy(&s->c, 252))
809 s->prob.p.mv_comp[i].class0_hp =
810 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
812 if (vp56_rac_get_prob_branchy(&s->c, 252))
813 s->prob.p.mv_comp[i].hp =
814 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
819 return (data2 - data) + size2;
822 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
825 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
826 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
829 static void find_ref_mvs(VP9Context *s,
830 VP56mv *pmv, int ref, int z, int idx, int sb)
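/* Candidate neighbour positions per block size: each entry is a
 * { column, row } offset in 8x8-block units relative to the current block,
 * scanned in the order listed. */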
832 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
833 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
834 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
835 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
836 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
837 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
838 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
839 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
840 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
841 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
842 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
843 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
844 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
845 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
846 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
847 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
848 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
849 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
850 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
851 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
852 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
853 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
854 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
855 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
856 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
857 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
858 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
860 VP9Block *const b = &s->b;
861 int row = b->row, col = b->col, row7 = b->row7;
862 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
863 #define INVALID_MV 0x80008000U
864 uint32_t mem = INVALID_MV;
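/* Candidate MVs are handled as packed 32-bit words (x and y as two int16_t)
 * so they can be compared and copied in one load; INVALID_MV packs
 * x = y = -32768 and is used as a "nothing found yet" sentinel. */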
867 #define RETURN_DIRECT_MV(mv) \
869 uint32_t m = AV_RN32A(&mv); \
873 } else if (mem == INVALID_MV) { \
875 } else if (m != mem) { \
882 if (sb == 2 || sb == 1) {
883 RETURN_DIRECT_MV(b->mv[0][z]);
884 } else if (sb == 3) {
885 RETURN_DIRECT_MV(b->mv[2][z]);
886 RETURN_DIRECT_MV(b->mv[1][z]);
887 RETURN_DIRECT_MV(b->mv[0][z]);
890 #define RETURN_MV(mv) \
895 clamp_mv(&tmp, &mv, s); \
896 m = AV_RN32A(&tmp); \
900 } else if (mem == INVALID_MV) { \
902 } else if (m != mem) { \
907 uint32_t m = AV_RN32A(&mv); \
909 clamp_mv(pmv, &mv, s); \
911 } else if (mem == INVALID_MV) { \
913 } else if (m != mem) { \
914 clamp_mv(pmv, &mv, s); \
921 struct VP9mvrefPair *mv = &s->mv[0][(row - 1) * s->sb_cols * 8 + col];
922 if (mv->ref[0] == ref) {
923 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
924 } else if (mv->ref[1] == ref) {
925 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
928 if (col > s->tiling.tile_col_start) {
929 struct VP9mvrefPair *mv = &s->mv[0][row * s->sb_cols * 8 + col - 1];
930 if (mv->ref[0] == ref) {
931 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
932 } else if (mv->ref[1] == ref) {
933 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
941 // previously coded MVs in this neighbourhood, using same reference frame
943 int c = p[i][0] + col, r = p[i][1] + row;
945 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
946 struct VP9mvrefPair *mv = &s->mv[0][r * s->sb_cols * 8 + c];
948 if (mv->ref[0] == ref) {
949 RETURN_MV(mv->mv[0]);
950 } else if (mv->ref[1] == ref) {
951 RETURN_MV(mv->mv[1]);
956 // MV at this position in previous frame, using same reference frame
957 if (s->use_last_frame_mvs) {
958 struct VP9mvrefPair *mv = &s->mv[1][row * s->sb_cols * 8 + col];
960 if (mv->ref[0] == ref) {
961 RETURN_MV(mv->mv[0]);
962 } else if (mv->ref[1] == ref) {
963 RETURN_MV(mv->mv[1]);
967 #define RETURN_SCALE_MV(mv, scale) \
970 VP56mv mv_temp = { -mv.x, -mv.y }; \
971 RETURN_MV(mv_temp); \
977 // previously coded MVs in this neighbourhood, using different reference frame
978 for (i = 0; i < 8; i++) {
979 int c = p[i][0] + col, r = p[i][1] + row;
981 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
982 struct VP9mvrefPair *mv = &s->mv[0][r * s->sb_cols * 8 + c];
984 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
985 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
987 if (mv->ref[1] != ref && mv->ref[1] >= 0) {
988 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
993 // MV at this position in previous frame, using different reference frame
994 if (s->use_last_frame_mvs) {
995 struct VP9mvrefPair *mv = &s->mv[1][row * s->sb_cols * 8 + col];
997 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
998 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1000 if (mv->ref[1] != ref && mv->ref[1] >= 0) {
1001 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1008 #undef RETURN_SCALE_MV
1011 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1013 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1014 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1015 s->prob.p.mv_comp[idx].classes);
1017 s->counts.mv_comp[idx].sign[sign]++;
1018 s->counts.mv_comp[idx].classes[c]++;
1022 for (n = 0, m = 0; m < c; m++) {
1023 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1025 s->counts.mv_comp[idx].bits[m][bit]++;
1028 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1030 s->counts.mv_comp[idx].fp[bit]++;
1032 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1033 s->counts.mv_comp[idx].hp[bit]++;
1037 // bug in libvpx - we count for bw entropy purposes even if the
1039 s->counts.mv_comp[idx].hp[1]++;
1043 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1044 s->counts.mv_comp[idx].class0[n]++;
1045 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1046 s->prob.p.mv_comp[idx].class0_fp[n]);
1047 s->counts.mv_comp[idx].class0_fp[n][bit]++;
1048 n = (n << 3) | (bit << 1);
1050 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1051 s->counts.mv_comp[idx].class0_hp[bit]++;
1055 // bug in libvpx - we count for bw entropy purposes even if the
1057 s->counts.mv_comp[idx].class0_hp[1]++;
1061 return sign ? -(n + 1) : (n + 1);
1064 static void fill_mv(VP9Context *s,
1065 VP56mv *mv, int mode, int sb)
1067 VP9Block *const b = &s->b;
1069 if (mode == ZEROMV) {
1070 memset(mv, 0, sizeof(*mv) * 2);
1074 // FIXME cache this value and reuse for other subblocks
1075 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1076 mode == NEWMV ? -1 : sb);
1077 // FIXME maybe move this code into find_ref_mvs()
1078 if ((mode == NEWMV || sb == -1) &&
1079 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1093 if (mode == NEWMV) {
1094 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1095 s->prob.p.mv_joint);
1097 s->counts.mv_joint[j]++;
1098 if (j >= MV_JOINT_V)
1099 mv[0].y += read_mv_component(s, 0, hp);
1101 mv[0].x += read_mv_component(s, 1, hp);
1105 // FIXME cache this value and reuse for other subblocks
1106 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1107 mode == NEWMV ? -1 : sb);
1108 if ((mode == NEWMV || sb == -1) &&
1109 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1123 if (mode == NEWMV) {
1124 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1125 s->prob.p.mv_joint);
1127 s->counts.mv_joint[j]++;
1128 if (j >= MV_JOINT_V)
1129 mv[1].y += read_mv_component(s, 0, hp);
1131 mv[1].x += read_mv_component(s, 1, hp);
1137 static void decode_mode(AVCodecContext *ctx)
1139 static const uint8_t left_ctx[N_BS_SIZES] = {
1140 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1142 static const uint8_t above_ctx[N_BS_SIZES] = {
1143 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
1145 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1146 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1147 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1149 VP9Context *s = ctx->priv_data;
1150 VP9Block *const b = &s->b;
1151 int row = b->row, col = b->col, row7 = b->row7;
1152 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
1153 int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
1154 int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
1155 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
1157 if (!s->segmentation.enabled) {
1159 } else if (s->keyframe || s->intraonly) {
1160 b->seg_id = s->segmentation.update_map ?
1161 vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg) : 0;
1162 } else if (!s->segmentation.update_map ||
1163 (s->segmentation.temporal &&
1164 vp56_rac_get_prob_branchy(&s->c,
1165 s->prob.segpred[s->above_segpred_ctx[col] +
1166 s->left_segpred_ctx[row7]]))) {
1169 for (y = 0; y < h4; y++)
1170 for (x = 0; x < w4; x++)
1171 pred = FFMIN(pred, s->segmentation_map[(y + row) * 8 * s->sb_cols + x + col]);
1174 memset(&s->above_segpred_ctx[col], 1, w4);
1175 memset(&s->left_segpred_ctx[row7], 1, h4);
1177 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1180 memset(&s->above_segpred_ctx[col], 0, w4);
1181 memset(&s->left_segpred_ctx[row7], 0, h4);
1183 if ((s->segmentation.enabled && s->segmentation.update_map) || s->keyframe) {
1184 for (y = 0; y < h4; y++)
1185 memset(&s->segmentation_map[(y + row) * 8 * s->sb_cols + col],
1189 b->skip = s->segmentation.enabled &&
1190 s->segmentation.feat[b->seg_id].skip_enabled;
1192 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1193 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1194 s->counts.skip[c][b->skip]++;
1197 if (s->keyframe || s->intraonly) {
1199 } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1200 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1204 if (have_a && have_l) {
1205 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1208 c = have_a ? 2 * s->above_intra_ctx[col] :
1209 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1211 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1212 s->counts.intra[c][bit]++;
1216 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
1220 c = (s->above_skip_ctx[col] ? max_tx :
1221 s->above_txfm_ctx[col]) +
1222 (s->left_skip_ctx[row7] ? max_tx :
1223 s->left_txfm_ctx[row7]) > max_tx;
1225 c = s->above_skip_ctx[col] ? 1 :
1226 (s->above_txfm_ctx[col] * 2 > max_tx);
1228 } else if (have_l) {
1229 c = s->left_skip_ctx[row7] ? 1 :
1230 (s->left_txfm_ctx[row7] * 2 > max_tx);
1236 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1238 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1240 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1242 s->counts.tx32p[c][b->tx]++;
1245 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1247 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1248 s->counts.tx16p[c][b->tx]++;
1251 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1252 s->counts.tx8p[c][b->tx]++;
1259 b->tx = FFMIN(max_tx, s->txfmmode);
1262 if (s->keyframe || s->intraonly) {
1263 uint8_t *a = &s->above_mode_ctx[col * 2];
1264 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
1267 if (b->bs > BS_8x8) {
1268 // FIXME the memory storage intermediates here aren't really
1269 // necessary, they're just there to make the code slightly
1271 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1272 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1273 if (b->bs != BS_8x4) {
1274 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1275 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1276 l[0] = a[1] = b->mode[1];
1278 l[0] = a[1] = b->mode[1] = b->mode[0];
1280 if (b->bs != BS_4x8) {
1281 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1282 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1283 if (b->bs != BS_8x4) {
1284 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1285 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1286 l[1] = a[1] = b->mode[3];
1288 l[1] = a[1] = b->mode[3] = b->mode[2];
1291 b->mode[2] = b->mode[0];
1292 l[1] = a[1] = b->mode[3] = b->mode[1];
1295 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1296 vp9_default_kf_ymode_probs[*a][*l]);
1297 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1298 // FIXME this can probably be optimized
1299 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1300 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
1302 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1303 vp9_default_kf_uvmode_probs[b->mode[3]]);
1304 } else if (b->intra) {
1306 if (b->bs > BS_8x8) {
1307 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1308 s->prob.p.y_mode[0]);
1309 s->counts.y_mode[0][b->mode[0]]++;
1310 if (b->bs != BS_8x4) {
1311 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1312 s->prob.p.y_mode[0]);
1313 s->counts.y_mode[0][b->mode[1]]++;
1315 b->mode[1] = b->mode[0];
1317 if (b->bs != BS_4x8) {
1318 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1319 s->prob.p.y_mode[0]);
1320 s->counts.y_mode[0][b->mode[2]]++;
1321 if (b->bs != BS_8x4) {
1322 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1323 s->prob.p.y_mode[0]);
1324 s->counts.y_mode[0][b->mode[3]]++;
1326 b->mode[3] = b->mode[2];
1329 b->mode[2] = b->mode[0];
1330 b->mode[3] = b->mode[1];
1333 static const uint8_t size_group[10] = {
1334 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1336 int sz = size_group[b->bs];
1338 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1339 s->prob.p.y_mode[sz]);
1340 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1341 s->counts.y_mode[sz][b->mode[3]]++;
1343 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1344 s->prob.p.uv_mode[b->mode[3]]);
1345 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
1347 static const uint8_t inter_mode_ctx_lut[14][14] = {
1348 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1349 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1350 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1351 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1352 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1353 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1354 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1355 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1356 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1357 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1358 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1359 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1360 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1361 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
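/* inter_mode_ctx_lut is indexed as [above_mode_ctx][left_mode_ctx] and yields
 * the probability context used with vp9_inter_mode_tree below. */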
1364 if (s->segmentation.feat[b->seg_id].ref_enabled) {
1365 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1367 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1369 // read comp_pred flag
1370 if (s->comppredmode != PRED_SWITCHABLE) {
1371 b->comp = s->comppredmode == PRED_COMPREF;
1375 // FIXME add intra as ref=0xff (or -1) to make these easier?
1378 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1380 } else if (s->above_comp_ctx[col]) {
1381 c = 2 + (s->left_intra_ctx[row7] ||
1382 s->left_ref_ctx[row7] == s->fixcompref);
1383 } else if (s->left_comp_ctx[row7]) {
1384 c = 2 + (s->above_intra_ctx[col] ||
1385 s->above_ref_ctx[col] == s->fixcompref);
1387 c = (!s->above_intra_ctx[col] &&
1388 s->above_ref_ctx[col] == s->fixcompref) ^
1389 (!s->left_intra_ctx[row7] &&
1390 s->left_ref_ctx[row & 7] == s->fixcompref);
1393 c = s->above_comp_ctx[col] ? 3 :
1394 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1396 } else if (have_l) {
1397 c = s->left_comp_ctx[row7] ? 3 :
1398 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1402 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1403 s->counts.comp[c][b->comp]++;
1406 // read actual references
1407 // FIXME probably cache a few variables here to prevent repetitive
1408 // memory accesses below
1409 if (b->comp) /* two references */ {
1410 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1412 b->ref[fix_idx] = s->fixcompref;
1413 // FIXME can this codeblob be replaced by some sort of LUT?
1416 if (s->above_intra_ctx[col]) {
1417 if (s->left_intra_ctx[row7]) {
1420 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1422 } else if (s->left_intra_ctx[row7]) {
1423 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1425 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1427 if (refl == refa && refa == s->varcompref[1]) {
1429 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1430 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1431 (refl == s->fixcompref && refa == s->varcompref[0])) {
1434 c = (refa == refl) ? 3 : 1;
1436 } else if (!s->left_comp_ctx[row7]) {
1437 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1440 c = (refl == s->varcompref[1] &&
1441 refa != s->varcompref[1]) ? 2 : 4;
1443 } else if (!s->above_comp_ctx[col]) {
1444 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1447 c = (refa == s->varcompref[1] &&
1448 refl != s->varcompref[1]) ? 2 : 4;
1451 c = (refl == refa) ? 4 : 2;
1455 if (s->above_intra_ctx[col]) {
1457 } else if (s->above_comp_ctx[col]) {
1458 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1460 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1463 } else if (have_l) {
1464 if (s->left_intra_ctx[row7]) {
1466 } else if (s->left_comp_ctx[row7]) {
1467 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1469 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1474 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1475 b->ref[var_idx] = s->varcompref[bit];
1476 s->counts.comp_ref[c][bit]++;
1477 } else /* single reference */ {
1480 if (have_a && !s->above_intra_ctx[col]) {
1481 if (have_l && !s->left_intra_ctx[row7]) {
1482 if (s->left_comp_ctx[row7]) {
1483 if (s->above_comp_ctx[col]) {
1484 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1485 !s->above_ref_ctx[col]);
1487 c = (3 * !s->above_ref_ctx[col]) +
1488 (!s->fixcompref || !s->left_ref_ctx[row7]);
1490 } else if (s->above_comp_ctx[col]) {
1491 c = (3 * !s->left_ref_ctx[row7]) +
1492 (!s->fixcompref || !s->above_ref_ctx[col]);
1494 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1496 } else if (s->above_intra_ctx[col]) {
1498 } else if (s->above_comp_ctx[col]) {
1499 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1501 c = 4 * (!s->above_ref_ctx[col]);
1503 } else if (have_l && !s->left_intra_ctx[row7]) {
1504 if (s->left_intra_ctx[row7]) {
1506 } else if (s->left_comp_ctx[row7]) {
1507 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1509 c = 4 * (!s->left_ref_ctx[row7]);
1514 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1515 s->counts.single_ref[c][0][bit]++;
1519 // FIXME can this codeblob be replaced by some sort of LUT?
1522 if (s->left_intra_ctx[row7]) {
1523 if (s->above_intra_ctx[col]) {
1525 } else if (s->above_comp_ctx[col]) {
1526 c = 1 + 2 * (s->fixcompref == 1 ||
1527 s->above_ref_ctx[col] == 1);
1528 } else if (!s->above_ref_ctx[col]) {
1531 c = 4 * (s->above_ref_ctx[col] == 1);
1533 } else if (s->above_intra_ctx[col]) {
1534 if (s->left_intra_ctx[row7]) {
1536 } else if (s->left_comp_ctx[row7]) {
1537 c = 1 + 2 * (s->fixcompref == 1 ||
1538 s->left_ref_ctx[row7] == 1);
1539 } else if (!s->left_ref_ctx[row7]) {
1542 c = 4 * (s->left_ref_ctx[row7] == 1);
1544 } else if (s->above_comp_ctx[col]) {
1545 if (s->left_comp_ctx[row7]) {
1546 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1547 c = 3 * (s->fixcompref == 1 ||
1548 s->left_ref_ctx[row7] == 1);
1552 } else if (!s->left_ref_ctx[row7]) {
1553 c = 1 + 2 * (s->fixcompref == 1 ||
1554 s->above_ref_ctx[col] == 1);
1556 c = 3 * (s->left_ref_ctx[row7] == 1) +
1557 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1559 } else if (s->left_comp_ctx[row7]) {
1560 if (!s->above_ref_ctx[col]) {
1561 c = 1 + 2 * (s->fixcompref == 1 ||
1562 s->left_ref_ctx[row7] == 1);
1564 c = 3 * (s->above_ref_ctx[col] == 1) +
1565 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1567 } else if (!s->above_ref_ctx[col]) {
1568 if (!s->left_ref_ctx[row7]) {
1571 c = 4 * (s->left_ref_ctx[row7] == 1);
1573 } else if (!s->left_ref_ctx[row7]) {
1574 c = 4 * (s->above_ref_ctx[col] == 1);
1576 c = 2 * (s->left_ref_ctx[row7] == 1) +
1577 2 * (s->above_ref_ctx[col] == 1);
1580 if (s->above_intra_ctx[col] ||
1581 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1583 } else if (s->above_comp_ctx[col]) {
1584 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1586 c = 4 * (s->above_ref_ctx[col] == 1);
1589 } else if (have_l) {
1590 if (s->left_intra_ctx[row7] ||
1591 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1593 } else if (s->left_comp_ctx[row7]) {
1594 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1596 c = 4 * (s->left_ref_ctx[row7] == 1);
1601 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1602 s->counts.single_ref[c][1][bit]++;
1603 b->ref[0] = 1 + bit;
1608 if (b->bs <= BS_8x8) {
1609 if (s->segmentation.feat[b->seg_id].skip_enabled) {
1610 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
1612 static const uint8_t off[10] = {
1613 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1616 // FIXME this needs to use the LUT tables from find_ref_mvs
1617 // because not all are -1,0/0,-1
1618 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1619 [s->left_mode_ctx[row7 + off[b->bs]]];
1621 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1622 s->prob.p.mv_mode[c]);
1623 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1624 s->counts.mv_mode[c][b->mode[0] - 10]++;
1628 if (s->filtermode == FILTER_SWITCHABLE) {
1631 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1632 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1633 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1634 s->left_filter_ctx[row7] : 3;
1636 c = s->above_filter_ctx[col];
1638 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1639 c = s->left_filter_ctx[row7];
1644 b->filter = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1645 s->prob.p.filter[c]);
1646 s->counts.filter[c][b->filter]++;
1648 b->filter = s->filtermode;
1651 if (b->bs > BS_8x8) {
1652 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1654 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1655 s->prob.p.mv_mode[c]);
1656 s->counts.mv_mode[c][b->mode[0] - 10]++;
1657 fill_mv(s, b->mv[0], b->mode[0], 0);
1659 if (b->bs != BS_8x4) {
1660 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1661 s->prob.p.mv_mode[c]);
1662 s->counts.mv_mode[c][b->mode[1] - 10]++;
1663 fill_mv(s, b->mv[1], b->mode[1], 1);
1665 b->mode[1] = b->mode[0];
1666 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1667 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1670 if (b->bs != BS_4x8) {
1671 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1672 s->prob.p.mv_mode[c]);
1673 s->counts.mv_mode[c][b->mode[2] - 10]++;
1674 fill_mv(s, b->mv[2], b->mode[2], 2);
1676 if (b->bs != BS_8x4) {
1677 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1678 s->prob.p.mv_mode[c]);
1679 s->counts.mv_mode[c][b->mode[3] - 10]++;
1680 fill_mv(s, b->mv[3], b->mode[3], 3);
1682 b->mode[3] = b->mode[2];
1683 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1684 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1687 b->mode[2] = b->mode[0];
1688 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1689 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1690 b->mode[3] = b->mode[1];
1691 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1692 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
1695 fill_mv(s, b->mv[0], b->mode[0], -1);
1696 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1697 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1698 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
1699 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1700 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1701 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
1705 // FIXME this can probably be optimized
1706 memset(&s->above_skip_ctx[col], b->skip, w4);
1707 memset(&s->left_skip_ctx[row7], b->skip, h4);
1708 memset(&s->above_txfm_ctx[col], b->tx, w4);
1709 memset(&s->left_txfm_ctx[row7], b->tx, h4);
1710 memset(&s->above_partition_ctx[col], above_ctx[b->bs], w4);
1711 memset(&s->left_partition_ctx[row7], left_ctx[b->bs], h4);
1712 if (!s->keyframe && !s->intraonly) {
1713 memset(&s->above_intra_ctx[col], b->intra, w4);
1714 memset(&s->left_intra_ctx[row7], b->intra, h4);
1715 memset(&s->above_comp_ctx[col], b->comp, w4);
1716 memset(&s->left_comp_ctx[row7], b->comp, h4);
1717 memset(&s->above_mode_ctx[col], b->mode[3], w4);
1718 memset(&s->left_mode_ctx[row7], b->mode[3], h4);
1719 if (s->filtermode == FILTER_SWITCHABLE && !b->intra) {
1720 memset(&s->above_filter_ctx[col], b->filter, w4);
1721 memset(&s->left_filter_ctx[row7], b->filter, h4);
1722 b->filter = vp9_filter_lut[b->filter];
1724 if (b->bs > BS_8x8) {
1725 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1727 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
1728 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
1729 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
1730 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
1731 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
1732 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
1733 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
1734 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
1736 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1738 for (n = 0; n < w4 * 2; n++) {
1739 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
1740 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
1742 for (n = 0; n < h4 * 2; n++) {
1743 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
1744 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
1748 if (!b->intra) { // FIXME write 0xff or -1 if intra, so we can use this
1749 // as a direct check in above branches
1750 int vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
1752 memset(&s->above_ref_ctx[col], vref, w4);
1753 memset(&s->left_ref_ctx[row7], vref, h4);
1758 for (y = 0; y < h4; y++) {
1759 int x, o = (row + y) * s->sb_cols * 8 + col;
1762 for (x = 0; x < w4; x++) {
1763 s->mv[0][o + x].ref[0] =
1764 s->mv[0][o + x].ref[1] = -1;
1766 } else if (b->comp) {
1767 for (x = 0; x < w4; x++) {
1768 s->mv[0][o + x].ref[0] = b->ref[0];
1769 s->mv[0][o + x].ref[1] = b->ref[1];
1770 AV_COPY32(&s->mv[0][o + x].mv[0], &b->mv[3][0]);
1771 AV_COPY32(&s->mv[0][o + x].mv[1], &b->mv[3][1]);
1774 for (x = 0; x < w4; x++) {
1775 s->mv[0][o + x].ref[0] = b->ref[0];
1776 s->mv[0][o + x].ref[1] = -1;
1777 AV_COPY32(&s->mv[0][o + x].mv[0], &b->mv[3][0]);
1783 // FIXME remove tx argument, and merge cnt/eob arguments?
1784 static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
1785 enum TxfmMode tx, unsigned (*cnt)[6][3],
1786 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
1787 int nnz, const int16_t *scan, const int16_t (*nb)[2],
1788 const int16_t *band_counts, const int16_t *qmul)
1790 int i = 0, band = 0, band_left = band_counts[band];
1791 uint8_t *tp = p[0][nnz];
1792 uint8_t cache[1024];
1797 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
1798 eob[band][nnz][val]++;
1803 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
1804 cnt[band][nnz][0]++;
1806 band_left = band_counts[++band];
1808 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
1810 if (++i == n_coeffs)
1811 break; //invalid input; blocks should end with EOB
1816 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
1817 cnt[band][nnz][1]++;
1821 // fill in p[3-10] (model fill) - only once per frame for each pos
1823 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
1825 cnt[band][nnz][2]++;
1826 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
1827 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
1828 cache[rc] = val = 2;
1830 val = 3 + vp56_rac_get_prob(c, tp[5]);
1833 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
1835 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
1836 val = 5 + vp56_rac_get_prob(c, 159);
1838 val = 7 + (vp56_rac_get_prob(c, 165) << 1) +
1839 vp56_rac_get_prob(c, 145);
1843 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
1844 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
1845 val = 11 + (vp56_rac_get_prob(c, 173) << 2) +
1846 (vp56_rac_get_prob(c, 148) << 1) +
1847 vp56_rac_get_prob(c, 140);
1849 val = 19 + (vp56_rac_get_prob(c, 176) << 3) +
1850 (vp56_rac_get_prob(c, 155) << 2) +
1851 (vp56_rac_get_prob(c, 140) << 1) +
1852 vp56_rac_get_prob(c, 135);
1854 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
1855 val = 35 + (vp56_rac_get_prob(c, 180) << 4) +
1856 (vp56_rac_get_prob(c, 157) << 3) +
1857 (vp56_rac_get_prob(c, 141) << 2) +
1858 (vp56_rac_get_prob(c, 134) << 1) +
1859 vp56_rac_get_prob(c, 130);
1861 val = 67 + (vp56_rac_get_prob(c, 254) << 13) +
1862 (vp56_rac_get_prob(c, 254) << 12) +
1863 (vp56_rac_get_prob(c, 254) << 11) +
1864 (vp56_rac_get_prob(c, 252) << 10) +
1865 (vp56_rac_get_prob(c, 249) << 9) +
1866 (vp56_rac_get_prob(c, 243) << 8) +
1867 (vp56_rac_get_prob(c, 230) << 7) +
1868 (vp56_rac_get_prob(c, 196) << 6) +
1869 (vp56_rac_get_prob(c, 177) << 5) +
1870 (vp56_rac_get_prob(c, 153) << 4) +
1871 (vp56_rac_get_prob(c, 140) << 3) +
1872 (vp56_rac_get_prob(c, 133) << 2) +
1873 (vp56_rac_get_prob(c, 130) << 1) +
1874 vp56_rac_get_prob(c, 129);
1879 band_left = band_counts[++band];
1880 if (tx == TX_32X32) // FIXME slow
1881 coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
1883 coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
1884 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
1886 } while (++i < n_coeffs);
1891 static int decode_coeffs(AVCodecContext *ctx)
1893 VP9Context *s = ctx->priv_data;
1894 VP9Block *const b = &s->b;
1895 int row = b->row, col = b->col;
1896 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
1897 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
1898 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
1899 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
1900 int end_x = FFMIN(2 * (s->cols - col), w4);
1901 int end_y = FFMIN(2 * (s->rows - row), h4);
1902 int n, pl, x, y, step1d = 1 << b->tx, step = 1 << (b->tx * 2);
1903 int uvstep1d = 1 << b->uvtx, uvstep = 1 << (b->uvtx * 2), res;
1904 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
1905 int tx = 4 * s->lossless + b->tx;
1906 const int16_t **yscans = vp9_scans[tx];
1907 const int16_t (**ynbs)[2] = vp9_scans_nb[tx];
1908 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
1909 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
1910 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
1911 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
1912 static const int16_t band_counts[4][6] = {
1913 { 1, 2, 3, 4, 3, 16 - 13 },
1914 { 1, 2, 3, 4, 11, 64 - 21 },
1915 { 1, 2, 3, 4, 11, 256 - 21 },
1916 { 1, 2, 3, 4, 11, 1024 - 21 },
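/* Coefficients per probability band (6 bands) for each transform size; each
 * row sums to the block's total coefficient count: 16, 64, 256 and 1024. */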
1918 const int16_t *y_band_counts = band_counts[b->tx];
1919 const int16_t *uv_band_counts = band_counts[b->uvtx];
1922 if (b->tx > TX_4X4) { // FIXME slow
1923 for (y = 0; y < end_y; y += step1d)
1924 for (x = 1; x < step1d; x++)
1926 for (x = 0; x < end_x; x += step1d)
1927 for (y = 1; y < step1d; y++)
1930 for (n = 0, y = 0; y < end_y; y += step1d) {
1931 for (x = 0; x < end_x; x += step1d, n += step) {
1932 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[b->tx == TX_4X4 &&
1935 int nnz = a[x] + l[y];
1936 if ((res = decode_coeffs_b(&s->c, s->block + 16 * n, 16 * step,
1937 b->tx, c, e, p, nnz, yscans[txtp],
1938 ynbs[txtp], y_band_counts, qmul[0])) < 0)
1940 a[x] = l[y] = !!res;
1941 if (b->tx > TX_8X8) {
1942 AV_WN16A(&s->eob[n], res);
1948 if (b->tx > TX_4X4) { // FIXME slow
1949 for (y = 0; y < end_y; y += step1d)
1950 memset(&l[y + 1], l[y], FFMIN(end_y - y - 1, step1d - 1));
1951 for (x = 0; x < end_x; x += step1d)
1952 memset(&a[x + 1], a[x], FFMIN(end_x - x - 1, step1d - 1));
1955 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
1956 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
1957 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
1962 for (pl = 0; pl < 2; pl++) {
1963 a = &s->above_uv_nnz_ctx[pl][col];
1964 l = &s->left_uv_nnz_ctx[pl][row & 7];
1965 if (b->uvtx > TX_4X4) { // FIXME slow
1966 for (y = 0; y < end_y; y += uvstep1d)
1967 for (x = 1; x < uvstep1d; x++)
1969 for (x = 0; x < end_x; x += uvstep1d)
1970 for (y = 1; y < uvstep1d; y++)
1973 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
1974 for (x = 0; x < end_x; x += uvstep1d, n += uvstep) {
1975 int nnz = a[x] + l[y];
1976 if ((res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n,
1977 16 * uvstep, b->uvtx, c, e, p, nnz,
1978 uvscan, uvnb, uv_band_counts,
1981 a[x] = l[y] = !!res;
1982 if (b->uvtx > TX_8X8) {
1983 AV_WN16A(&s->uveob[pl][n], res);
1985 s->uveob[pl][n] = res;
1989 if (b->uvtx > TX_4X4) { // FIXME slow
1990 for (y = 0; y < end_y; y += uvstep1d)
1991 memset(&l[y + 1], l[y], FFMIN(end_y - y - 1, uvstep1d - 1));
1992 for (x = 0; x < end_x; x += uvstep1d)
1993 memset(&a[x + 1], a[x], FFMIN(end_x - x - 1, uvstep1d - 1));
2000 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2001 uint8_t *dst_edge, ptrdiff_t stride_edge,
2002 uint8_t *dst_inner, ptrdiff_t stride_inner,
2003 uint8_t *l, int col, int x, int w,
2004 int row, int y, enum TxfmMode tx,
2007 int have_top = row > 0 || y > 0;
2008 int have_left = col > s->tiling.tile_col_start || x > 0;
2009 int have_right = x < w - 1;
2010 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2011 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2012 { DC_127_PRED, VERT_PRED } },
2013 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2014 { HOR_PRED, HOR_PRED } },
2015 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2016 { LEFT_DC_PRED, DC_PRED } },
2017 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2018 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2019 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2020 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2021 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2022 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2023 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2024 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2025 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2026 { DC_127_PRED, VERT_LEFT_PRED } },
2027 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2028 { HOR_UP_PRED, HOR_UP_PRED } },
2029 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2030 { HOR_PRED, TM_VP8_PRED } },
2032 static const struct {
2033 uint8_t needs_left:1;
2034 uint8_t needs_top:1;
2035 uint8_t needs_topleft:1;
2036 uint8_t needs_topright:1;
2037 } edges[N_INTRA_PRED_MODES] = {
2038 [VERT_PRED] = { .needs_top = 1 },
2039 [HOR_PRED] = { .needs_left = 1 },
2040 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2041 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2042 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2043 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2044 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2045 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2046 [HOR_UP_PRED] = { .needs_left = 1 },
2047 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2048 [LEFT_DC_PRED] = { .needs_left = 1 },
2049 [TOP_DC_PRED] = { .needs_top = 1 },
2050 [DC_128_PRED] = { 0 },
2051 [DC_127_PRED] = { 0 },
2052 [DC_129_PRED] = { 0 }
2055 av_assert2(mode >= 0 && mode < 10);
2056 mode = mode_conv[mode][have_left][have_top];
2057 if (edges[mode].needs_top) {
2058 uint8_t *top, *topleft;
2059 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
2060 int n_px_need_tr = 0;
2062 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2065 // if top of sb64-row, use s->intra_pred_data[] instead of
2066 // dst[-stride] for intra prediction (it contains pre- instead of
2067 // post-loopfilter data)
2069 top = !(row & 7) && !y ?
2070 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2071 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2073 topleft = !(row & 7) && !y ?
2074 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2075 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2076 &dst_inner[-stride_inner];
2080 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2081 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2082 n_px_need + n_px_need_tr <= n_px_have) {
2086 if (n_px_need <= n_px_have) {
2087 memcpy(*a, top, n_px_need);
2089 memcpy(*a, top, n_px_have);
2090 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2091 n_px_need - n_px_have);
2094 memset(*a, 127, n_px_need);
2096 if (edges[mode].needs_topleft) {
2097 if (have_left && have_top) {
2098 (*a)[-1] = topleft[-1];
2100 (*a)[-1] = have_top ? 129 : 127;
2103 if (tx == TX_4X4 && edges[mode].needs_topright) {
2104 if (have_top && have_right &&
2105 n_px_need + n_px_need_tr <= n_px_have) {
2106 memcpy(&(*a)[4], &top[4], 4);
2108 memset(&(*a)[4], (*a)[3], 4);
2113 if (edges[mode].needs_left) {
2115 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
2116 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2117 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2119 if (n_px_need <= n_px_have) {
2120 for (i = 0; i < n_px_need; i++)
2121 l[i] = dst[i * stride - 1];
2123 for (i = 0; i < n_px_have; i++)
2124 l[i] = dst[i * stride - 1];
2125 memset(&l[i], l[i - 1], n_px_need - n_px_have);
2128 memset(l, 129, 4 << tx);
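/* Illustrative sketch (not part of the decoder): the mode_conv[] and edges[]
 * tables above implement the VP9 rule that a predictor falls back to a DC
 * variant when the pixels it needs are unavailable, and that a missing edge
 * is synthesized with a fixed value (127 above, 129 left, 128 when both are
 * missing). The DC_PRED row of mode_conv[], written out as control flow with
 * a hypothetical helper name, would read:
 *
 *     static int dc_pred_fallback(int have_left, int have_top)
 *     {
 *         if (have_left && have_top) return DC_PRED;      // normal case
 *         if (have_left)             return LEFT_DC_PRED; // average left only
 *         if (have_top)              return TOP_DC_PRED;  // average top only
 *         return DC_128_PRED;                             // constant 128 fill
 *     }
 *
 * The directional modes are handled the same way via the DC_127/DC_129
 * entries encoded in mode_conv[] above. */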
2135 static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2137 VP9Context *s = ctx->priv_data;
2138 VP9Block *const b = &s->b;
2139 int row = b->row, col = b->col;
2140 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2141 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2142 int end_x = FFMIN(2 * (s->cols - col), w4);
2143 int end_y = FFMIN(2 * (s->rows - row), h4);
2144 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2145 int uvstep1d = 1 << b->uvtx, p;
2146 uint8_t *dst = b->dst[0], *dst_r = s->f->data[0] + y_off;
2148 for (n = 0, y = 0; y < end_y; y += step1d) {
2149 uint8_t *ptr = dst, *ptr_r = dst_r;
2150 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2151 ptr_r += 4 * step1d, n += step) {
2152 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2154 LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
2155 uint8_t *a = &a_buf[16], l[32];
2156 enum TxfmType txtp = vp9_intra_txfm_type[mode];
2157 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2159 mode = check_intra_mode(s, mode, &a, ptr_r, s->f->linesize[0],
2160 ptr, b->y_stride, l,
2161 col, x, w4, row, y, b->tx, 0);
2162 s->dsp.intra_pred[b->tx][mode](ptr, b->y_stride, l, a);
2164 s->dsp.itxfm_add[tx][txtp](ptr, b->y_stride,
2165 s->block + 16 * n, eob);
2167 dst_r += 4 * s->f->linesize[0] * step1d;
2168 dst += 4 * b->y_stride * step1d;
2176 step = 1 << (b->uvtx * 2);
2177 for (p = 0; p < 2; p++) {
2178 dst = b->dst[1 + p];
2179 dst_r = s->f->data[1 + p] + uv_off;
2180 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2181 uint8_t *ptr = dst, *ptr_r = dst_r;
2182 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2183 ptr_r += 4 * uvstep1d, n += step) {
2184 int mode = b->uvmode;
2185 LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
2186 uint8_t *a = &a_buf[16], l[32];
2187 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2189 mode = check_intra_mode(s, mode, &a, ptr_r, s->f->linesize[1],
2190 ptr, b->uv_stride, l,
2191 col, x, w4, row, y, b->uvtx, p + 1);
2192 s->dsp.intra_pred[b->uvtx][mode](ptr, b->uv_stride, l, a);
2194 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, b->uv_stride,
2195 s->uvblock[p] + 16 * n, eob);
2197 dst_r += 4 * uvstep1d * s->f->linesize[1];
2198 dst += 4 * uvstep1d * b->uv_stride;
2203 static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2204 uint8_t *dst, ptrdiff_t dst_stride,
2205 const uint8_t *ref, ptrdiff_t ref_stride,
2206 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2207 int bw, int bh, int w, int h)
2209 int mx = mv->x, my = mv->y;
2213 ref += y * ref_stride + x;
2216 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2217 if (x < !!mx * 3 || y < !!my * 3 ||
2218 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2219 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, 80,
2220 ref - !!my * 3 * ref_stride - !!mx * 3,
2222 bw + !!mx * 7, bh + !!my * 7,
2223 x - !!mx * 3, y - !!my * 3, w, h);
2224 ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2227 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
2230 static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2231 uint8_t *dst_u, uint8_t *dst_v,
2232 ptrdiff_t dst_stride,
2233 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2234 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2235 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2236 int bw, int bh, int w, int h)
2238 int mx = mv->x, my = mv->y;
2242 ref_u += y * src_stride_u + x;
2243 ref_v += y * src_stride_v + x;
2246 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2247 if (x < !!mx * 3 || y < !!my * 3 ||
2248 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2249 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, 80,
2250 ref_u - !!my * 3 * src_stride_u - !!mx * 3, src_stride_u,
2251 bw + !!mx * 7, bh + !!my * 7,
2252 x - !!mx * 3, y - !!my * 3, w, h);
2253 ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2254 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2256 s->vdsp.emulated_edge_mc(s->edge_emu_buffer, 80,
2257 ref_v - !!my * 3 * src_stride_v - !!mx * 3, src_stride_v,
2258 bw + !!mx * 7, bh + !!my * 7,
2259 x - !!mx * 3, y - !!my * 3, w, h);
2260 ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2261 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
2263 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2264 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
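/* Illustrative note (assumptions, not decoder code): the bounds check in
 * mc_luma_dir()/mc_chroma_dir() accounts for the subpel filters, which read
 * 3 pixels before and 4 pixels after the nominal block in each dimension
 * whenever the corresponding MV component has a fractional part. Assuming a
 * 16x16 luma block whose integer MV part lands at x = 1, y = 20 in a 640x480
 * reference, with mx != 0 and my == 0 left as the fractional remainder:
 *
 *     x < !!mx * 3      -> 1 < 3, the left taps would under-run the frame,
 *                          so the emulated-edge path is taken
 *     width fetched     -> bw + !!mx * 7 = 23 pixels, starting at x - 3 = -2
 *     height fetched    -> bh + !!my * 7 = 16 rows (no vertical taps needed)
 *
 * emulated_edge_mc() replicates the frame border into edge_emu_buffer
 * (stride 80), and the MC function is then run on that buffer instead of the
 * reference frame. */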
2268 static void inter_recon(AVCodecContext *ctx)
2270 static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
2271 { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
2272 { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
2274 VP9Context *s = ctx->priv_data;
2275 VP9Block *const b = &s->b;
2276 int row = b->row, col = b->col;
2277 AVFrame *ref1 = s->refs[s->refidx[b->ref[0]]];
2278 AVFrame *ref2 = b->comp ? s->refs[s->refidx[b->ref[1]]] : NULL;
2279 int w = ctx->width, h = ctx->height;
2280 ptrdiff_t ls_y = b->y_stride, ls_uv = b->uv_stride;
2283 if (b->bs > BS_8x8) {
2284 if (b->bs == BS_8x4) {
2285 mc_luma_dir(s, s->dsp.mc[3][b->filter][0], b->dst[0], ls_y,
2286 ref1->data[0], ref1->linesize[0],
2287 row << 3, col << 3, &b->mv[0][0], 8, 4, w, h);
2288 mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
2289 b->dst[0] + 4 * ls_y, ls_y,
2290 ref1->data[0], ref1->linesize[0],
2291 (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w, h);
2294 mc_luma_dir(s, s->dsp.mc[3][b->filter][1], b->dst[0], ls_y,
2295 ref2->data[0], ref2->linesize[0],
2296 row << 3, col << 3, &b->mv[0][1], 8, 4, w, h);
2297 mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
2298 b->dst[0] + 4 * ls_y, ls_y,
2299 ref2->data[0], ref2->linesize[0],
2300 (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w, h);
2302 } else if (b->bs == BS_4x8) {
2303 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0], ls_y,
2304 ref1->data[0], ref1->linesize[0],
2305 row << 3, col << 3, &b->mv[0][0], 4, 8, w, h);
2306 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0] + 4, ls_y,
2307 ref1->data[0], ref1->linesize[0],
2308 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w, h);
2311 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0], ls_y,
2312 ref2->data[0], ref2->linesize[0],
2313 row << 3, col << 3, &b->mv[0][1], 4, 8, w, h);
2314 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0] + 4, ls_y,
2315 ref2->data[0], ref2->linesize[0],
2316 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w, h);
2319 av_assert2(b->bs == BS_4x4);
2321 // FIXME if two horizontally adjacent blocks have the same MV,
2322 // do a w8 instead of a w4 call
2323 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0], ls_y,
2324 ref1->data[0], ref1->linesize[0],
2325 row << 3, col << 3, &b->mv[0][0], 4, 4, w, h);
2326 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0] + 4, ls_y,
2327 ref1->data[0], ref1->linesize[0],
2328 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w, h);
2329 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2330 b->dst[0] + 4 * ls_y, ls_y,
2331 ref1->data[0], ref1->linesize[0],
2332 (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w, h);
2333 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2334 b->dst[0] + 4 * ls_y + 4, ls_y,
2335 ref1->data[0], ref1->linesize[0],
2336 (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w, h);
2339 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0], ls_y,
2340 ref2->data[0], ref2->linesize[0],
2341 row << 3, col << 3, &b->mv[0][1], 4, 4, w, h);
2342 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0] + 4, ls_y,
2343 ref2->data[0], ref2->linesize[0],
2344 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w, h);
2345 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2346 b->dst[0] + 4 * ls_y, ls_y,
2347 ref2->data[0], ref2->linesize[0],
2348 (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w, h);
2349 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2350 b->dst[0] + 4 * ls_y + 4, ls_y,
2351 ref2->data[0], ref2->linesize[0],
2352 (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w, h);
2356 int bwl = bwlog_tab[0][b->bs];
2357 int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
2359 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], b->dst[0], ls_y,
2360 ref1->data[0], ref1->linesize[0],
2361 row << 3, col << 3, &b->mv[0][0],bw, bh, w, h);
2364 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], b->dst[0], ls_y,
2365 ref2->data[0], ref2->linesize[0],
2366 row << 3, col << 3, &b->mv[0][1], bw, bh, w, h);
2371 int bwl = bwlog_tab[1][b->bs];
2372 int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
2377 if (b->bs > BS_8x8) {
2378 mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
2379 mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
2384 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
2385 b->dst[1], b->dst[2], ls_uv,
2386 ref1->data[1], ref1->linesize[1],
2387 ref1->data[2], ref1->linesize[2],
2388 row << 2, col << 2, &mvuv, bw, bh, w, h);
2391 if (b->bs > BS_8x8) {
2392 mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
2393 mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
2397 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
2398 b->dst[1], b->dst[2], ls_uv,
2399 ref2->data[1], ref2->linesize[1],
2400 ref2->data[2], ref2->linesize[2],
2401 row << 2, col << 2, &mvuv, bw, bh, w, h);
2406 /* mostly copied intra_recon() */
2408 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2409 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2410 int end_x = FFMIN(2 * (s->cols - col), w4);
2411 int end_y = FFMIN(2 * (s->rows - row), h4);
2412 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2413 int uvstep1d = 1 << b->uvtx, p;
2414 uint8_t *dst = b->dst[0];
2417 for (n = 0, y = 0; y < end_y; y += step1d) {
2419 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2420 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2423 s->dsp.itxfm_add[tx][DCT_DCT](ptr, b->y_stride,
2424 s->block + 16 * n, eob);
2426 dst += 4 * b->y_stride * step1d;
2434 step = 1 << (b->uvtx * 2);
2435 for (p = 0; p < 2; p++) {
2436 dst = b->dst[p + 1];
2437 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2439 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2440 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2443 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, b->uv_stride,
2444 s->uvblock[p] + 16 * n, eob);
2446 dst += 4 * uvstep1d * b->uv_stride;
2452 static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
2453 int row_and_7, int col_and_7,
2454 int w, int h, int col_end, int row_end,
2455 enum TxfmMode tx, int skip_inter)
2457 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2458 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2459 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2460 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2462 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2463 // edges. This means that for UV, we work on two subsampled blocks at
2464 // a time, and we only use the topleft block's mode information to set
2465 // things like block strength. Thus, for any block size smaller than
2466 // 16x16, ignore the odd portion of the block.
2467 if (tx == TX_4X4 && is_uv) {
2482 if (tx == TX_4X4 && !skip_inter) {
2483 int t = 1 << col_and_7, m_col = (t << w) - t, y;
2484 int m_col_odd = (t << (w - 1)) - t;
2486 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
2488 int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2490 for (y = row_and_7; y < h + row_and_7; y++) {
2491 int col_mask_id = 2 - !(y & 7);
2493 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2494 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2495 // for odd lines, if the odd col is not being filtered,
2496 // skip odd row also:
2503 // if a/c are even row/col and b/d are odd, and d is skipped,
2504 // e.g. right edge of size-66x66.webm, then skip b also (bug)
2505 if ((col_end & 1) && (y & 1)) {
2506 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
2508 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
2512 int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2514 for (y = row_and_7; y < h + row_and_7; y++) {
2515 int col_mask_id = 2 - !(y & 3);
2517 lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
2518 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2519 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
2520 lflvl->mask[is_uv][0][y][3] |= m_col;
2521 lflvl->mask[is_uv][1][y][3] |= m_col;
2525 int y, t = 1 << col_and_7, m_col = (t << w) - t;
2528 int mask_id = (tx == TX_8X8);
2529 int l2 = tx + is_uv - 1, step1d = 1 << l2;
2530 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2531 int m_row = m_col & masks[l2];
2533 // at odd UV col/row edges of tx16/tx32 blocks, force the 8-wide
2534 // loopfilter to prevent going off the visible edge.
2535 if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2536 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2537 int m_row_8 = m_row - m_row_16;
2539 for (y = row_and_7; y < h + row_and_7; y++) {
2540 lflvl->mask[is_uv][0][y][0] |= m_row_16;
2541 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2544 for (y = row_and_7; y < h + row_and_7; y++)
2545 lflvl->mask[is_uv][0][y][mask_id] |= m_row;
2548 if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2549 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2550 lflvl->mask[is_uv][1][y][0] |= m_col;
2551 if (y - row_and_7 == h - 1)
2552 lflvl->mask[is_uv][1][y][1] |= m_col;
2554 for (y = row_and_7; y < h + row_and_7; y += step1d)
2555 lflvl->mask[is_uv][1][y][mask_id] |= m_col;
2557 } else if (tx != TX_4X4) {
2560 mask_id = (tx == TX_8X8) || (is_uv && h == 1);
2561 lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
2562 mask_id = (tx == TX_8X8) || (is_uv && w == 1);
2563 for (y = row_and_7; y < h + row_and_7; y++)
2564 lflvl->mask[is_uv][0][y][mask_id] |= t;
2566 int t8 = t & 0x01, t4 = t - t8;
2568 for (y = row_and_7; y < h + row_and_7; y++) {
2569 lflvl->mask[is_uv][0][y][2] |= t4;
2570 lflvl->mask[is_uv][0][y][1] |= t8;
2572 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
2574 int t8 = t & 0x11, t4 = t - t8;
2576 for (y = row_and_7; y < h + row_and_7; y++) {
2577 lflvl->mask[is_uv][0][y][2] |= t4;
2578 lflvl->mask[is_uv][0][y][1] |= t8;
2580 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
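/* Worked example (not decoder code) of the bitmask construction used in
 * mask_edges(): each bit of a mask byte stands for one 8-pixel column inside
 * the 64x64 superblock, so with col_and_7 = 2 and w = 4 (a 32-pixel-wide
 * block starting at the third 8-pixel column):
 *
 *     t     = 1 << col_and_7 = 0x04
 *     m_col = (t << w) - t   = 0x40 - 0x04 = 0x3c   (bits 2..5 set)
 *
 * i.e. the block covers columns 2, 3, 4 and 5 of the superblock, and those
 * bits are OR-ed into lflvl->mask[][][y][] for every 8-pixel row y that the
 * block spans. */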
2585 static int decode_b(AVCodecContext *ctx, int row, int col,
2586 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2587 enum BlockLevel bl, enum BlockPartition bp)
2589 VP9Context *s = ctx->priv_data;
2590 VP9Block *const b = &s->b;
2591 enum BlockSize bs = bl * 3 + bp;
2592 int res, y, w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
2599 s->min_mv.x = -(128 + col * 64);
2600 s->min_mv.y = -(128 + row * 64);
2601 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
2602 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
2605 b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
2608 if ((res = decode_coeffs(ctx)) < 0)
2613 memset(&s->above_y_nnz_ctx[col * 2], 0, w4 * 2);
2614 memset(&s->left_y_nnz_ctx[(row & 7) << 1], 0, h4 * 2);
2615 for (pl = 0; pl < 2; pl++) {
2616 memset(&s->above_uv_nnz_ctx[pl][col], 0, w4);
2617 memset(&s->left_uv_nnz_ctx[pl][row & 7], 0, h4);
2621 // emulate overhangs if the stride of the target buffer can't hold them. This
2622 // allows us to support emu-edge and so on even with large block overhangs
2624 emu[0] = (col + w4) * 8 > s->f->linesize[0] ||
2625 (row + h4) > s->rows + 2 * !(ctx->flags & CODEC_FLAG_EMU_EDGE);
2626 emu[1] = (col + w4) * 4 > s->f->linesize[1] ||
2627 (row + h4) > s->rows + 2 * !(ctx->flags & CODEC_FLAG_EMU_EDGE);
2629 b->dst[0] = s->tmp_y;
2632 b->dst[0] = s->f->data[0] + yoff;
2633 b->y_stride = s->f->linesize[0];
2636 b->dst[1] = s->tmp_uv[0];
2637 b->dst[2] = s->tmp_uv[1];
2640 b->dst[1] = s->f->data[1] + uvoff;
2641 b->dst[2] = s->f->data[2] + uvoff;
2642 b->uv_stride = s->f->linesize[1];
2645 intra_recon(ctx, yoff, uvoff);
2650 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
2652 for (n = 0; o < w; n++) {
2657 s->dsp.mc[n][0][0][0][0](s->f->data[0] + yoff + o, s->f->linesize[0],
2658 s->tmp_y + o, 64, h, 0, 0);
2664 int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
2666 for (n = 1; o < w; n++) {
2671 s->dsp.mc[n][0][0][0][0](s->f->data[1] + uvoff + o, s->f->linesize[1],
2672 s->tmp_uv[0] + o, 32, h, 0, 0);
2673 s->dsp.mc[n][0][0][0][0](s->f->data[2] + uvoff + o, s->f->linesize[2],
2674 s->tmp_uv[1] + o, 32, h, 0, 0);
2680 // pick filter level and find edges to apply filter to
2681 if (s->filter.level &&
2682 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
2683 [b->mode[3] != ZEROMV]) > 0) {
2684 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
2685 int skip_inter = !b->intra && b->skip;
2687 for (y = 0; y < h4; y++)
2688 memset(&lflvl->level[((row & 7) + y) * 8 + (col & 7)], lvl, w4);
2689 mask_edges(lflvl, 0, row & 7, col & 7, x_end, y_end, 0, 0, b->tx, skip_inter);
2690 mask_edges(lflvl, 1, row & 7, col & 7, x_end, y_end,
2691 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
2692 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
2693 b->uvtx, skip_inter);
2695 if (!s->filter.lim_lut[lvl]) {
2696 int sharp = s->filter.sharpness;
2700 limit >>= (sharp + 3) >> 2;
2701 limit = FFMIN(limit, 9 - sharp);
2703 limit = FFMAX(limit, 1);
2705 s->filter.lim_lut[lvl] = limit;
2706 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
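/* Worked example (not decoder code) for the loop-filter limit LUT filled in
 * above, assuming limit starts at lvl, with lvl = 32 and
 * s->filter.sharpness = 4 (so the sharpness branch applies):
 *
 *     limit = 32 >> ((4 + 3) >> 2) = 32 >> 1 = 16
 *     limit = FFMIN(16, 9 - 4)     = 5
 *     limit = FFMAX(5, 1)          = 5
 *     lim_lut[32]   = 5
 *     mblim_lut[32] = 2 * (32 + 2) + 5 = 73
 *
 * The LUT entries are filled lazily, the first time a given filter level is
 * actually used. */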
2713 static int decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
2714 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
2716 VP9Context *s = ctx->priv_data;
2717 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
2718 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1), res;
2719 const uint8_t *p = s->keyframe ? vp9_default_kf_partition_probs[bl][c] :
2720 s->prob.p.partition[bl][c];
2721 enum BlockPartition bp;
2722 ptrdiff_t hbs = 4 >> bl;
2725 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
2726 res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
2727 } else if (col + hbs < s->cols) {
2728 if (row + hbs < s->rows) {
2729 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
2731 case PARTITION_NONE:
2732 res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
2735 if (!(res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp))) {
2736 yoff += hbs * 8 * s->f->linesize[0];
2737 uvoff += hbs * 4 * s->f->linesize[1];
2738 res = decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
2742 if (!(res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp))) {
2745 res = decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
2748 case PARTITION_SPLIT:
2749 if (!(res = decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1))) {
2750 if (!(res = decode_sb(ctx, row, col + hbs, lflvl,
2751 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1))) {
2752 yoff += hbs * 8 * s->f->linesize[0];
2753 uvoff += hbs * 4 * s->f->linesize[1];
2754 if (!(res = decode_sb(ctx, row + hbs, col, lflvl,
2755 yoff, uvoff, bl + 1)))
2756 res = decode_sb(ctx, row + hbs, col + hbs, lflvl,
2757 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
2762 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
2763 bp = PARTITION_SPLIT;
2764 if (!(res = decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1)))
2765 res = decode_sb(ctx, row, col + hbs, lflvl,
2766 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
2769 res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
2771 } else if (row + hbs < s->rows) {
2772 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
2773 bp = PARTITION_SPLIT;
2774 if (!(res = decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1))) {
2775 yoff += hbs * 8 * s->f->linesize[0];
2776 uvoff += hbs * 4 * s->f->linesize[1];
2777 res = decode_sb(ctx, row + hbs, col, lflvl,
2778 yoff, uvoff, bl + 1);
2782 res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
2785 bp = PARTITION_SPLIT;
2786 res = decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
2788 s->counts.partition[bl][c][bp]++;
2793 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
2794 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
2796 VP9Context *s = ctx->priv_data;
2797 uint8_t *dst = s->f->data[0] + yoff, *lvl = lflvl->level;
2798 ptrdiff_t ls_y = s->f->linesize[0], ls_uv = s->f->linesize[1];
2801 // FIXME to what extent can we interleave the v/h loopfilter calls? E.g.
2802 // if you think of them as acting on a 8x8 block max, we can interleave
2803 // each v/h within the single x loop, but that only works if we work on
2804 // 8 pixel blocks, and we won't always do that (we want at least 16px
2805 // to use SSE2 optimizations, perhaps 32 for AVX2)
2807 // filter edges between columns, Y plane (e.g. block1 | block2)
2808 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
2809 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
2810 uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
2811 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
2812 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
2813 unsigned hm = hm1 | hm2 | hm13 | hm23;
2815 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
2817 int L = *l, H = L >> 4;
2818 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2821 if (hmask1[0] & x) {
2822 if (hmask2[0] & x) {
2823 av_assert2(l[8] == L);
2824 s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
2826 s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
2828 } else if (hm2 & x) {
2831 E |= s->filter.mblim_lut[L] << 8;
2832 I |= s->filter.lim_lut[L] << 8;
2833 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
2835 [0](ptr, ls_y, E, I, H);
2837 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
2838 [0](ptr, ls_y, E, I, H);
2841 } else if (hm2 & x) {
2842 int L = l[8], H = L >> 4;
2843 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2846 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
2847 [0](ptr + 8 * ls_y, ls_y, E, I, H);
2851 int L = *l, H = L >> 4;
2852 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2857 E |= s->filter.mblim_lut[L] << 8;
2858 I |= s->filter.lim_lut[L] << 8;
2859 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
2861 s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
2863 } else if (hm23 & x) {
2864 int L = l[8], H = L >> 4;
2865 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2867 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
2873 // filter edges between rows, Y plane (e.g. ------)
2875 dst = s->f->data[0] + yoff;
2877 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
2878 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
2879 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
2881 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
2884 int L = *l, H = L >> 4;
2885 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2888 if (vmask[0] & (x << 1)) {
2889 av_assert2(l[1] == L);
2890 s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
2892 s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
2894 } else if (vm & (x << 1)) {
2897 E |= s->filter.mblim_lut[L] << 8;
2898 I |= s->filter.lim_lut[L] << 8;
2899 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
2900 [!!(vmask[1] & (x << 1))]
2901 [1](ptr, ls_y, E, I, H);
2903 s->dsp.loop_filter_8[!!(vmask[1] & x)]
2904 [1](ptr, ls_y, E, I, H);
2906 } else if (vm & (x << 1)) {
2907 int L = l[1], H = L >> 4;
2908 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2910 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
2911 [1](ptr + 8, ls_y, E, I, H);
2915 int L = *l, H = L >> 4;
2916 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2918 if (vm3 & (x << 1)) {
2921 E |= s->filter.mblim_lut[L] << 8;
2922 I |= s->filter.lim_lut[L] << 8;
2923 s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
2925 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
2927 } else if (vm3 & (x << 1)) {
2928 int L = l[1], H = L >> 4;
2929 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2931 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
2936 // same principle but for U/V planes
2937 for (p = 0; p < 2; p++) {
2939 dst = s->f->data[1 + p] + uvoff;
2940 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
2941 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
2942 uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
2943 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
2944 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
2946 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
2949 int L = *l, H = L >> 4;
2950 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2952 if (hmask1[0] & x) {
2953 if (hmask2[0] & x) {
2954 av_assert2(l[16] == L);
2955 s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
2957 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
2959 } else if (hm2 & x) {
2962 E |= s->filter.mblim_lut[L] << 8;
2963 I |= s->filter.lim_lut[L] << 8;
2964 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
2966 [0](ptr, ls_uv, E, I, H);
2968 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
2969 [0](ptr, ls_uv, E, I, H);
2971 } else if (hm2 & x) {
2972 int L = l[16], H = L >> 4;
2973 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2975 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
2976 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
2984 dst = s->f->data[1 + p] + uvoff;
2985 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
2986 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
2987 unsigned vm = vmask[0] | vmask[1] | vmask[2];
2989 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
2992 int L = *l, H = L >> 4;
2993 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2996 if (vmask[0] & (x << 2)) {
2997 av_assert2(l[2] == L);
2998 s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
3000 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
3002 } else if (vm & (x << 2)) {
3005 E |= s->filter.mblim_lut[L] << 8;
3006 I |= s->filter.lim_lut[L] << 8;
3007 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3008 [!!(vmask[1] & (x << 2))]
3009 [1](ptr, ls_uv, E, I, H);
3011 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3012 [1](ptr, ls_uv, E, I, H);
3014 } else if (vm & (x << 2)) {
3015 int L = l[2], H = L >> 4;
3016 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3018 s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
3019 [1](ptr + 8, ls_uv, E, I, H);
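/* Illustrative note (not decoder code): in the Y horizontal pass of
 * loopfilter_sb(), each bit of the hmask bytes corresponds to one 8-pixel
 * column, so the walk
 *
 *     for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++)
 *
 * advances one column per bit and stops once no higher bit remains set. For
 * example, hm = 0x0b (bits 0, 1 and 3) filters the column edges at pixel
 * offsets 0, 8 and 24 within the superblock; offset 16 is visited but left
 * untouched, and after bit 3 the condition hm & ~(0x10 - 1) == 0 ends the
 * scan. */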
3029 static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
3031 int sb_start = ( idx * n) >> log2_n;
3032 int sb_end = ((idx + 1) * n) >> log2_n;
3033 *start = FFMIN(sb_start, n) << 3;
3034 *end = FFMIN(sb_end, n) << 3;
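/* Worked example (not decoder code) for set_tile_offset(): with log2_n = 1
 * (two tiles) and n = 11 superblocks, the tile boundaries in 8-pixel block
 * units come out as
 *
 *     idx = 0:  sb_start = (0 * 11) >> 1 = 0   ->  *start = 0  << 3 = 0
 *               sb_end   = (1 * 11) >> 1 = 5   ->  *end   = 5  << 3 = 40
 *     idx = 1:  sb_start = (1 * 11) >> 1 = 5   ->  *start = 5  << 3 = 40
 *               sb_end   = (2 * 11) >> 1 = 11  ->  *end   = 11 << 3 = 88
 *
 * so the 11 superblocks are split 5/6 between the two tiles with no gap and
 * no overlap. */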
3037 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3038 int max_count, int update_factor)
3040 unsigned ct = ct0 + ct1, p2, p1;
3046 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3047 p2 = av_clip(p2, 1, 255);
3048 ct = FFMIN(ct, max_count);
3049 update_factor = FASTDIV(update_factor * ct, max_count);
3051 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3052 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
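/* Worked example (not decoder code) of adapt_prob(): assume the stored
 * probability is *p = 160 and the frame counted ct0 = 30 and ct1 = 10
 * events, with max_count = 20 and update_factor = 128 (the common case):
 *
 *     ct            = 40
 *     p2            = ((30 << 8) + 20) / 40 = 192   (empirical probability)
 *     ct            = FFMIN(40, 20)         = 20
 *     update_factor = 128 * 20 / 20         = 128
 *     *p            = 160 + (((192 - 160) * 128 + 128) >> 8) = 160 + 16 = 176
 *
 * i.e. the stored probability moves halfway towards the per-frame estimate,
 * with the step scaled down whenever fewer than max_count symbols were
 * counted. */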
3055 static void adapt_probs(VP9Context *s)
3058 prob_context *p = &s->prob_ctx[s->framectxid].p;
3059 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
3062 for (i = 0; i < 4; i++)
3063 for (j = 0; j < 2; j++)
3064 for (k = 0; k < 2; k++)
3065 for (l = 0; l < 6; l++)
3066 for (m = 0; m < 6; m++) {
3067 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3068 unsigned *e = s->counts.eob[i][j][k][l][m];
3069 unsigned *c = s->counts.coef[i][j][k][l][m];
3071 if (l == 0 && m >= 3) // dc only has 3 pt
3074 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3075 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3076 adapt_prob(&pp[2], c[1], c[2], 24, uf);
3079 if (s->keyframe || s->intraonly) {
3080 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3081 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3082 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3083 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
3088 for (i = 0; i < 3; i++)
3089 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
3092 for (i = 0; i < 4; i++)
3093 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
3096 if (s->comppredmode == PRED_SWITCHABLE) {
3097 for (i = 0; i < 5; i++)
3098 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
3102 if (s->comppredmode != PRED_SINGLEREF) {
3103 for (i = 0; i < 5; i++)
3104 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3105 s->counts.comp_ref[i][1], 20, 128);
3108 if (s->comppredmode != PRED_COMPREF) {
3109 for (i = 0; i < 5; i++) {
3110 uint8_t *pp = p->single_ref[i];
3111 unsigned (*c)[2] = s->counts.single_ref[i];
3113 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3114 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3118 // block partitioning
3119 for (i = 0; i < 4; i++)
3120 for (j = 0; j < 4; j++) {
3121 uint8_t *pp = p->partition[i][j];
3122 unsigned *c = s->counts.partition[i][j];
3124 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3125 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3126 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3130 if (s->txfmmode == TX_SWITCHABLE) {
3131 for (i = 0; i < 2; i++) {
3132 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3134 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3135 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3136 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3137 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3138 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3139 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3143 // interpolation filter
3144 if (s->filtermode == FILTER_SWITCHABLE) {
3145 for (i = 0; i < 4; i++) {
3146 uint8_t *pp = p->filter[i];
3147 unsigned *c = s->counts.filter[i];
3149 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3150 adapt_prob(&pp[1], c[1], c[2], 20, 128);
3155 for (i = 0; i < 7; i++) {
3156 uint8_t *pp = p->mv_mode[i];
3157 unsigned *c = s->counts.mv_mode[i];
3159 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3160 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3161 adapt_prob(&pp[2], c[1], c[3], 20, 128);
3166 uint8_t *pp = p->mv_joint;
3167 unsigned *c = s->counts.mv_joint;
3169 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3170 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3171 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3175 for (i = 0; i < 2; i++) {
3177 unsigned *c, (*c2)[2], sum;
3179 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3180 s->counts.mv_comp[i].sign[1], 20, 128);
3182 pp = p->mv_comp[i].classes;
3183 c = s->counts.mv_comp[i].classes;
3184 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3185 adapt_prob(&pp[0], c[0], sum, 20, 128);
3187 adapt_prob(&pp[1], c[1], sum, 20, 128);
3189 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3190 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3192 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3193 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3195 adapt_prob(&pp[6], c[6], sum, 20, 128);
3196 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3197 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3198 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3200 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3201 s->counts.mv_comp[i].class0[1], 20, 128);
3202 pp = p->mv_comp[i].bits;
3203 c2 = s->counts.mv_comp[i].bits;
3204 for (j = 0; j < 10; j++)
3205 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
3207 for (j = 0; j < 2; j++) {
3208 pp = p->mv_comp[i].class0_fp[j];
3209 c = s->counts.mv_comp[i].class0_fp[j];
3210 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3211 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3212 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3214 pp = p->mv_comp[i].fp;
3215 c = s->counts.mv_comp[i].fp;
3216 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3217 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3218 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3220 if (s->highprecisionmvs) {
3221 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3222 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3223 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3224 s->counts.mv_comp[i].hp[1], 20, 128);
3229 for (i = 0; i < 4; i++) {
3230 uint8_t *pp = p->y_mode[i];
3231 unsigned *c = s->counts.y_mode[i], sum, s2;
3233 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3234 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3235 sum -= c[TM_VP8_PRED];
3236 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3237 sum -= c[VERT_PRED];
3238 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3239 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3241 adapt_prob(&pp[3], s2, sum, 20, 128);
3243 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3244 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3245 sum -= c[DIAG_DOWN_LEFT_PRED];
3246 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3247 sum -= c[VERT_LEFT_PRED];
3248 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3249 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3253 for (i = 0; i < 10; i++) {
3254 uint8_t *pp = p->uv_mode[i];
3255 unsigned *c = s->counts.uv_mode[i], sum, s2;
3257 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3258 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3259 sum -= c[TM_VP8_PRED];
3260 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3261 sum -= c[VERT_PRED];
3262 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3263 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3265 adapt_prob(&pp[3], s2, sum, 20, 128);
3267 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3268 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3269 sum -= c[DIAG_DOWN_LEFT_PRED];
3270 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3271 sum -= c[VERT_LEFT_PRED];
3272 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3273 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3277 static int vp9_decode_frame(AVCodecContext *ctx, void *out_pic,
3278 int *got_frame, const uint8_t *data, int size)
3280 VP9Context *s = ctx->priv_data;
3281 int res, tile_row, tile_col, i, ref, row, col;
3282 ptrdiff_t yoff = 0, uvoff = 0;
3283 //AVFrame *prev_frame = s->f; // for segmentation map
3285 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3287 } else if (res == 0) {
3288 if (!s->refs[ref]) {
3289 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3290 return AVERROR_INVALIDDATA;
3292 if ((res = av_frame_ref(out_pic, s->refs[ref])) < 0)
3300 // discard old references
3301 for (i = 0; i < 10; i++) {
3302 AVFrame *f = s->fb[i];
3303 if (f->data[0] && f != s->f &&
3304 f != s->refs[0] && f != s->refs[1] &&
3305 f != s->refs[2] && f != s->refs[3] &&
3306 f != s->refs[4] && f != s->refs[5] &&
3307 f != s->refs[6] && f != s->refs[7])
3311 // find unused reference
3312 for (i = 0; i < 10; i++)
3313 if (!s->fb[i]->data[0])
3316 if ((res = ff_get_buffer(ctx, s->f,
3317 s->refreshrefmask ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
3319 s->f->key_frame = s->keyframe;
3320 s->f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3322 // main tile decode loop
3323 memset(s->above_partition_ctx, 0, s->cols);
3324 memset(s->above_skip_ctx, 0, s->cols);
3325 if (s->keyframe || s->intraonly) {
3326 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
3328 memset(s->above_mode_ctx, NEARESTMV, s->cols);
3330 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
3331 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
3332 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
3333 memset(s->above_segpred_ctx, 0, s->cols);
3334 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
3335 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
3336 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
3337 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3340 if (tile_col == s->tiling.tile_cols - 1 &&
3341 tile_row == s->tiling.tile_rows - 1) {
3344 tile_size = AV_RB32(data);
3348 if (tile_size > size)
3349 return AVERROR_INVALIDDATA;
3350 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
3351 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) // marker bit
3352 return AVERROR_INVALIDDATA;
3357 for (row = s->tiling.tile_row_start;
3358 row < s->tiling.tile_row_end;
3359 row += 8, yoff += s->f->linesize[0] * 64,
3360 uvoff += s->f->linesize[1] * 32) {
3361 struct VP9Filter *lflvl_ptr = s->lflvl;
3362 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
3364 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3365 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
3366 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
3368 memset(s->left_partition_ctx, 0, 8);
3369 memset(s->left_skip_ctx, 0, 8);
3370 if (s->keyframe || s->intraonly) {
3371 memset(s->left_mode_ctx, DC_PRED, 16);
3373 memset(s->left_mode_ctx, NEARESTMV, 8);
3375 memset(s->left_y_nnz_ctx, 0, 16);
3376 memset(s->left_uv_nnz_ctx, 0, 16);
3377 memset(s->left_segpred_ctx, 0, 8);
3379 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
3380 for (col = s->tiling.tile_col_start;
3381 col < s->tiling.tile_col_end;
3382 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3383 // FIXME integrate with lf code (i.e. zero after each
3384 // use, similar to invtxfm coefficients, or similar)
3385 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
3387 if ((res = decode_sb(ctx, row, col, lflvl_ptr,
3388 yoff2, uvoff2, BL_64X64)) < 0)
3391 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
3394 // backup pre-loopfilter reconstruction data for intra
3395 // prediction of next row of sb64s
3396 if (row + 8 < s->rows) {
3397 memcpy(s->intra_pred_data[0],
3398 s->f->data[0] + yoff + 63 * s->f->linesize[0],
3400 memcpy(s->intra_pred_data[1],
3401 s->f->data[1] + uvoff + 31 * s->f->linesize[1],
3403 memcpy(s->intra_pred_data[2],
3404 s->f->data[2] + uvoff + 31 * s->f->linesize[2],
3408 // loopfilter one row
3409 if (s->filter.level) {
3412 lflvl_ptr = s->lflvl;
3413 for (col = 0; col < s->cols;
3414 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3415 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
3421 // backward adaptivity (or, in parallel decoding mode, forward adaptivity:
3422 // probability maintenance between frames)
3423 if (s->refreshctx) {
3424 if (s->parallelmode) {
3427 for (i = 0; i < 4; i++)
3428 for (j = 0; j < 2; j++)
3429 for (k = 0; k < 2; k++)
3430 for (l = 0; l < 6; l++)
3431 for (m = 0; m < 6; m++)
3432 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
3433 s->prob.coef[i][j][k][l][m], 3);
3434 s->prob_ctx[s->framectxid].p = s->prob.p;
3439 FFSWAP(struct VP9mvrefPair *, s->mv[0], s->mv[1]);
3442 for (i = 0; i < 8; i++)
3443 if (s->refreshrefmask & (1 << i))
3446 if (!s->invisible) {
3447 if ((res = av_frame_ref(out_pic, s->f)) < 0)
3455 static int vp9_decode_packet(AVCodecContext *avctx, void *out_pic,
3456 int *got_frame, AVPacket *avpkt)
3458 const uint8_t *data = avpkt->data;
3459 int size = avpkt->size, marker, res;
3461 // read superframe index - this is a collection of individual frames that
3462 // together lead to one visible frame
3463 av_assert1(size > 0); // without CODEC_CAP_DELAY, this is implied
3464 marker = data[size - 1];
3465 if ((marker & 0xe0) == 0xc0) {
3466 int nbytes = 1 + ((marker >> 3) & 0x3);
3467 int n_frames = 1 + (marker & 0x7), idx_sz = 2 + n_frames * nbytes;
3469 if (size >= idx_sz && data[size - idx_sz] == marker) {
3470 const uint8_t *idx = data + size + 1 - idx_sz;
3472 #define case_n(a, rd) \
3474 while (n_frames--) { \
3478 av_log(avctx, AV_LOG_ERROR, \
3479 "Superframe packet size too big: %d > %d\n", \
3481 return AVERROR_INVALIDDATA; \
3483 res = vp9_decode_frame(avctx, out_pic, got_frame, \
3492 case_n(2, AV_RL16(idx));
3493 case_n(3, AV_RL24(idx));
3494 case_n(4, AV_RL32(idx));
3499 // if we get here, there was no valid superframe index, i.e. this is just
3500 // one whole single frame - decode it as such from the complete input buf
3501 if ((res = vp9_decode_frame(avctx, out_pic, got_frame, data, size)) < 0)
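/* Illustrative sketch (assumptions, not decoder code): the superframe index
 * handled above sits at the end of the packet and is framed by a marker byte
 * repeated at both ends of the index. A minimal stand-alone check of just
 * the index header, using hypothetical names, could look like:
 *
 *     static int parse_superframe_header(const uint8_t *data, int size,
 *                                        int *n_frames, int *nbytes)
 *     {
 *         int marker, idx_sz;
 *         if (size < 1)
 *             return 0;
 *         marker = data[size - 1];
 *         if ((marker & 0xe0) != 0xc0)
 *             return 0;                          // no superframe index
 *         *nbytes   = 1 + ((marker >> 3) & 0x3); // bytes per size field
 *         *n_frames = 1 + (marker & 0x7);        // number of sub-frames
 *         idx_sz    = 2 + *n_frames * *nbytes;   // marker + sizes + marker
 *         return size >= idx_sz && data[size - idx_sz] == marker;
 *     }
 *
 * The per-frame sizes themselves are stored little-endian in nbytes-wide
 * fields, which is what the case_n() macro above reads via AV_RL16/24/32. */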
3506 static void vp9_decode_flush(AVCodecContext *ctx)
3508 VP9Context *s = ctx->priv_data;
3511 for (i = 0; i < 10; i++)
3512 if (s->fb[i]->data[0])
3513 av_frame_unref(s->fb[i]);
3514 for (i = 0; i < 8; i++)
3519 static av_cold int vp9_decode_init(AVCodecContext *ctx)
3521 VP9Context *s = ctx->priv_data;
3524 ctx->pix_fmt = AV_PIX_FMT_YUV420P;
3525 ff_vp9dsp_init(&s->dsp);
3526 ff_videodsp_init(&s->vdsp, 8);
3527 for (i = 0; i < 10; i++) {
3528 s->fb[i] = av_frame_alloc();
3530 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
3531 return AVERROR(ENOMEM);
3534 s->filter.sharpness = -1;
3539 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3541 VP9Context *s = ctx->priv_data;
3544 for (i = 0; i < 10; i++) {
3545 if (s->fb[i]->data[0])
3546 av_frame_unref(s->fb[i]);
3547 av_frame_free(&s->fb[i]);
3549 av_freep(&s->above_partition_ctx);
3550 s->above_skip_ctx = s->above_txfm_ctx = s->above_mode_ctx = NULL;
3551 s->above_y_nnz_ctx = s->above_uv_nnz_ctx[0] = s->above_uv_nnz_ctx[1] = NULL;
3552 s->intra_pred_data[0] = s->intra_pred_data[1] = s->intra_pred_data[2] = NULL;
3553 s->above_segpred_ctx = s->above_intra_ctx = s->above_comp_ctx = NULL;
3554 s->above_ref_ctx = s->above_filter_ctx = NULL;
3555 s->above_mv_ctx = NULL;
3556 s->segmentation_map = NULL;
3557 s->mv[0] = s->mv[1] = NULL;
3565 AVCodec ff_vp9_decoder = {
3567 .type = AVMEDIA_TYPE_VIDEO,
3568 .id = AV_CODEC_ID_VP9,
3569 .priv_data_size = sizeof(VP9Context),
3570 .init = vp9_decode_init,
3571 .close = vp9_decode_free,
3572 .decode = vp9_decode_packet,
3573 .capabilities = CODEC_CAP_DR1,
3574 .flush = vp9_decode_flush,
3575 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),