/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
32 #include "libavutil/avassert.h"
34 #define VP9_SYNCCODE 0x498342
73 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
74 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
// State for the single block currently being decoded.
// NOTE(review): this listing is a partial numbered extract; the fused leading
// numbers are original file line numbers and lines are missing between them
// (e.g. the closing "} VP9Block;" falls in a gap).
77 typedef struct VP9Block {
// segment id, intra/compound flags, per-ref reference indices, per-subblock
// luma modes, chroma mode and skip flag for this block
78 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
79 enum FilterMode filter;
// one motion vector per sub-block index and per reference
80 VP56mv mv[4 /* b_idx */][2 /* ref */];
81 enum BlockSize bs;
82 enum TxfmMode tx, uvtx;
// block position in 8x8-block units; row7/col7 presumably the position
// modulo 8 within the superblock — TODO confirm against setters
84 int row, row7, col, col7;
86 ptrdiff_t y_stride, uv_stride;
// Decoder-global state. NOTE(review): partial numbered extract; many members
// (bitstream readers, prob/counts struct wrappers, segmentation/filter
// sub-structs) live on lines missing from this listing.
89 typedef struct VP9Context {
// frame-level flags parsed from the uncompressed header
100 uint8_t keyframe, last_keyframe;
102 uint8_t use_last_frame_mvs;
108 uint8_t refreshrefmask;
109 uint8_t highprecisionmvs;
110 enum FilterMode filtermode;
111 uint8_t allowcompinter;
114 uint8_t parallelmode;
// the two variable compound references picked from signbias[] comparisons
118 uint8_t varcompref[2];
// 8 reference-frame slots plus the frame being decoded
119 AVFrame *refs[8], *f;
125 uint8_t mblim_lut[64];
// per-plane quantizer deltas relative to yac_qi
133 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
138 uint8_t absolute_vals;
144 uint8_t skip_enabled;
// tiling layout (members of a tiling sub-struct in the full file)
153 unsigned log2_tile_cols, log2_tile_rows;
154 unsigned tile_cols, tile_rows;
155 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
// frame dimensions: 64x64 superblocks and 8x8 blocks
157 unsigned sb_cols, sb_rows, rows, cols;
160 uint8_t coef[4][2][2][6][6][3];
164 uint8_t coef[4][2][2][6][6][11];
// bitstream adaptation counters, zeroed per frame in decode_frame_header()
169 unsigned y_mode[4][10];
170 unsigned uv_mode[10][10];
171 unsigned filter[4][3];
172 unsigned mv_mode[7][4];
173 unsigned intra[4][2];
175 unsigned single_ref[5][2][2];
176 unsigned comp_ref[5][2];
177 unsigned tx32p[2][4];
178 unsigned tx16p[2][3];
181 unsigned mv_joint[4];
184 unsigned classes[11];
186 unsigned bits[10][2];
187 unsigned class0_fp[2][4];
189 unsigned class0_hp[2];
192 unsigned partition[4][4][4];
193 unsigned coef[4][2][2][6][6][3];
194 unsigned eob[4][2][2][6][6][2];
196 enum TxfmMode txfmmode;
197 enum CompPredMode comppredmode;
199 // contextual (left/above) cache
200 uint8_t left_partition_ctx[8], *above_partition_ctx;
201 uint8_t left_mode_ctx[16], *above_mode_ctx;
202 // FIXME maybe merge some of the below in a flags field?
203 uint8_t left_y_nnz_ctx[16], *above_y_nnz_ctx;
204 uint8_t left_uv_nnz_ctx[2][8], *above_uv_nnz_ctx[2];
205 uint8_t left_skip_ctx[8], *above_skip_ctx; // 1bit
206 uint8_t left_txfm_ctx[8], *above_txfm_ctx; // 2bit
207 uint8_t left_segpred_ctx[8], *above_segpred_ctx; // 1bit
208 uint8_t left_intra_ctx[8], *above_intra_ctx; // 1bit
209 uint8_t left_comp_ctx[8], *above_comp_ctx; // 1bit
210 uint8_t left_ref_ctx[8], *above_ref_ctx; // 2bit
211 uint8_t left_filter_ctx[8], *above_filter_ctx;
212 VP56mv left_mv_ctx[16][2], (*above_mv_ctx)[2];
// whole-frame buffers, all carved from one arena in update_size()
215 uint8_t *intra_pred_data[3];
216 uint8_t *segmentation_map;
// mv[0] = this frame's per-8x8 MV/ref pairs, mv[1] = previous frame's
217 struct VP9mvrefPair *mv[2];
218 struct VP9Filter *lflvl;
219 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71*80];
221 // block reconstruction intermediates
222 DECLARE_ALIGNED(32, int16_t, block)[4096];
223 DECLARE_ALIGNED(32, int16_t, uvblock)[2][1024];
225 uint8_t uveob[2][64];
// legal MV range for the current block, used by clamp_mv()
226 VP56mv min_mv, max_mv;
227 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
228 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
// Block width/height lookup per block size. Judging by the values (16 down
// to 1, then 8 down to 1), index [0] is presumably in 4-pixel units and
// index [1] in 8-pixel units, ordered largest block size first — TODO
// confirm against the N_BS_SIZES enum (not visible in this extract).
231 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
233 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
234 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
236 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
237 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
// (Re)allocate all per-frame-geometry buffers for a w x h frame.
// Returns 0 on success (return statement in a gap of this extract) or
// AVERROR(ENOMEM). Everything is carved out of a single av_malloc() arena
// via the assign() macro, so one av_freep() of above_partition_ctx (the
// arena base) releases it all.
241 static int update_size(AVCodecContext *ctx, int w, int h)
243 VP9Context *s = ctx->priv_data;
246 av_assert0(w > 0 && h > 0);
// fast path: geometry unchanged and buffers already allocated
248 if (s->above_partition_ctx && w == ctx->width && h == ctx->height)
// frame size in 64x64 superblocks and in 8x8 blocks
253 s->sb_cols = (w + 63) >> 6;
254 s->sb_rows = (h + 63) >> 6;
255 s->cols = (w + 7) >> 3;
256 s->rows = (h + 7) >> 3;
// carve `n * sb_cols` elements off the arena pointer p for each buffer;
// the sizes below must stay in sync with the av_malloc() total
258 #define assign(var, type, n) var = (type) p; p += s->sb_cols * n * sizeof(*var)
259 av_freep(&s->above_partition_ctx);
260 p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx) +
261 64 * s->sb_rows * (1 + sizeof(*s->mv[0]) * 2)));
263 return AVERROR(ENOMEM);
264 assign(s->above_partition_ctx, uint8_t *, 8);
265 assign(s->above_skip_ctx, uint8_t *, 8);
266 assign(s->above_txfm_ctx, uint8_t *, 8);
267 assign(s->above_mode_ctx, uint8_t *, 16);
268 assign(s->above_y_nnz_ctx, uint8_t *, 16);
269 assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
270 assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
271 assign(s->intra_pred_data[0], uint8_t *, 64);
272 assign(s->intra_pred_data[1], uint8_t *, 32);
273 assign(s->intra_pred_data[2], uint8_t *, 32);
274 assign(s->above_segpred_ctx, uint8_t *, 8);
275 assign(s->above_intra_ctx, uint8_t *, 8);
276 assign(s->above_comp_ctx, uint8_t *, 8);
277 assign(s->above_ref_ctx, uint8_t *, 8);
278 assign(s->above_filter_ctx, uint8_t *, 8);
279 assign(s->lflvl, struct VP9Filter *, 1);
280 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
// whole-frame maps: one entry per 8x8 block (64 per sb_row per sb_col)
281 assign(s->segmentation_map, uint8_t *, 64 * s->sb_rows);
282 assign(s->mv[0], struct VP9mvrefPair *, 64 * s->sb_rows);
283 assign(s->mv[1], struct VP9mvrefPair *, 64 * s->sb_rows);
289 // for some reason the sign bit is at the end, not the start, of a bit sequence
290 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
292 int v = get_bits(gb, n);
293 return get_bits1(gb) ? -v : v;
296 static av_always_inline int inv_recenter_nonneg(int v, int m)
298 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
301 // differential forward probability updates
302 static int update_prob(VP56RangeCoder *c, int p)
304 static const int inv_map_table[254] = {
305 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
306 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
307 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
308 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
309 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
310 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
311 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
312 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
313 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
314 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
315 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
316 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
317 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
318 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
319 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
320 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
321 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
322 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
327 /* This code is trying to do a differential probability update. For a
328 * current probability A in the range [1, 255], the difference to a new
329 * probability of any value can be expressed differentially as 1-A,255-A
330 * where some part of this (absolute range) exists both in positive as
331 * well as the negative part, whereas another part only exists in one
332 * half. We're trying to code this shared part differentially, i.e.
333 * times two where the value of the lowest bit specifies the sign, and
334 * the single part is then coded on top of this. This absolute difference
335 * then again has a value of [0,254], but a bigger value in this range
336 * indicates that we're further away from the original value A, so we
337 * can code this as a VLC code, since higher values are increasingly
338 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
339 * updates vs. the 'fine, exact' updates further down the range, which
340 * adds one extra dimension to this differential update model. */
342 if (!vp8_rac_get(c)) {
343 d = vp8_rac_get_uint(c, 4) + 0;
344 } else if (!vp8_rac_get(c)) {
345 d = vp8_rac_get_uint(c, 4) + 16;
346 } else if (!vp8_rac_get(c)) {
347 d = vp8_rac_get_uint(c, 5) + 32;
349 d = vp8_rac_get_uint(c, 7);
351 d = (d << 1) - 65 + vp8_rac_get(c);
355 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
356 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
// Parse the VP9 uncompressed frame header plus the arith-coded (compressed)
// header. On success returns the total header size in bytes
// ((data2 - data) + size2); on error returns a negative AVERROR code. For
// "show existing frame" packets, *ref receives the reference slot to display.
// NOTE(review): partial numbered extract — the fused leading numbers are
// original file line numbers and lines are missing in the gaps between them
// (closing braces, else-branches, some statements).
359 static int decode_frame_header(AVCodecContext *ctx,
360 const uint8_t *data, int size, int *ref)
362 VP9Context *s = ctx->priv_data;
363 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
365 const uint8_t *data2;
// /* general header */
368 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
369 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
372 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
373 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
374 return AVERROR_INVALIDDATA;
376 s->profile = get_bits1(&s->gb);
377 if (get_bits1(&s->gb)) { // reserved bit
378 av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
379 return AVERROR_INVALIDDATA;
// "show existing frame" shortcut: output ref slot directly, no decode
381 if (get_bits1(&s->gb)) {
382 *ref = get_bits(&s->gb, 3);
385 s->last_keyframe = s->keyframe;
386 s->keyframe = !get_bits1(&s->gb);
387 last_invisible = s->invisible;
388 s->invisible = !get_bits1(&s->gb);
389 s->errorres = get_bits1(&s->gb);
390 // FIXME disable this upon resolution change
391 s->use_last_frame_mvs = !s->errorres && !last_invisible;
// keyframe path: sync code, colorspace, coded size
393 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
394 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
395 return AVERROR_INVALIDDATA;
397 s->colorspace = get_bits(&s->gb, 3);
398 if (s->colorspace == 7) { // RGB = profile 1
399 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
400 return AVERROR_INVALIDDATA;
402 s->fullrange = get_bits1(&s->gb);
403 // for profile 1, here follows the subsampling bits
404 s->refreshrefmask = 0xff;
405 w = get_bits(&s->gb, 16) + 1;
406 h = get_bits(&s->gb, 16) + 1;
407 if (get_bits1(&s->gb)) // display size
408 skip_bits(&s->gb, 32);
// non-keyframe path: intra-only / inter headers
410 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
411 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
413 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
414 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
415 return AVERROR_INVALIDDATA;
417 s->refreshrefmask = get_bits(&s->gb, 8);
418 w = get_bits(&s->gb, 16) + 1;
419 h = get_bits(&s->gb, 16) + 1;
420 if (get_bits1(&s->gb)) // display size
421 skip_bits(&s->gb, 32);
// inter frame: three active references with per-ref sign bias
423 s->refreshrefmask = get_bits(&s->gb, 8);
424 s->refidx[0] = get_bits(&s->gb, 3);
425 s->signbias[0] = get_bits1(&s->gb);
426 s->refidx[1] = get_bits(&s->gb, 3);
427 s->signbias[1] = get_bits1(&s->gb);
428 s->refidx[2] = get_bits(&s->gb, 3);
429 s->signbias[2] = get_bits1(&s->gb);
430 if (!s->refs[s->refidx[0]]->buf[0] ||
431 !s->refs[s->refidx[1]]->buf[0] ||
432 !s->refs[s->refidx[2]]->buf[0]) {
433 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
434 return AVERROR_INVALIDDATA;
// frame size: either copied from one of the refs or coded explicitly
436 if (get_bits1(&s->gb)) {
437 w = s->refs[s->refidx[0]]->width;
438 h = s->refs[s->refidx[0]]->height;
439 } else if (get_bits1(&s->gb)) {
440 w = s->refs[s->refidx[1]]->width;
441 h = s->refs[s->refidx[1]]->height;
442 } else if (get_bits1(&s->gb)) {
443 w = s->refs[s->refidx[2]]->width;
444 h = s->refs[s->refidx[2]]->height;
446 w = get_bits(&s->gb, 16) + 1;
447 h = get_bits(&s->gb, 16) + 1;
449 if (get_bits1(&s->gb)) // display size
450 skip_bits(&s->gb, 32);
451 s->highprecisionmvs = get_bits1(&s->gb);
452 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
// compound prediction is only possible when refs disagree in sign bias;
// the ref that stands alone becomes the fixed one, the others variable
454 s->allowcompinter = s->signbias[0] != s->signbias[1] ||
455 s->signbias[0] != s->signbias[2];
456 if (s->allowcompinter) {
457 if (s->signbias[0] == s->signbias[1]) {
459 s->varcompref[0] = 0;
460 s->varcompref[1] = 1;
461 } else if (s->signbias[0] == s->signbias[2]) {
463 s->varcompref[0] = 0;
464 s->varcompref[1] = 2;
467 s->varcompref[0] = 1;
468 s->varcompref[1] = 2;
473 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
474 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
475 s->framectxid = c = get_bits(&s->gb, 2);
477 /* loopfilter header data */
478 s->filter.level = get_bits(&s->gb, 6);
479 sharp = get_bits(&s->gb, 3);
480 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
481 // the old cache values since they are still valid
482 if (s->filter.sharpness != sharp)
483 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
484 s->filter.sharpness = sharp;
485 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
486 if (get_bits1(&s->gb)) {
487 for (i = 0; i < 4; i++)
488 if (get_bits1(&s->gb))
489 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
490 for (i = 0; i < 2; i++)
491 if (get_bits1(&s->gb))
492 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
495 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
498 /* quantization header data */
499 s->yac_qi = get_bits(&s->gb, 8);
500 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
501 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
502 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
503 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
504 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
506 /* segmentation header info */
507 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
508 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
509 for (i = 0; i < 7; i++)
510 s->prob.seg[i] = get_bits1(&s->gb) ?
511 get_bits(&s->gb, 8) : 255;
512 if ((s->segmentation.temporal = get_bits1(&s->gb)))
513 for (i = 0; i < 3; i++)
514 s->prob.segpred[i] = get_bits1(&s->gb) ?
515 get_bits(&s->gb, 8) : 255;
// per-segment feature data (Q/loopfilter deltas, ref override, skip)
518 if (get_bits1(&s->gb)) {
519 s->segmentation.absolute_vals = get_bits1(&s->gb);
520 for (i = 0; i < 8; i++) {
521 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
522 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
523 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
524 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
525 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
526 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
527 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
531 s->segmentation.feat[0].q_enabled = 0;
532 s->segmentation.feat[0].lf_enabled = 0;
533 s->segmentation.feat[0].skip_enabled = 0;
534 s->segmentation.feat[0].ref_enabled = 0;
537 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
538 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
539 int qyac, qydc, quvac, quvdc, lflvl, sh;
541 if (s->segmentation.feat[i].q_enabled) {
542 if (s->segmentation.absolute_vals)
543 qyac = s->segmentation.feat[i].q_val;
545 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
549 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
550 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
551 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
552 qyac = av_clip_uintp2(qyac, 8);
554 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
555 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
556 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
557 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];
// per-segment loopfilter levels; deltas are scaled up when level >= 32
559 sh = s->filter.level >= 32;
560 if (s->segmentation.feat[i].lf_enabled) {
561 if (s->segmentation.absolute_vals)
562 lflvl = s->segmentation.feat[i].lf_val;
564 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
566 lflvl = s->filter.level;
568 s->segmentation.feat[i].lflvl[0][0] =
569 s->segmentation.feat[i].lflvl[0][1] =
570 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
571 for (j = 1; j < 4; j++) {
572 s->segmentation.feat[i].lflvl[j][0] =
573 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
574 s->lf_delta.mode[0]) << sh), 6);
575 s->segmentation.feat[i].lflvl[j][1] =
576 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
577 s->lf_delta.mode[1]) << sh), 6);
// /* tiling info */ — (re)allocate geometry-dependent buffers first
582 if ((res = update_size(ctx, w, h)) < 0) {
583 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
586 for (s->tiling.log2_tile_cols = 0;
587 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
588 s->tiling.log2_tile_cols++) ;
589 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
590 max = FFMAX(0, max - 1);
591 while (max > s->tiling.log2_tile_cols) {
592 if (get_bits1(&s->gb))
593 s->tiling.log2_tile_cols++;
597 s->tiling.log2_tile_rows = decode012(&s->gb);
598 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
// one range coder per tile column; resize only when the count changes
599 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
600 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
601 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
602 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
604 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
605 return AVERROR(ENOMEM);
// reset all four probability contexts to the spec defaults
609 if (s->keyframe || s->errorres || s->intraonly) {
610 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
611 s->prob_ctx[3].p = vp9_default_probs;
612 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
613 sizeof(vp9_default_coef_probs));
614 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
615 sizeof(vp9_default_coef_probs));
616 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
617 sizeof(vp9_default_coef_probs));
618 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
619 sizeof(vp9_default_coef_probs));
622 // next 16 bits is size of the rest of the header (arith-coded)
623 size2 = get_bits(&s->gb, 16);
624 data2 = align_get_bits(&s->gb);
625 if (size2 > size - (data2 - data)) {
626 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
627 return AVERROR_INVALIDDATA;
629 ff_vp56_init_range_decoder(&s->c, data2, size2);
630 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
631 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
632 return AVERROR_INVALIDDATA;
// reset adaptation counters for this frame
635 if (s->keyframe || s->intraonly) {
636 memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
638 memset(&s->counts, 0, sizeof(s->counts));
640 // FIXME is it faster to not copy here, but do it down in the fw updates
641 // as explicit copies if the fw update is missing (and skip the copy upon
643 s->prob.p = s->prob_ctx[c].p;
// /* txfm updates */
647 s->txfmmode = TX_4X4;
649 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
650 if (s->txfmmode == 3)
651 s->txfmmode += vp8_rac_get(&s->c);
653 if (s->txfmmode == TX_SWITCHABLE) {
654 for (i = 0; i < 2; i++)
655 if (vp56_rac_get_prob_branchy(&s->c, 252))
656 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
657 for (i = 0; i < 2; i++)
658 for (j = 0; j < 2; j++)
659 if (vp56_rac_get_prob_branchy(&s->c, 252))
660 s->prob.p.tx16p[i][j] =
661 update_prob(&s->c, s->prob.p.tx16p[i][j]);
662 for (i = 0; i < 2; i++)
663 for (j = 0; j < 3; j++)
664 if (vp56_rac_get_prob_branchy(&s->c, 252))
665 s->prob.p.tx32p[i][j] =
666 update_prob(&s->c, s->prob.p.tx32p[i][j]);
// /* coef probability updates */, one pass per txfm size
671 for (i = 0; i < 4; i++) {
672 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
673 if (vp8_rac_get(&s->c)) {
674 for (j = 0; j < 2; j++)
675 for (k = 0; k < 2; k++)
676 for (l = 0; l < 6; l++)
677 for (m = 0; m < 6; m++) {
678 uint8_t *p = s->prob.coef[i][j][k][l][m];
679 uint8_t *r = ref[j][k][l][m];
680 if (m >= 3 && l == 0) // dc only has 3 pt
682 for (n = 0; n < 3; n++) {
683 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
684 p[n] = update_prob(&s->c, r[n]);
// no explicit update: copy the context's probabilities unchanged
692 for (j = 0; j < 2; j++)
693 for (k = 0; k < 2; k++)
694 for (l = 0; l < 6; l++)
695 for (m = 0; m < 6; m++) {
696 uint8_t *p = s->prob.coef[i][j][k][l][m];
697 uint8_t *r = ref[j][k][l][m];
698 if (m > 3 && l == 0) // dc only has 3 pt
704 if (s->txfmmode == i)
// /* mode updates */
709 for (i = 0; i < 3; i++)
710 if (vp56_rac_get_prob_branchy(&s->c, 252))
711 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
712 if (!s->keyframe && !s->intraonly) {
713 for (i = 0; i < 7; i++)
714 for (j = 0; j < 3; j++)
715 if (vp56_rac_get_prob_branchy(&s->c, 252))
716 s->prob.p.mv_mode[i][j] =
717 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
719 if (s->filtermode == FILTER_SWITCHABLE)
720 for (i = 0; i < 4; i++)
721 for (j = 0; j < 2; j++)
722 if (vp56_rac_get_prob_branchy(&s->c, 252))
723 s->prob.p.filter[i][j] =
724 update_prob(&s->c, s->prob.p.filter[i][j]);
726 for (i = 0; i < 4; i++)
727 if (vp56_rac_get_prob_branchy(&s->c, 252))
728 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
730 if (s->allowcompinter) {
731 s->comppredmode = vp8_rac_get(&s->c);
733 s->comppredmode += vp8_rac_get(&s->c);
734 if (s->comppredmode == PRED_SWITCHABLE)
735 for (i = 0; i < 5; i++)
736 if (vp56_rac_get_prob_branchy(&s->c, 252))
738 update_prob(&s->c, s->prob.p.comp[i]);
740 s->comppredmode = PRED_SINGLEREF;
743 if (s->comppredmode != PRED_COMPREF) {
744 for (i = 0; i < 5; i++) {
745 if (vp56_rac_get_prob_branchy(&s->c, 252))
746 s->prob.p.single_ref[i][0] =
747 update_prob(&s->c, s->prob.p.single_ref[i][0]);
748 if (vp56_rac_get_prob_branchy(&s->c, 252))
749 s->prob.p.single_ref[i][1] =
750 update_prob(&s->c, s->prob.p.single_ref[i][1]);
754 if (s->comppredmode != PRED_SINGLEREF) {
755 for (i = 0; i < 5; i++)
756 if (vp56_rac_get_prob_branchy(&s->c, 252))
757 s->prob.p.comp_ref[i] =
758 update_prob(&s->c, s->prob.p.comp_ref[i]);
761 for (i = 0; i < 4; i++)
762 for (j = 0; j < 9; j++)
763 if (vp56_rac_get_prob_branchy(&s->c, 252))
764 s->prob.p.y_mode[i][j] =
765 update_prob(&s->c, s->prob.p.y_mode[i][j]);
767 for (i = 0; i < 4; i++)
768 for (j = 0; j < 4; j++)
769 for (k = 0; k < 3; k++)
770 if (vp56_rac_get_prob_branchy(&s->c, 252))
771 s->prob.p.partition[3 - i][j][k] =
772 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
774 // mv fields don't use the update_prob subexp model for some reason
775 for (i = 0; i < 3; i++)
776 if (vp56_rac_get_prob_branchy(&s->c, 252))
777 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
779 for (i = 0; i < 2; i++) {
780 if (vp56_rac_get_prob_branchy(&s->c, 252))
781 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
783 for (j = 0; j < 10; j++)
784 if (vp56_rac_get_prob_branchy(&s->c, 252))
785 s->prob.p.mv_comp[i].classes[j] =
786 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
788 if (vp56_rac_get_prob_branchy(&s->c, 252))
789 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
791 for (j = 0; j < 10; j++)
792 if (vp56_rac_get_prob_branchy(&s->c, 252))
793 s->prob.p.mv_comp[i].bits[j] =
794 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
797 for (i = 0; i < 2; i++) {
798 for (j = 0; j < 2; j++)
799 for (k = 0; k < 3; k++)
800 if (vp56_rac_get_prob_branchy(&s->c, 252))
801 s->prob.p.mv_comp[i].class0_fp[j][k] =
802 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
804 for (j = 0; j < 3; j++)
805 if (vp56_rac_get_prob_branchy(&s->c, 252))
806 s->prob.p.mv_comp[i].fp[j] =
807 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// hp (1/8-pel) probabilities are only coded when high-precision MVs are on
810 if (s->highprecisionmvs) {
811 for (i = 0; i < 2; i++) {
812 if (vp56_rac_get_prob_branchy(&s->c, 252))
813 s->prob.p.mv_comp[i].class0_hp =
814 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
816 if (vp56_rac_get_prob_branchy(&s->c, 252))
817 s->prob.p.mv_comp[i].hp =
818 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
823 return (data2 - data) + size2;
826 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
829 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
830 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
// Build the motion-vector prediction for component z of reference `ref` of
// the current block, writing the chosen MV into *pmv. Candidates are probed
// in priority order: sub-block MVs already decoded in this block, the
// spatial above/left context, spatial neighbours with the same reference,
// the co-located MV of the previous frame, then neighbours/co-located with a
// *different* reference (sign-flipped when the sign biases disagree). The
// RETURN_* macros exit the function as soon as `idx` distinct candidates
// have been seen. NOTE(review): partial numbered extract — macro bodies and
// several statements fall in gaps between the fused original line numbers.
833 static void find_ref_mvs(VP9Context *s,
834 VP56mv *pmv, int ref, int z, int idx, int sb)
// per-block-size list of (col,row) offsets at which to probe neighbour MVs
836 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
837 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
838 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
839 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
840 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
841 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
842 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
843 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
844 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
845 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
846 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
847 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
848 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
849 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
850 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
851 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
852 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
853 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
854 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
855 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
856 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
857 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
858 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
859 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
860 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
861 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
862 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
864 VP9Block *const b = &s->b;
865 int row = b->row, col = b->col, row7 = b->row7;
866 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
// sentinel: no MV candidate recorded yet (both components 0x8000)
867 #define INVALID_MV 0x80008000U
868 uint32_t mem = INVALID_MV;
// return an already-decoded sub-block MV of this block, unclamped
871 #define RETURN_DIRECT_MV(mv) \
873 uint32_t m = AV_RN32A(&mv); \
877 } else if (mem == INVALID_MV) { \
879 } else if (m != mem) { \
// keep processing ref mvs until we have two distinct ones
886 if (sb == 2 || sb == 1) {
887 RETURN_DIRECT_MV(b->mv[0][z]);
888 } else if (sb == 3) {
889 RETURN_DIRECT_MV(b->mv[2][z]);
890 RETURN_DIRECT_MV(b->mv[1][z]);
891 RETURN_DIRECT_MV(b->mv[0][z]);
// same as RETURN_DIRECT_MV but candidates are clamped via clamp_mv()
894 #define RETURN_MV(mv) \
899 clamp_mv(&tmp, &mv, s); \
900 m = AV_RN32A(&tmp); \
904 } else if (mem == INVALID_MV) { \
906 } else if (m != mem) { \
911 uint32_t m = AV_RN32A(&mv); \
913 clamp_mv(pmv, &mv, s); \
915 } else if (mem == INVALID_MV) { \
917 } else if (m != mem) { \
918 clamp_mv(pmv, &mv, s); \
// directly above block, via the above_mv_ctx cache
925 struct VP9mvrefPair *mv = &s->mv[0][(row - 1) * s->sb_cols * 8 + col];
926 if (mv->ref[0] == ref) {
927 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
928 } else if (mv->ref[1] == ref) {
929 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
// directly left block (only within the current tile column)
932 if (col > s->tiling.tile_col_start) {
933 struct VP9mvrefPair *mv = &s->mv[0][row * s->sb_cols * 8 + col - 1];
934 if (mv->ref[0] == ref) {
935 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
936 } else if (mv->ref[1] == ref) {
937 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
945 // previously coded MVs in this neighbourhood, using same reference frame
947 int c = p[i][0] + col, r = p[i][1] + row;
949 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
950 struct VP9mvrefPair *mv = &s->mv[0][r * s->sb_cols * 8 + c];
952 if (mv->ref[0] == ref) {
953 RETURN_MV(mv->mv[0]);
954 } else if (mv->ref[1] == ref) {
955 RETURN_MV(mv->mv[1]);
960 // MV at this position in previous frame, using same reference frame
961 if (s->use_last_frame_mvs) {
962 struct VP9mvrefPair *mv = &s->mv[1][row * s->sb_cols * 8 + col];
964 if (mv->ref[0] == ref) {
965 RETURN_MV(mv->mv[0]);
966 } else if (mv->ref[1] == ref) {
967 RETURN_MV(mv->mv[1]);
// like RETURN_MV, but negates the MV first when `scale` says the
// candidate's reference has the opposite sign bias to ours
971 #define RETURN_SCALE_MV(mv, scale) \
974 VP56mv mv_temp = { -mv.x, -mv.y }; \
975 RETURN_MV(mv_temp); \
981 // previously coded MVs in this neighbourhood, using different reference frame
982 for (i = 0; i < 8; i++) {
983 int c = p[i][0] + col, r = p[i][1] + row;
985 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
986 struct VP9mvrefPair *mv = &s->mv[0][r * s->sb_cols * 8 + c];
988 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
989 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
991 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
992 // BUG - libvpx has this condition regardless of whether
993 // we used the first ref MV and pre-scaling
994 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
995 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1000 // MV at this position in previous frame, using different reference frame
1001 if (s->use_last_frame_mvs) {
1002 struct VP9mvrefPair *mv = &s->mv[1][row * s->sb_cols * 8 + col];
1004 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1005 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1007 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1008 // BUG - libvpx has this condition regardless of whether
1009 // we used the first ref MV and pre-scaling
1010 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1011 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1018 #undef RETURN_SCALE_MV
// Decode one MV component (idx 0 = vertical, 1 = horizontal per the callers
// in fill_mv) from the range coder: sign, magnitude class, then either the
// class-0 fine path or the per-bit path for higher classes, plus fractional
// (fp) and, when `hp` is set, 1/8-pel (hp) bits. Returns the signed
// magnitude (n + 1), and updates s->counts for backward adaptation.
// NOTE(review): partial numbered extract — the branch structure separating
// the class-0 and higher-class paths falls in gaps.
1021 static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
1023 int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
1024 int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
1025 s->prob.p.mv_comp[idx].classes);
1027 s->counts.mv_comp[idx].sign[sign]++;
1028 s->counts.mv_comp[idx].classes[c]++;
// higher classes: one coded bit per magnitude bit
1032 for (n = 0, m = 0; m < c; m++) {
1033 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
1035 s->counts.mv_comp[idx].bits[m][bit]++;
1038 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
1040 s->counts.mv_comp[idx].fp[bit]++;
1042 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
1043 s->counts.mv_comp[idx].hp[bit]++;
1047 // bug in libvpx - we count for bw entropy purposes even if the
1049 s->counts.mv_comp[idx].hp[1]++;
// class 0: single integer bit plus fp/hp fractional bits
1053 n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
1054 s->counts.mv_comp[idx].class0[n]++;
1055 bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
1056 s->prob.p.mv_comp[idx].class0_fp[n]);
1057 s->counts.mv_comp[idx].class0_fp[n][bit]++;
1058 n = (n << 3) | (bit << 1);
1060 bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
1061 s->counts.mv_comp[idx].class0_hp[bit]++;
1065 // bug in libvpx - we count for bw entropy purposes even if the
1067 s->counts.mv_comp[idx].class0_hp[1]++;
1071 return sign ? -(n + 1) : (n + 1);
// Fill mv[0] (and, for compound prediction, mv[1]) for the current
// (sub-)block: ZEROMV zeroes both; otherwise predict via find_ref_mvs()
// and, for NEWMV, add a coded MV delta whose joint/components come from the
// range coder. `hp` selects high-precision deltas, disabled for large MVs.
// NOTE(review): partial numbered extract — clamping code after the hp
// checks and the mv[1] branch header fall in gaps.
1074 static void fill_mv(VP9Context *s,
1075 VP56mv *mv, int mode, int sb)
1077 VP9Block *const b = &s->b;
1079 if (mode == ZEROMV) {
1080 memset(mv, 0, sizeof(*mv) * 2);
1084 // FIXME cache this value and reuse for other subblocks
1085 find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
1086 mode == NEWMV ? -1 : sb);
1087 // FIXME maybe move this code into find_ref_mvs()
// hp deltas are only used while both components are small (< 64)
1088 if ((mode == NEWMV || sb == -1) &&
1089 !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
1103 if (mode == NEWMV) {
1104 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1105 s->prob.p.mv_joint);
1107 s->counts.mv_joint[j]++;
// component 0 = vertical, component 1 = horizontal (see read_mv_component)
1108 if (j >= MV_JOINT_V)
1109 mv[0].y += read_mv_component(s, 0, hp);
1111 mv[0].x += read_mv_component(s, 1, hp);
// second reference of a compound block: same procedure for mv[1]
1115 // FIXME cache this value and reuse for other subblocks
1116 find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
1117 mode == NEWMV ? -1 : sb);
1118 if ((mode == NEWMV || sb == -1) &&
1119 !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
1133 if (mode == NEWMV) {
1134 enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
1135 s->prob.p.mv_joint);
1137 s->counts.mv_joint[j]++;
1138 if (j >= MV_JOINT_V)
1139 mv[1].y += read_mv_component(s, 0, hp);
1141 mv[1].x += read_mv_component(s, 1, hp);
// Decode all per-block mode information for the current block (s->b):
// segment id, skip flag, intra/inter decision, transform size, intra
// prediction modes or inter references/modes/filter/MVs, then update the
// left/above entropy contexts and the per-4x4 MV/ref storage.
1147 static void decode_mode(AVCodecContext *ctx)
// Partition contexts written into above/left ctx arrays, per block size.
1149 static const uint8_t left_ctx[N_BS_SIZES] = {
1150 0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
1152 static const uint8_t above_ctx[N_BS_SIZES] = {
1153 0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
// Largest transform size permitted for each block size.
1155 static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
1156 TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
1157 TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
1159 VP9Context *s = ctx->priv_data;
1160 VP9Block *const b = &s->b;
1161 int row = b->row, col = b->col, row7 = b->row7;
1162 enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
// w4/h4: block size in 8x8 units, clipped at the frame edge.
1163 int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
1164 int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
// have_a/have_l: whether an above/left neighbour exists for context
// derivation (left is bounded by the tile, above by the frame/tile row).
1165 int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
//
// 1) Segment id: either explicitly coded, temporally predicted from the
//    previous frame's segmentation map, or defaulted to 0.
1167 if (!s->segmentation.enabled) {
1169 } else if (s->keyframe || s->intraonly) {
1170 b->seg_id = s->segmentation.update_map ?
1171 vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg) : 0;
1172 } else if (!s->segmentation.update_map ||
1173 (s->segmentation.temporal &&
1174 vp56_rac_get_prob_branchy(&s->c,
1175 s->prob.segpred[s->above_segpred_ctx[col] +
1176 s->left_segpred_ctx[row7]]))) {
// Temporal prediction: take the minimum segment id covered by this block
// in the stored segmentation map.
1179 for (y = 0; y < h4; y++)
1180 for (x = 0; x < w4; x++)
1181 pred = FFMIN(pred, s->segmentation_map[(y + row) * 8 * s->sb_cols + x + col]);
1182 av_assert1(pred < 8);
1185 memset(&s->above_segpred_ctx[col], 1, w4);
1186 memset(&s->left_segpred_ctx[row7], 1, h4);
1188 b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
1191 memset(&s->above_segpred_ctx[col], 0, w4);
1192 memset(&s->left_segpred_ctx[row7], 0, h4);
// Persist the decoded segment id so the next frame can predict from it.
1194 if ((s->segmentation.enabled && s->segmentation.update_map) || s->keyframe) {
1195 for (y = 0; y < h4; y++)
1196 memset(&s->segmentation_map[(y + row) * 8 * s->sb_cols + col],
//
// 2) Skip flag: either forced by the segment feature or coded with a
//    context from the left/above skip flags.
1200 b->skip = s->segmentation.enabled &&
1201 s->segmentation.feat[b->seg_id].skip_enabled;
1203 int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
1204 b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
1205 s->counts.skip[c][b->skip]++;
//
// 3) Intra/inter decision (keyframes are always intra; the segment may
//    pin the reference, which also implies intra when ref_val == 0).
1208 if (s->keyframe || s->intraonly) {
1210 } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
1211 b->intra = !s->segmentation.feat[b->seg_id].ref_val;
1215 if (have_a && have_l) {
1216 c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
1219 c = have_a ? 2 * s->above_intra_ctx[col] :
1220 have_l ? 2 * s->left_intra_ctx[row7] : 0;
1222 bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
1223 s->counts.intra[c][bit]++;
//
// 4) Transform size: coded only in TX_SWITCHABLE mode (and only when the
//    block will actually carry coefficients), otherwise clamped to max_tx.
1227 if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
// Context: skipped neighbours count as using the maximum transform.
1231 c = (s->above_skip_ctx[col] ? max_tx :
1232 s->above_txfm_ctx[col]) +
1233 (s->left_skip_ctx[row7] ? max_tx :
1234 s->left_txfm_ctx[row7]) > max_tx;
1236 c = s->above_skip_ctx[col] ? 1 :
1237 (s->above_txfm_ctx[col] * 2 > max_tx);
1239 } else if (have_l) {
1240 c = s->left_skip_ctx[row7] ? 1 :
1241 (s->left_txfm_ctx[row7] * 2 > max_tx);
// Unary-coded tx size, tree depth depending on max_tx.
1247 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
1249 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
1251 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
1253 s->counts.tx32p[c][b->tx]++;
1256 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
1258 b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
1259 s->counts.tx16p[c][b->tx]++;
1262 b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
1263 s->counts.tx8p[c][b->tx]++;
1270 b->tx = FFMIN(max_tx, s->txfmmode);
//
// 5a) Intra modes on keyframes/intra-only frames: context-coded from the
//     above/left mode arrays using the default keyframe probabilities.
1273 if (s->keyframe || s->intraonly) {
1274 uint8_t *a = &s->above_mode_ctx[col * 2];
1275 uint8_t *l = &s->left_mode_ctx[(row7) << 1];
// Sub-8x8 blocks code up to 4 sub-modes (2x2 grid of 4x4/8x4/4x8 units).
1278 if (b->bs > BS_8x8) {
1279 // FIXME the memory storage intermediates here aren't really
1280 // necessary, they're just there to make the code slightly
1282 b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1283 vp9_default_kf_ymode_probs[a[0]][l[0]]);
1284 if (b->bs != BS_8x4) {
1285 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1286 vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
1287 l[0] = a[1] = b->mode[1];
1289 l[0] = a[1] = b->mode[1] = b->mode[0];
1291 if (b->bs != BS_4x8) {
1292 b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1293 vp9_default_kf_ymode_probs[a[0]][l[1]]);
1294 if (b->bs != BS_8x4) {
1295 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1296 vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
1297 l[1] = a[1] = b->mode[3];
1299 l[1] = a[1] = b->mode[3] = b->mode[2];
1302 b->mode[2] = b->mode[0];
1303 l[1] = a[1] = b->mode[3] = b->mode[1];
// >= 8x8: a single luma mode, replicated into all four mode slots.
1306 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1307 vp9_default_kf_ymode_probs[*a][*l]);
1308 b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
1309 // FIXME this can probably be optimized
1310 memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
1311 memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
// Chroma mode is conditioned on the (last) luma mode.
1313 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1314 vp9_default_kf_uvmode_probs[b->mode[3]]);
// 5b) Intra modes on inter frames: adaptive probabilities + counts.
1315 } else if (b->intra) {
1317 if (b->bs > BS_8x8) {
1318 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1319 s->prob.p.y_mode[0]);
1320 s->counts.y_mode[0][b->mode[0]]++;
1321 if (b->bs != BS_8x4) {
1322 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1323 s->prob.p.y_mode[0]);
1324 s->counts.y_mode[0][b->mode[1]]++;
1326 b->mode[1] = b->mode[0];
1328 if (b->bs != BS_4x8) {
1329 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1330 s->prob.p.y_mode[0]);
1331 s->counts.y_mode[0][b->mode[2]]++;
1332 if (b->bs != BS_8x4) {
1333 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1334 s->prob.p.y_mode[0]);
1335 s->counts.y_mode[0][b->mode[3]]++;
1337 b->mode[3] = b->mode[2];
1340 b->mode[2] = b->mode[0];
1341 b->mode[3] = b->mode[1];
// Probability set index depends on block size group.
1344 static const uint8_t size_group[10] = {
1345 3, 3, 3, 3, 2, 2, 2, 1, 1, 1
1347 int sz = size_group[b->bs];
1349 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1350 s->prob.p.y_mode[sz]);
1351 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
1352 s->counts.y_mode[sz][b->mode[3]]++;
1354 b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
1355 s->prob.p.uv_mode[b->mode[3]]);
1356 s->counts.uv_mode[b->mode[3]][b->uvmode]++;
//
// 5c) Inter block: LUT mapping (above_mode, left_mode) -> inter-mode
//     probability context. Indices >= 10 are the inter modes.
1358 static const uint8_t inter_mode_ctx_lut[14][14] = {
1359 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1360 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1361 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1362 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1363 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1364 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1365 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1366 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1367 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1368 { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
1369 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1370 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
1371 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
1372 { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
// Segment-pinned reference overrides the coded reference selection.
1375 if (s->segmentation.feat[b->seg_id].ref_enabled) {
1376 av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
1378 b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
1380 // read comp_pred flag
// Compound-prediction flag: fixed by frame header unless switchable,
// in which case it is coded with a neighbour-derived context.
1381 if (s->comppredmode != PRED_SWITCHABLE) {
1382 b->comp = s->comppredmode == PRED_COMPREF;
1386 // FIXME add intra as ref=0xff (or -1) to make these easier?
1389 if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
1391 } else if (s->above_comp_ctx[col]) {
1392 c = 2 + (s->left_intra_ctx[row7] ||
1393 s->left_ref_ctx[row7] == s->fixcompref);
1394 } else if (s->left_comp_ctx[row7]) {
1395 c = 2 + (s->above_intra_ctx[col] ||
1396 s->above_ref_ctx[col] == s->fixcompref);
1398 c = (!s->above_intra_ctx[col] &&
1399 s->above_ref_ctx[col] == s->fixcompref) ^
1400 (!s->left_intra_ctx[row7] &&
// NOTE(review): "row & 7" equals row7 here (row7 = row & 7 by
// convention elsewhere in this decoder), but the inconsistent
// spelling is worth confirming against upstream.
1401 s->left_ref_ctx[row & 7] == s->fixcompref);
1404 c = s->above_comp_ctx[col] ? 3 :
1405 (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
1407 } else if (have_l) {
1408 c = s->left_comp_ctx[row7] ? 3 :
1409 (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
1413 b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
1414 s->counts.comp[c][b->comp]++;
//
// 6) Reference frame selection.
1417 // read actual references
1418 // FIXME probably cache a few variables here to prevent repetitive
1419 // memory accesses below
1420 if (b->comp) /* two references */ {
// Compound: one reference is fixed (fixcompref); only the variable one
// (varcompref[bit]) is coded, with an elaborate neighbour context.
1421 int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
1423 b->ref[fix_idx] = s->fixcompref;
1424 // FIXME can this codeblob be replaced by some sort of LUT?
1427 if (s->above_intra_ctx[col]) {
1428 if (s->left_intra_ctx[row7]) {
1431 c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1433 } else if (s->left_intra_ctx[row7]) {
1434 c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
1436 int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
1438 if (refl == refa && refa == s->varcompref[1]) {
1440 } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
1441 if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
1442 (refl == s->fixcompref && refa == s->varcompref[0])) {
1445 c = (refa == refl) ? 3 : 1;
1447 } else if (!s->left_comp_ctx[row7]) {
1448 if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
1451 c = (refl == s->varcompref[1] &&
1452 refa != s->varcompref[1]) ? 2 : 4;
1454 } else if (!s->above_comp_ctx[col]) {
1455 if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
1458 c = (refa == s->varcompref[1] &&
1459 refl != s->varcompref[1]) ? 2 : 4;
1462 c = (refl == refa) ? 4 : 2;
1466 if (s->above_intra_ctx[col]) {
1468 } else if (s->above_comp_ctx[col]) {
1469 c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
1471 c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
1474 } else if (have_l) {
1475 if (s->left_intra_ctx[row7]) {
1477 } else if (s->left_comp_ctx[row7]) {
1478 c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1480 c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
1485 bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
1486 b->ref[var_idx] = s->varcompref[bit];
1487 s->counts.comp_ref[c][bit]++;
1488 } else /* single reference */ {
// Single reference: two coded bits. First bit: LAST vs GOLDEN/ALTREF.
1491 if (have_a && !s->above_intra_ctx[col]) {
1492 if (have_l && !s->left_intra_ctx[row7]) {
1493 if (s->left_comp_ctx[row7]) {
1494 if (s->above_comp_ctx[col]) {
1495 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
1496 !s->above_ref_ctx[col]);
1498 c = (3 * !s->above_ref_ctx[col]) +
1499 (!s->fixcompref || !s->left_ref_ctx[row7]);
1501 } else if (s->above_comp_ctx[col]) {
1502 c = (3 * !s->left_ref_ctx[row7]) +
1503 (!s->fixcompref || !s->above_ref_ctx[col]);
1505 c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
1507 } else if (s->above_intra_ctx[col]) {
1509 } else if (s->above_comp_ctx[col]) {
1510 c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
1512 c = 4 * (!s->above_ref_ctx[col]);
1514 } else if (have_l && !s->left_intra_ctx[row7]) {
// NOTE(review): this inner left_intra test can never be true given
// the enclosing condition; apparently kept for symmetry — confirm
// against the reference implementation before touching.
1515 if (s->left_intra_ctx[row7]) {
1517 } else if (s->left_comp_ctx[row7]) {
1518 c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
1520 c = 4 * (!s->left_ref_ctx[row7]);
1525 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
1526 s->counts.single_ref[c][0][bit]++;
// Second bit (only when first bit set): GOLDEN vs ALTREF.
1530 // FIXME can this codeblob be replaced by some sort of LUT?
1533 if (s->left_intra_ctx[row7]) {
1534 if (s->above_intra_ctx[col]) {
1536 } else if (s->above_comp_ctx[col]) {
1537 c = 1 + 2 * (s->fixcompref == 1 ||
1538 s->above_ref_ctx[col] == 1);
1539 } else if (!s->above_ref_ctx[col]) {
1542 c = 4 * (s->above_ref_ctx[col] == 1);
1544 } else if (s->above_intra_ctx[col]) {
1545 if (s->left_intra_ctx[row7]) {
1547 } else if (s->left_comp_ctx[row7]) {
1548 c = 1 + 2 * (s->fixcompref == 1 ||
1549 s->left_ref_ctx[row7] == 1);
1550 } else if (!s->left_ref_ctx[row7]) {
1553 c = 4 * (s->left_ref_ctx[row7] == 1);
1555 } else if (s->above_comp_ctx[col]) {
1556 if (s->left_comp_ctx[row7]) {
1557 if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
1558 c = 3 * (s->fixcompref == 1 ||
1559 s->left_ref_ctx[row7] == 1);
1563 } else if (!s->left_ref_ctx[row7]) {
1564 c = 1 + 2 * (s->fixcompref == 1 ||
1565 s->above_ref_ctx[col] == 1);
1567 c = 3 * (s->left_ref_ctx[row7] == 1) +
1568 (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1570 } else if (s->left_comp_ctx[row7]) {
1571 if (!s->above_ref_ctx[col]) {
1572 c = 1 + 2 * (s->fixcompref == 1 ||
1573 s->left_ref_ctx[row7] == 1);
1575 c = 3 * (s->above_ref_ctx[col] == 1) +
1576 (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1578 } else if (!s->above_ref_ctx[col]) {
1579 if (!s->left_ref_ctx[row7]) {
1582 c = 4 * (s->left_ref_ctx[row7] == 1);
1584 } else if (!s->left_ref_ctx[row7]) {
1585 c = 4 * (s->above_ref_ctx[col] == 1);
1587 c = 2 * (s->left_ref_ctx[row7] == 1) +
1588 2 * (s->above_ref_ctx[col] == 1);
1591 if (s->above_intra_ctx[col] ||
1592 (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
1594 } else if (s->above_comp_ctx[col]) {
1595 c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
1597 c = 4 * (s->above_ref_ctx[col] == 1);
1600 } else if (have_l) {
1601 if (s->left_intra_ctx[row7] ||
1602 (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
1604 } else if (s->left_comp_ctx[row7]) {
1605 c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
1607 c = 4 * (s->left_ref_ctx[row7] == 1);
1612 bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
1613 s->counts.single_ref[c][1][bit]++;
1614 b->ref[0] = 1 + bit;
//
// 7) Inter mode (>=8x8 blocks code it once; sub-8x8 per sub-block below)
//    and interpolation filter.
1619 if (b->bs <= BS_8x8) {
1620 if (s->segmentation.feat[b->seg_id].skip_enabled) {
// Segment-level skip forces ZEROMV.
1621 b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
// off[] selects which above/left mode entry provides the context for
// non-square small block sizes.
1623 static const uint8_t off[10] = {
1624 3, 0, 0, 1, 0, 0, 0, 0, 0, 0
1627 // FIXME this needs to use the LUT tables from find_ref_mvs
1628 // because not all are -1,0/0,-1
1629 int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
1630 [s->left_mode_ctx[row7 + off[b->bs]]];
1632 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1633 s->prob.p.mv_mode[c]);
1634 b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
// Inter modes start at index 10 in the shared mode numbering.
1635 s->counts.mv_mode[c][b->mode[0] - 10]++;
// Interpolation filter: coded only in switchable-filter mode, context
// taken from neighbouring inter blocks' filters.
1639 if (s->filtermode == FILTER_SWITCHABLE) {
1642 if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
1643 if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1644 c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
1645 s->left_filter_ctx[row7] : 3;
1647 c = s->above_filter_ctx[col];
1649 } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
1650 c = s->left_filter_ctx[row7];
1655 b->filter = vp8_rac_get_tree(&s->c, vp9_filter_tree,
1656 s->prob.p.filter[c]);
1657 s->counts.filter[c][b->filter]++;
1659 b->filter = s->filtermode;
//
// 8) Motion vectors: sub-8x8 blocks decode a mode+MV per sub-block,
//    replicating across missing sub-blocks for 8x4/4x8; larger blocks
//    decode once and copy to all four slots.
1662 if (b->bs > BS_8x8) {
1663 int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
1665 b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1666 s->prob.p.mv_mode[c]);
1667 s->counts.mv_mode[c][b->mode[0] - 10]++;
1668 fill_mv(s, b->mv[0], b->mode[0], 0);
1670 if (b->bs != BS_8x4) {
1671 b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1672 s->prob.p.mv_mode[c]);
1673 s->counts.mv_mode[c][b->mode[1] - 10]++;
1674 fill_mv(s, b->mv[1], b->mode[1], 1);
1676 b->mode[1] = b->mode[0];
1677 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1678 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1681 if (b->bs != BS_4x8) {
1682 b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1683 s->prob.p.mv_mode[c]);
1684 s->counts.mv_mode[c][b->mode[2] - 10]++;
1685 fill_mv(s, b->mv[2], b->mode[2], 2);
1687 if (b->bs != BS_8x4) {
1688 b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
1689 s->prob.p.mv_mode[c]);
1690 s->counts.mv_mode[c][b->mode[3] - 10]++;
1691 fill_mv(s, b->mv[3], b->mode[3], 3);
1693 b->mode[3] = b->mode[2];
1694 AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
1695 AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
1698 b->mode[2] = b->mode[0];
1699 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1700 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1701 b->mode[3] = b->mode[1];
1702 AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
1703 AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
1706 fill_mv(s, b->mv[0], b->mode[0], -1);
1707 AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
1708 AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
1709 AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
1710 AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
1711 AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
1712 AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
//
// 9) Propagate decoded info into the above/left context arrays used by
//    subsequent blocks in this row / superblock row.
1716 // FIXME this can probably be optimized
1717 memset(&s->above_skip_ctx[col], b->skip, w4);
1718 memset(&s->left_skip_ctx[row7], b->skip, h4);
1719 memset(&s->above_txfm_ctx[col], b->tx, w4);
1720 memset(&s->left_txfm_ctx[row7], b->tx, h4);
1721 memset(&s->above_partition_ctx[col], above_ctx[b->bs], w4);
1722 memset(&s->left_partition_ctx[row7], left_ctx[b->bs], h4);
1723 if (!s->keyframe && !s->intraonly) {
1724 memset(&s->above_intra_ctx[col], b->intra, w4);
1725 memset(&s->left_intra_ctx[row7], b->intra, h4);
1726 memset(&s->above_comp_ctx[col], b->comp, w4);
1727 memset(&s->left_comp_ctx[row7], b->comp, h4);
1728 memset(&s->above_mode_ctx[col], b->mode[3], w4);
1729 memset(&s->left_mode_ctx[row7], b->mode[3], h4);
1730 if (s->filtermode == FILTER_SWITCHABLE && !b->intra ) {
1731 memset(&s->above_filter_ctx[col], b->filter, w4);
1732 memset(&s->left_filter_ctx[row7], b->filter, h4);
// Convert the coded filter index to the DSP LUT index for reconstruction.
1733 b->filter = vp9_filter_lut[b->filter];
// MV context arrays: sub-8x8 stores per-edge sub-MVs, larger blocks
// replicate the single block MV across the covered 4x4 positions.
1735 if (b->bs > BS_8x8) {
1736 int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1738 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
1739 AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
1740 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
1741 AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
1742 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
1743 AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
1744 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
1745 AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
1747 int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
1749 for (n = 0; n < w4 * 2; n++) {
1750 AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
1751 AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
1753 for (n = 0; n < h4 * 2; n++) {
1754 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
1755 AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
1759 if (!b->intra) { // FIXME write 0xff or -1 if intra, so we can use this
1760 // as a direct check in above branches
// vref is the "variable" reference (or the only one, if single-ref).
1761 int vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];
1763 memset(&s->above_ref_ctx[col], vref, w4);
1764 memset(&s->left_ref_ctx[row7], vref, h4);
//
// 10) Fill the frame-wide per-4x4 MV/ref grid (used for temporal MV
//     prediction by the next frame). ref = -1 marks intra.
1769 for (y = 0; y < h4; y++) {
1770 int x, o = (row + y) * s->sb_cols * 8 + col;
1773 for (x = 0; x < w4; x++) {
1774 s->mv[0][o + x].ref[0] =
1775 s->mv[0][o + x].ref[1] = -1;
1777 } else if (b->comp) {
1778 for (x = 0; x < w4; x++) {
1779 s->mv[0][o + x].ref[0] = b->ref[0];
1780 s->mv[0][o + x].ref[1] = b->ref[1];
1781 AV_COPY32(&s->mv[0][o + x].mv[0], &b->mv[3][0]);
1782 AV_COPY32(&s->mv[0][o + x].mv[1], &b->mv[3][1]);
1785 for (x = 0; x < w4; x++) {
1786 s->mv[0][o + x].ref[0] = b->ref[0];
1787 s->mv[0][o + x].ref[1] = -1;
1788 AV_COPY32(&s->mv[0][o + x].mv[0], &b->mv[3][0]);
1794 // FIXME remove tx argument, and merge cnt/eob arguments?
// Decode one transform block's coefficients from the range coder.
// coef receives dequantized values at scan positions; n_coeffs is the
// transform size in coefficients; cnt/eob accumulate adaptation counts;
// p is the probability model; nnz is the initial nonzero context from the
// above/left nnz arrays; scan/nb give the scan order and each position's
// context neighbours; band_counts gives the per-band position counts;
// qmul holds the DC ([0]) and AC ([1]) dequant factors.
// Returns the number of decoded coefficients (0 => all-zero block).
1795 static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
1796 enum TxfmMode tx, unsigned (*cnt)[6][3],
1797 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
1798 int nnz, const int16_t *scan, const int16_t (*nb)[2],
1799 const int16_t *band_counts, const int16_t *qmul)
1801 int i = 0, band = 0, band_left = band_counts[band];
1802 uint8_t *tp = p[0][nnz];
// cache[] holds per-position token magnitudes used to derive the nnz
// context of later scan positions (via the nb[] neighbour pairs).
1803 uint8_t cache[1024];
// Main token loop: eob flag, zero flag, then the magnitude token tree.
1808 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
1809 eob[band][nnz][val]++;
1814 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
1815 cnt[band][nnz][0]++;
1817 band_left = band_counts[++band];
// Context for the next position = rounded average of the two neighbours.
1819 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
1821 if (++i == n_coeffs)
1822 break; //invalid input; blocks should end with EOB
1827 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
1828 cnt[band][nnz][1]++;
1832 // fill in p[3-10] (model fill) - only once per frame for each pos
// The upper token probabilities are derived from tp[2] via the Pareto
// model table instead of being coded explicitly.
1834 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
1836 cnt[band][nnz][2]++;
1837 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
1838 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
1839 cache[rc] = val = 2;
1841 val = 3 + vp56_rac_get_prob(c, tp[5]);
// Category tokens: base value plus literal extra bits read with fixed
// probabilities (cat1..cat6 per the VP9 coefficient token spec).
1844 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
1846 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
1847 val = 5 + vp56_rac_get_prob(c, 159);
1849 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
1850 val += vp56_rac_get_prob(c, 145);
1854 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
1855 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
1856 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
1857 val += (vp56_rac_get_prob(c, 148) << 1);
1858 val += vp56_rac_get_prob(c, 140);
1860 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
1861 val += (vp56_rac_get_prob(c, 155) << 2);
1862 val += (vp56_rac_get_prob(c, 140) << 1);
1863 val += vp56_rac_get_prob(c, 135);
1865 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
1866 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
1867 val += (vp56_rac_get_prob(c, 157) << 3);
1868 val += (vp56_rac_get_prob(c, 141) << 2);
1869 val += (vp56_rac_get_prob(c, 134) << 1);
1870 val += vp56_rac_get_prob(c, 130);
// cat6: 14 literal bits, highest-probability-first.
1872 val = 67 + (vp56_rac_get_prob(c, 254) << 13);
1873 val += (vp56_rac_get_prob(c, 254) << 12);
1874 val += (vp56_rac_get_prob(c, 254) << 11);
1875 val += (vp56_rac_get_prob(c, 252) << 10);
1876 val += (vp56_rac_get_prob(c, 249) << 9);
1877 val += (vp56_rac_get_prob(c, 243) << 8);
1878 val += (vp56_rac_get_prob(c, 230) << 7);
1879 val += (vp56_rac_get_prob(c, 196) << 6);
1880 val += (vp56_rac_get_prob(c, 177) << 5);
1881 val += (vp56_rac_get_prob(c, 153) << 4);
1882 val += (vp56_rac_get_prob(c, 140) << 3);
1883 val += (vp56_rac_get_prob(c, 133) << 2);
1884 val += (vp56_rac_get_prob(c, 130) << 1);
1885 val += vp56_rac_get_prob(c, 129);
1890 band_left = band_counts[++band];
// Sign bit, then dequantize; 32x32 transforms use halved dequant values.
// qmul[!!i]: DC factor for position 0, AC factor elsewhere.
1891 if (tx == TX_32X32) // FIXME slow
1892 coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
1894 coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
1895 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
1897 } while (++i < n_coeffs);
// Decode all residual coefficients for the current block: iterate over the
// luma transform blocks, then both chroma planes, calling decode_coeffs_b
// per transform block and maintaining the above/left nnz context arrays.
// Returns 0 on success, negative on bitstream error (propagated from
// decode_coeffs_b).
1902 static int decode_coeffs(AVCodecContext *ctx)
1904 VP9Context *s = ctx->priv_data;
1905 VP9Block *const b = &s->b;
1906 int row = b->row, col = b->col;
// Probability / count / eob tables, selected by tx size and intra/inter.
1907 uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
1908 unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
1909 unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
// Block dimensions in 4x4 units, clipped at the frame edge.
1910 int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
1911 int end_x = FFMIN(2 * (s->cols - col), w4);
1912 int end_y = FFMIN(2 * (s->rows - row), h4);
// step1d: tx block size in 4x4 units; step: coeff-buffer stride per tx block.
1913 int n, pl, x, y, step1d = 1 << b->tx, step = 1 << (b->tx * 2);
1914 int uvstep1d = 1 << b->uvtx, uvstep = 1 << (b->uvtx * 2), res;
1915 int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
// Lossless mode uses the WHT scan tables stored at offset 4.
1916 int tx = 4 * s->lossless + b->tx;
1917 const int16_t * const *yscans = vp9_scans[tx];
1918 const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
1919 const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
1920 const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
1921 uint8_t *a = &s->above_y_nnz_ctx[col * 2];
1922 uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
// Coefficient counts per context band, per tx size.
1923 static const int16_t band_counts[4][8] = {
1924 { 1, 2, 3, 4, 3, 16 - 13 },
1925 { 1, 2, 3, 4, 11, 64 - 21 },
1926 { 1, 2, 3, 4, 11, 256 - 21 },
1927 { 1, 2, 3, 4, 11, 1024 - 21 },
1929 const int16_t *y_band_counts = band_counts[b->tx];
1930 const int16_t *uv_band_counts = band_counts[b->uvtx];
// Luma: for tx > 4x4 collapse the nnz contexts covered by one tx block
// (pre-pass), decode each tx block, then re-expand the contexts.
1933 if (b->tx > TX_4X4) { // FIXME slow
1934 for (y = 0; y < end_y; y += step1d)
1935 for (x = 1; x < step1d; x++)
1937 for (x = 0; x < end_x; x += step1d)
1938 for (y = 1; y < step1d; y++)
1941 for (n = 0, y = 0; y < end_y; y += step1d) {
1942 for (x = 0; x < end_x; x += step1d, n += step) {
// For 4x4 intra, the scan/transform type depends on the sub-mode.
1943 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[b->tx == TX_4X4 &&
1946 int nnz = a[x] + l[y];
1947 if ((res = decode_coeffs_b(&s->c, s->block + 16 * n, 16 * step,
1948 b->tx, c, e, p, nnz, yscans[txtp],
1949 ynbs[txtp], y_band_counts, qmul[0])) < 0)
1951 a[x] = l[y] = !!res;
// Large tx blocks need 16-bit eob storage.
1952 if (b->tx > TX_8X8) {
1953 AV_WN16A(&s->eob[n], res);
1959 if (b->tx > TX_4X4) { // FIXME slow
1960 for (y = 0; y < end_y; y += step1d)
1961 memset(&l[y + 1], l[y], FFMIN(end_y - y - 1, step1d - 1));
1962 for (x = 0; x < end_x; x += step1d)
1963 memset(&a[x + 1], a[x], FFMIN(end_x - x - 1, step1d - 1));
// Chroma: switch to the uv probability/count tables and repeat the same
// procedure for both planes (always DCT_DCT scan).
1966 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
1967 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
1968 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
1973 for (pl = 0; pl < 2; pl++) {
1974 a = &s->above_uv_nnz_ctx[pl][col];
1975 l = &s->left_uv_nnz_ctx[pl][row & 7];
1976 if (b->uvtx > TX_4X4) { // FIXME slow
1977 for (y = 0; y < end_y; y += uvstep1d)
1978 for (x = 1; x < uvstep1d; x++)
1980 for (x = 0; x < end_x; x += uvstep1d)
1981 for (y = 1; y < uvstep1d; y++)
1984 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
1985 for (x = 0; x < end_x; x += uvstep1d, n += uvstep) {
1986 int nnz = a[x] + l[y];
1987 if ((res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n,
1988 16 * uvstep, b->uvtx, c, e, p, nnz,
1989 uvscan, uvnb, uv_band_counts,
1992 a[x] = l[y] = !!res;
1993 if (b->uvtx > TX_8X8) {
1994 AV_WN16A(&s->uveob[pl][n], res);
1996 s->uveob[pl][n] = res;
2000 if (b->uvtx > TX_4X4) { // FIXME slow
2001 for (y = 0; y < end_y; y += uvstep1d)
2002 memset(&l[y + 1], l[y], FFMIN(end_y - y - 1, uvstep1d - 1));
2003 for (x = 0; x < end_x; x += uvstep1d)
2004 memset(&a[x + 1], a[x], FFMIN(end_x - x - 1, uvstep1d - 1));
// Prepare the edge pixels for one intra-predicted transform block and
// remap the prediction mode according to which neighbours actually exist.
// *a receives the top edge (filled into a caller-provided aligned buffer),
// l receives the left edge; missing pixels are synthesized (127/129/last
// available pixel) per the VP9 spec. dst_edge/dst_inner distinguish the
// block-edge surface (reference frame data) from the in-progress surface.
// p is the plane (0 = luma). Returns the possibly-remapped mode.
2011 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2012 uint8_t *dst_edge, ptrdiff_t stride_edge,
2013 uint8_t *dst_inner, ptrdiff_t stride_inner,
2014 uint8_t *l, int col, int x, int w,
2015 int row, int y, enum TxfmMode tx,
2018 int have_top = row > 0 || y > 0;
2019 int have_left = col > s->tiling.tile_col_start || x > 0;
2020 int have_right = x < w - 1;
// Mode remap table: substitute a DC variant when the required edge is
// unavailable (e.g. VERT without a top edge becomes DC_127).
2021 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2022 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2023 { DC_127_PRED, VERT_PRED } },
2024 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2025 { HOR_PRED, HOR_PRED } },
2026 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2027 { LEFT_DC_PRED, DC_PRED } },
2028 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2029 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2030 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2031 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2032 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2033 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2034 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2035 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2036 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2037 { DC_127_PRED, VERT_LEFT_PRED } },
2038 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2039 { HOR_UP_PRED, HOR_UP_PRED } },
2040 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2041 { HOR_PRED, TM_VP8_PRED } },
// Which edges each (post-remap) prediction mode reads.
2043 static const struct {
2044 uint8_t needs_left:1;
2045 uint8_t needs_top:1;
2046 uint8_t needs_topleft:1;
2047 uint8_t needs_topright:1;
2048 } edges[N_INTRA_PRED_MODES] = {
2049 [VERT_PRED] = { .needs_top = 1 },
2050 [HOR_PRED] = { .needs_left = 1 },
2051 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2052 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2053 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2054 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2055 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2056 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2057 [HOR_UP_PRED] = { .needs_left = 1 },
2058 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2059 [LEFT_DC_PRED] = { .needs_left = 1 },
2060 [TOP_DC_PRED] = { .needs_top = 1 },
2061 [DC_128_PRED] = { 0 },
2062 [DC_127_PRED] = { 0 },
2063 [DC_129_PRED] = { 0 }
2066 av_assert2(mode >= 0 && mode < 10);
2067 mode = mode_conv[mode][have_left][have_top];
2068 if (edges[mode].needs_top) {
2069 uint8_t *top, *topleft;
// n_px_have: pixels actually available to the right, in this plane.
2070 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
2071 int n_px_need_tr = 0;
2073 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2076 // if top of sb64-row, use s->intra_pred_data[] instead of
2077 // dst[-stride] for intra prediction (it contains pre- instead of
2078 // post-loopfilter data)
2080 top = !(row & 7) && !y ?
2081 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2082 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2084 topleft = !(row & 7) && !y ?
2085 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2086 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2087 &dst_inner[-stride_inner];
// Fast path (condition head not visible here): when everything needed is
// present and contiguous, the edge can be used in place / copied whole.
2091 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2092 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2093 n_px_need + n_px_need_tr <= n_px_have) {
2097 if (n_px_need <= n_px_have) {
2098 memcpy(*a, top, n_px_need);
2100 memcpy(*a, top, n_px_have);
// Extend the last available pixel across the missing part.
2101 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2102 n_px_need - n_px_have);
// No top edge at all: synthesize with the spec's 127 fill value.
2105 memset(*a, 127, n_px_need);
2107 if (edges[mode].needs_topleft) {
2108 if (have_left && have_top) {
2109 (*a)[-1] = topleft[-1];
2111 (*a)[-1] = have_top ? 129 : 127;
2114 if (tx == TX_4X4 && edges[mode].needs_topright) {
2115 if (have_top && have_right &&
2116 n_px_need + n_px_need_tr <= n_px_have) {
2117 memcpy(&(*a)[4], &top[4], 4);
2119 memset(&(*a)[4], (*a)[3], 4);
// Left edge: gather from the column left of dst, extending or filling
// with 129 when (partially) unavailable.
2124 if (edges[mode].needs_left) {
2126 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
2127 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2128 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
2130 if (n_px_need <= n_px_have) {
2131 for (i = 0; i < n_px_need; i++)
2132 l[i] = dst[i * stride - 1];
2134 for (i = 0; i < n_px_have; i++)
2135 l[i] = dst[i * stride - 1];
2136 memset(&l[i], l[i - 1], n_px_need - n_px_have);
2139 memset(l, 129, 4 << tx);
// Reconstruct an intra-coded block: for every luma transform block, build
// the edges (check_intra_mode), run the intra predictor, and add the
// inverse-transformed residual; then do the same for both chroma planes.
// y_off/uv_off are the block's byte offsets into the frame planes.
2146 static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2148 VP9Context *s = ctx->priv_data;
2149 VP9Block *const b = &s->b;
2150 int row = b->row, col = b->col;
2151 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2152 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2153 int end_x = FFMIN(2 * (s->cols - col), w4);
2154 int end_y = FFMIN(2 * (s->rows - row), h4);
// +4 offsets select the WHT (lossless) itxfm variants.
2155 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2156 int uvstep1d = 1 << b->uvtx, p;
// dst: intermediate working surface; dst_r: actual frame plane (used as
// the edge-pixel source by check_intra_mode).
2157 uint8_t *dst = b->dst[0], *dst_r = s->f->data[0] + y_off;
2159 for (n = 0, y = 0; y < end_y; y += step1d) {
2160 uint8_t *ptr = dst, *ptr_r = dst_r;
2161 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2162 ptr_r += 4 * step1d, n += step) {
// Sub-8x8 4x4 blocks have a per-sub-block mode; otherwise mode[3].
2163 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
// a_buf leaves 16 bytes of headroom so a[-1] (topleft) is addressable.
2165 LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
2166 uint8_t *a = &a_buf[16], l[32];
2167 enum TxfmType txtp = vp9_intra_txfm_type[mode];
2168 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2170 mode = check_intra_mode(s, mode, &a, ptr_r, s->f->linesize[0],
2171 ptr, b->y_stride, l,
2172 col, x, w4, row, y, b->tx, 0);
2173 s->dsp.intra_pred[b->tx][mode](ptr, b->y_stride, l, a);
// Add residual only when the block has coefficients (eob check is in
// the elided line above this call).
2175 s->dsp.itxfm_add[tx][txtp](ptr, b->y_stride,
2176 s->block + 16 * n, eob);
2178 dst_r += 4 * s->f->linesize[0] * step1d;
2179 dst += 4 * b->y_stride * step1d;
// Chroma: same procedure per plane, always DCT_DCT.
2187 step = 1 << (b->uvtx * 2);
2188 for (p = 0; p < 2; p++) {
2189 dst = b->dst[1 + p];
2190 dst_r = s->f->data[1 + p] + uv_off;
2191 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2192 uint8_t *ptr = dst, *ptr_r = dst_r;
2193 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2194 ptr_r += 4 * uvstep1d, n += step) {
2195 int mode = b->uvmode;
2196 LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
2197 uint8_t *a = &a_buf[16], l[32];
2198 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2200 mode = check_intra_mode(s, mode, &a, ptr_r, s->f->linesize[1],
2201 ptr, b->uv_stride, l,
2202 col, x, w4, row, y, b->uvtx, p + 1);
2203 s->dsp.intra_pred[b->uvtx][mode](ptr, b->uv_stride, l, a);
2205 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, b->uv_stride,
2206 s->uvblock[p] + 16 * n, eob);
2208 dst_r += 4 * uvstep1d * s->f->linesize[1];
2209 dst += 4 * uvstep1d * b->uv_stride;
// Motion compensation for one luma block in one prediction direction.
// mv is in 1/8-pel units; (y, x) is the block position, (bw, bh) the block
// size and (w, h) the reference frame size. When the 8-tap filter would
// read outside the reference (it needs 3 pixels before and 4 after the
// sample position), the edge is emulated into s->edge_emu_buffer first.
2214 static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2215 uint8_t *dst, ptrdiff_t dst_stride,
2216 const uint8_t *ref, ptrdiff_t ref_stride,
2217 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2218 int bw, int bh, int w, int h)
2220 int mx = mv->x, my = mv->y;
// (integer-pel advance and fractional-part extraction lines elided here)
2224 ref += y * ref_stride + x;
2227 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
// Out-of-bounds check: only needed when there IS a fractional component
// (!!mx / !!my), since full-pel MC reads no filter margin.
2228 if (x < !!mx * 3 || y < !!my * 3 ||
2229 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2230 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2231 ref - !!my * 3 * ref_stride - !!mx * 3,
2233 bw + !!mx * 7, bh + !!my * 7,
2234 x - !!mx * 3, y - !!my * 3, w, h);
// 80 is the emu buffer stride (80 bytes per row).
2235 ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
// Dispatch on whether horizontal/vertical filtering is needed; luma
// fractional positions are scaled (<< 1) to the filter's 1/16 grid.
2238 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
// Motion compensation for both chroma planes (U and V share one mv), one
// prediction direction.  Same edge-emulation scheme as mc_luma_dir, done
// once per plane since U and V may have different source strides.
// NOTE(review): the integer mv offsetting and fractional masking steps
// are not visible in this excerpt — confirm against upstream vp9.c.
2241 static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2242 uint8_t *dst_u, uint8_t *dst_v,
2243 ptrdiff_t dst_stride,
2244 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2245 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2246 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2247 int bw, int bh, int w, int h)
2249 int mx = mv->x, my = mv->y;
2253 ref_u += y * src_stride_u + x;
2254 ref_v += y * src_stride_v + x;
2257 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2258 if (x < !!mx * 3 || y < !!my * 3 ||
2259 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
// Edge case: emulate U, filter U, then reuse the same buffer for V.
2260 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2261 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2263 bw + !!mx * 7, bh + !!my * 7,
2264 x - !!mx * 3, y - !!my * 3, w, h);
2265 ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2266 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
2268 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2269 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2271 bw + !!mx * 7, bh + !!my * 7,
2272 x - !!mx * 3, y - !!my * 3, w, h);
2273 ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2274 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
// Fast path: both planes read directly from the reference picture.
// Note mx/my are passed unscaled here, unlike the << 1 in mc_luma_dir.
2276 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2277 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
// Reconstruct one inter-coded block: motion-compensated prediction for
// luma and chroma from one reference frame (plus a second one when
// compound prediction is used), followed by adding the decoded residual.
// NOTE(review): several enclosing braces / "if (b->comp)" guards around
// the second-reference calls are not visible in this excerpt; the
// structure below is annotated as-is — confirm against upstream vp9.c.
2281 static void inter_recon(AVCodecContext *ctx)
// log2 of prediction block width, per block size; [0] = luma, [1] = chroma.
2283 static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
2284 { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
2285 { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
2287 VP9Context *s = ctx->priv_data;
2288 VP9Block *const b = &s->b;
2289 int row = b->row, col = b->col;
2290 AVFrame *ref1 = s->refs[s->refidx[b->ref[0]]];
2291 AVFrame *ref2 = b->comp ? s->refs[s->refidx[b->ref[1]]] : NULL;
2292 int w = ctx->width, h = ctx->height;
2293 ptrdiff_t ls_y = b->y_stride, ls_uv = b->uv_stride;
// --- luma prediction ---
// Sub-8x8 sizes are predicted as multiple 4/8-px calls, one per sub-mv.
2296 if (b->bs > BS_8x8) {
2297 if (b->bs == BS_8x4) {
2298 mc_luma_dir(s, s->dsp.mc[3][b->filter][0], b->dst[0], ls_y,
2299 ref1->data[0], ref1->linesize[0],
2300 row << 3, col << 3, &b->mv[0][0], 8, 4, w, h);
2301 mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
2302 b->dst[0] + 4 * ls_y, ls_y,
2303 ref1->data[0], ref1->linesize[0],
2304 (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w, h);
// Second direction (compound prediction) for BS_8x4.
2307 mc_luma_dir(s, s->dsp.mc[3][b->filter][1], b->dst[0], ls_y,
2308 ref2->data[0], ref2->linesize[0],
2309 row << 3, col << 3, &b->mv[0][1], 8, 4, w, h);
2310 mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
2311 b->dst[0] + 4 * ls_y, ls_y,
2312 ref2->data[0], ref2->linesize[0],
2313 (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w, h);
2315 } else if (b->bs == BS_4x8) {
2316 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0], ls_y,
2317 ref1->data[0], ref1->linesize[0],
2318 row << 3, col << 3, &b->mv[0][0], 4, 8, w, h);
2319 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0] + 4, ls_y,
2320 ref1->data[0], ref1->linesize[0],
2321 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w, h);
// Second direction for BS_4x8.
2324 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0], ls_y,
2325 ref2->data[0], ref2->linesize[0],
2326 row << 3, col << 3, &b->mv[0][1], 4, 8, w, h);
2327 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0] + 4, ls_y,
2328 ref2->data[0], ref2->linesize[0],
2329 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w, h);
2332 av_assert2(b->bs == BS_4x4);
2334 // FIXME if two horizontally adjacent blocks have the same MV,
2335 // do a w8 instead of a w4 call
// BS_4x4: four independent 4x4 calls, one per sub-block mv.
2336 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0], ls_y,
2337 ref1->data[0], ref1->linesize[0],
2338 row << 3, col << 3, &b->mv[0][0], 4, 4, w, h);
2339 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0] + 4, ls_y,
2340 ref1->data[0], ref1->linesize[0],
2341 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w, h);
2342 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2343 b->dst[0] + 4 * ls_y, ls_y,
2344 ref1->data[0], ref1->linesize[0],
2345 (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w, h);
2346 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2347 b->dst[0] + 4 * ls_y + 4, ls_y,
2348 ref1->data[0], ref1->linesize[0],
2349 (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w, h);
// Second direction for BS_4x4.
2352 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0], ls_y,
2353 ref2->data[0], ref2->linesize[0],
2354 row << 3, col << 3, &b->mv[0][1], 4, 4, w, h);
2355 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0] + 4, ls_y,
2356 ref2->data[0], ref2->linesize[0],
2357 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w, h);
2358 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2359 b->dst[0] + 4 * ls_y, ls_y,
2360 ref2->data[0], ref2->linesize[0],
2361 (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w, h);
2362 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2363 b->dst[0] + 4 * ls_y + 4, ls_y,
2364 ref2->data[0], ref2->linesize[0],
2365 (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w, h);
// Blocks >= 8x8: a single prediction call covering the whole block.
2369 int bwl = bwlog_tab[0][b->bs];
2370 int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
2372 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], b->dst[0], ls_y,
2373 ref1->data[0], ref1->linesize[0],
2374 row << 3, col << 3, &b->mv[0][0],bw, bh, w, h);
2377 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], b->dst[0], ls_y,
2378 ref2->data[0], ref2->linesize[0],
2379 row << 3, col << 3, &b->mv[0][1], bw, bh, w, h);
// --- chroma prediction ---
2384 int bwl = bwlog_tab[1][b->bs];
2385 int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
// For split luma blocks the chroma mv is the rounded average of the
// four sub-block mvs.
2390 if (b->bs > BS_8x8) {
2391 mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
2392 mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
2397 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
2398 b->dst[1], b->dst[2], ls_uv,
2399 ref1->data[1], ref1->linesize[1],
2400 ref1->data[2], ref1->linesize[2],
2401 row << 2, col << 2, &mvuv, bw, bh, w, h);
// Same averaging for the second prediction direction.
2404 if (b->bs > BS_8x8) {
2405 mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
2406 mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
2410 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
2411 b->dst[1], b->dst[2], ls_uv,
2412 ref2->data[1], ref2->linesize[1],
2413 ref2->data[2], ref2->linesize[2],
2414 row << 2, col << 2, &mvuv, bw, bh, w, h);
// --- residual add (skipped blocks have no coefficients) ---
2419 /* mostly copied intra_reconn() */
2421 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2422 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
// Clamp the loop bounds so we never touch 4x4 units beyond the visible
// frame edge.
2423 int end_x = FFMIN(2 * (s->cols - col), w4);
2424 int end_y = FFMIN(2 * (s->rows - row), h4);
// Lossless streams use a separate (WHT) transform set, offset by 4.
2425 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2426 int uvstep1d = 1 << b->uvtx, p;
2427 uint8_t *dst = b->dst[0];
// Luma: iterate transform blocks; n indexes the eob/coef arrays.
2430 for (n = 0, y = 0; y < end_y; y += step1d) {
2432 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
// For tx > 8x8 the eob entry spans two bytes, hence the aligned 16-bit read.
2433 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2436 s->dsp.itxfm_add[tx][DCT_DCT](ptr, b->y_stride,
2437 s->block + 16 * n, eob);
2439 dst += 4 * b->y_stride * step1d;
// Chroma: same scheme per plane, always DCT_DCT for inter blocks.
2447 step = 1 << (b->uvtx * 2);
2448 for (p = 0; p < 2; p++) {
2449 dst = b->dst[p + 1];
2450 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2452 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2453 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2456 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, b->uv_stride,
2457 s->uvblock[p] + 16 * n, eob);
2459 dst += 4 * uvstep1d * b->uv_stride;
// Build the per-superblock loopfilter edge masks for one block.
// For each 8-pixel row inside the 64x64 superblock, VP9Filter.mask
// accumulates bitmasks (one bit per 8px column) of which edges need the
// 16/8/4-wide or inner-4 filter, separately for column ([..][0]) and row
// ([..][1]) edges and for luma (is_uv=0) vs chroma (is_uv=1).
// NOTE(review): some brace/else lines are missing from this excerpt;
// annotations describe only the visible statements.
2465 static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
2466 int row_and_7, int col_and_7,
2467 int w, int h, int col_end, int row_end,
2468 enum TxfmMode tx, int skip_inter)
2470 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2471 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2472 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2473 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2475 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2476 // edges. This means that for UV, we work on two subsampled blocks at
2477 // a time, and we only use the topleft block's mode information to set
2478 // things like block strength. Thus, for any block size smaller than
2479 // 16x16, ignore the odd portion of the block.
2480 if (tx == TX_4X4 && is_uv) {
// Non-skipped 4x4-transform blocks: every interior 4px edge is filtered.
2495 if (tx == TX_4X4 && !skip_inter) {
// t = bit for this block's first column; m_col = bits for all w columns.
2496 int t = 1 << col_and_7, m_col = (t << w) - t, y;
2497 int m_col_odd = (t << (w - 1)) - t;
2499 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
2501 int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2503 for (y = row_and_7; y < h + row_and_7; y++) {
2504 int col_mask_id = 2 - !(y & 7);
2506 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2507 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2508 // for odd lines, if the odd col is not being filtered,
2509 // skip odd row also:
2516 // if a/c are even row/col and b/d are odd, and d is skipped,
2517 // e.g. right edge of size-66x66.webm, then skip b also (bug)
2518 if ((col_end & 1) && (y & 1)) {
2519 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
2521 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
// Luma variant of the same 4x4 accumulation (0x11 keeps the 8px-aligned
// column bits for the wider filter).
2525 int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2527 for (y = row_and_7; y < h + row_and_7; y++) {
2528 int col_mask_id = 2 - !(y & 3);
2530 lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
2531 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2532 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
2533 lflvl->mask[is_uv][0][y][3] |= m_col;
2534 lflvl->mask[is_uv][1][y][3] |= m_col;
// Non-skipped blocks with tx >= 8x8: only transform-block boundaries
// are filtered, at a spacing of step1d 8px units.
2538 int y, t = 1 << col_and_7, m_col = (t << w) - t;
2541 int mask_id = (tx == TX_8X8);
2542 int l2 = tx + is_uv - 1, step1d = 1 << l2;
// Which columns carry an edge, per transform size (all/every 2nd/4th/8th).
2543 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2544 int m_row = m_col & masks[l2];
2546 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
2547 // 8wd loopfilter to prevent going off the visible edge.
2548 if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2549 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2550 int m_row_8 = m_row - m_row_16;
2552 for (y = row_and_7; y < h + row_and_7; y++) {
2553 lflvl->mask[is_uv][0][y][0] |= m_row_16;
2554 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2557 for (y = row_and_7; y < h + row_and_7; y++)
2558 lflvl->mask[is_uv][0][y][mask_id] |= m_row;
// Same odd-edge special case for row (vertical-position) edges.
2561 if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2562 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2563 lflvl->mask[is_uv][1][y][0] |= m_col;
2564 if (y - row_and_7 == h - 1)
2565 lflvl->mask[is_uv][1][y][1] |= m_col;
2567 for (y = row_and_7; y < h + row_and_7; y += step1d)
2568 lflvl->mask[is_uv][1][y][mask_id] |= m_col;
// Skipped inter blocks: only the block's outer edges are filtered.
2570 } else if (tx != TX_4X4) {
2573 mask_id = (tx == TX_8X8) || (is_uv && h == 1);
2574 lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
2575 mask_id = (tx == TX_8X8) || (is_uv && w == 1);
2576 for (y = row_and_7; y < h + row_and_7; y++)
2577 lflvl->mask[is_uv][0][y][mask_id] |= t;
// Skipped 4x4 blocks, chroma path (8px alignment mask 0x01).
2579 int t8 = t & 0x01, t4 = t - t8;
2581 for (y = row_and_7; y < h + row_and_7; y++) {
2582 lflvl->mask[is_uv][0][y][2] |= t4;
2583 lflvl->mask[is_uv][0][y][1] |= t8;
2585 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
// Skipped 4x4 blocks, luma path (8px alignment mask 0x11).
2587 int t8 = t & 0x11, t4 = t - t8;
2589 for (y = row_and_7; y < h + row_and_7; y++) {
2590 lflvl->mask[is_uv][0][y][2] |= t4;
2591 lflvl->mask[is_uv][0][y][1] |= t8;
2593 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
// Decode and reconstruct one block: parse its mode/mv/coefficient data,
// run intra or inter reconstruction (possibly via temporary buffers when
// the block overhangs the frame edge), then record loopfilter levels and
// edge masks for the enclosing superblock.
// Returns 0 on success or a negative error from coefficient decoding.
// NOTE(review): several intermediate lines (mode decoding call, skip
// branch, else arms) are missing from this excerpt.
2598 static int decode_b(AVCodecContext *ctx, int row, int col,
2599 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2600 enum BlockLevel bl, enum BlockPartition bp)
2602 VP9Context *s = ctx->priv_data;
2603 VP9Block *const b = &s->b;
// Block size is fully determined by partition level + partition type.
2604 enum BlockSize bs = bl * 3 + bp;
2605 int res, y, w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
// Clamp the allowed mv range so prediction stays within the frame plus
// a 128-unit (16px) border.
2612 s->min_mv.x = -(128 + col * 64);
2613 s->min_mv.y = -(128 + row * 64);
2614 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
2615 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
// Chroma tx size is one step smaller when the block is too narrow/short
// for the luma tx size in the subsampled plane.
2618 b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
2621 if ((res = decode_coeffs(ctx)) < 0)
// Skipped block: clear the nnz (non-zero-coefficient) contexts instead.
2626 memset(&s->above_y_nnz_ctx[col * 2], 0, w4 * 2);
2627 memset(&s->left_y_nnz_ctx[(row & 7) << 1], 0, h4 * 2);
2628 for (pl = 0; pl < 2; pl++) {
2629 memset(&s->above_uv_nnz_ctx[pl][col], 0, w4);
2630 memset(&s->left_uv_nnz_ctx[pl][row & 7], 0, h4);
2634 // emulated overhangs if the stride of the target buffer can't hold. This
2635 // allows to support emu-edge and so on even if we have large block
// Decide per plane whether to reconstruct into the temporary buffers
// (block extends past the buffer stride or below the frame).
2637 emu[0] = (col + w4) * 8 > s->f->linesize[0] ||
2638 (row + h4) > s->rows + 2 * !(ctx->flags & CODEC_FLAG_EMU_EDGE);
2639 emu[1] = (col + w4) * 4 > s->f->linesize[1] ||
2640 (row + h4) > s->rows + 2 * !(ctx->flags & CODEC_FLAG_EMU_EDGE);
2642 b->dst[0] = s->tmp_y;
2645 b->dst[0] = s->f->data[0] + yoff;
2646 b->y_stride = s->f->linesize[0];
2649 b->dst[1] = s->tmp_uv[0];
2650 b->dst[2] = s->tmp_uv[1];
2653 b->dst[1] = s->f->data[1] + uvoff;
2654 b->dst[2] = s->f->data[2] + uvoff;
2655 b->uv_stride = s->f->linesize[1];
2658 intra_recon(ctx, yoff, uvoff);
// Copy the visible part of the temporary luma buffer back into the
// frame, in power-of-two-wide strips via the unfiltered mc functions.
2663 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
2665 for (n = 0; o < w; n++) {
2670 s->dsp.mc[n][0][0][0][0](s->f->data[0] + yoff + o, s->f->linesize[0],
2671 s->tmp_y + o, 64, h, 0, 0);
// Same copy-back for the two chroma planes (half resolution).
2677 int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
2679 for (n = 1; o < w; n++) {
2684 s->dsp.mc[n][0][0][0][0](s->f->data[1] + uvoff + o, s->f->linesize[1],
2685 s->tmp_uv[0] + o, 32, h, 0, 0);
2686 s->dsp.mc[n][0][0][0][0](s->f->data[2] + uvoff + o, s->f->linesize[2],
2687 s->tmp_uv[1] + o, 32, h, 0, 0);
2693 // pick filter level and find edges to apply filter to
2694 if (s->filter.level &&
2695 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
2696 [b->mode[3] != ZEROMV]) > 0) {
2697 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
2698 int skip_inter = !b->intra && b->skip;
// Record this block's filter level for every 8x8 unit it covers, then
// accumulate edge masks for luma and (with odd-edge info) chroma.
2700 for (y = 0; y < h4; y++)
2701 memset(&lflvl->level[((row & 7) + y) * 8 + (col & 7)], lvl, w4);
2702 mask_edges(lflvl, 0, row & 7, col & 7, x_end, y_end, 0, 0, b->tx, skip_inter);
2703 mask_edges(lflvl, 1, row & 7, col & 7, x_end, y_end,
2704 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
2705 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
2706 b->uvtx, skip_inter);
// Lazily fill the limit/mblim lookup tables for this filter level.
2708 if (!s->filter.lim_lut[lvl]) {
2709 int sharp = s->filter.sharpness;
2713 limit >>= (sharp + 3) >> 2;
2714 limit = FFMIN(limit, 9 - sharp);
2716 limit = FFMAX(limit, 1);
2718 s->filter.lim_lut[lvl] = limit;
2719 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
// Recursively decode one superblock partition level.  Reads the
// partition type from the arithmetic coder (context c derived from the
// above/left partition contexts), then either decodes a leaf block or
// recurses into up to four sub-partitions.  Blocks whose right/bottom
// half lies outside the frame take the truncated paths below.
// Returns 0 on success or the first negative error from a sub-call.
// NOTE(review): some switch/brace lines are missing from this excerpt.
2726 static int decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
2727 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
2729 VP9Context *s = ctx->priv_data;
2730 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
2731 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1), res;
2732 const uint8_t *p = s->keyframe ? vp9_default_kf_partition_probs[bl][c] :
2733 s->prob.p.partition[bl][c];
2734 enum BlockPartition bp;
// Half block size at this level, in 8px units (4, 2, 1 for bl 0..2).
2735 ptrdiff_t hbs = 4 >> bl;
2738 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
2739 res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// Full in-frame case: all four partition types are possible.
2740 } else if (col + hbs < s->cols) {
2741 if (row + hbs < s->rows) {
2742 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
2744 case PARTITION_NONE:
2745 res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// PARTITION_H: top half, then bottom half hbs rows lower.
2748 if (!(res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp))) {
2749 yoff += hbs * 8 * s->f->linesize[0];
2750 uvoff += hbs * 4 * s->f->linesize[1];
2751 res = decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
// PARTITION_V: left half, then right half hbs columns over.
2755 if (!(res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp))) {
2758 res = decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
2761 case PARTITION_SPLIT:
// Recurse into the four quadrants in raster order, stopping at the
// first error.
2762 if (!(res = decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1))) {
2763 if (!(res = decode_sb(ctx, row, col + hbs, lflvl,
2764 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1))) {
2765 yoff += hbs * 8 * s->f->linesize[0];
2766 uvoff += hbs * 4 * s->f->linesize[1];
2767 if (!(res = decode_sb(ctx, row + hbs, col, lflvl,
2768 yoff, uvoff, bl + 1)))
2769 res = decode_sb(ctx, row + hbs, col + hbs, lflvl,
2770 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
// Bottom edge out of frame: only split (top quadrants) or horizontal.
2777 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
2778 bp = PARTITION_SPLIT;
2779 if (!(res = decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1)))
2780 res = decode_sb(ctx, row, col + hbs, lflvl,
2781 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
2784 res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// Right edge out of frame: only split (left quadrants) or vertical.
2786 } else if (row + hbs < s->rows) {
2787 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
2788 bp = PARTITION_SPLIT;
2789 if (!(res = decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1))) {
2790 yoff += hbs * 8 * s->f->linesize[0];
2791 uvoff += hbs * 4 * s->f->linesize[1];
2792 res = decode_sb(ctx, row + hbs, col, lflvl,
2793 yoff, uvoff, bl + 1);
2797 res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// Both edges out of frame: forced split, only the top-left quadrant.
2800 bp = PARTITION_SPLIT;
2801 res = decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
// Count the chosen partition for later probability adaptation.
2803 s->counts.partition[bl][c][bp]++;
// Apply the deblocking loopfilter to one 64x64 superblock, using the
// per-8px-row edge masks and per-8x8 filter levels accumulated in lflvl
// by mask_edges()/decode_b().  Four passes: Y column edges, Y row edges,
// then the same two passes for each chroma plane.  For each edge the
// filter level L selects E (mblim) and I (lim) from the lazily-filled
// lookup tables; paired edges 8 pixels apart are merged into _16 or
// _mix2 calls where the masks allow.
// NOTE(review): various else/brace lines are missing from this excerpt.
2808 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
2809 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
2811 VP9Context *s = ctx->priv_data;
2812 uint8_t *dst = s->f->data[0] + yoff, *lvl = lflvl->level;
2813 ptrdiff_t ls_y = s->f->linesize[0], ls_uv = s->f->linesize[1];
2816 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
2817 // if you think of them as acting on a 8x8 block max, we can interleave
2818 // each v/h within the single x loop, but that only works if we work on
2819 // 8 pixel blocks, and we won't always do that (we want at least 16px
2820 // to use SSE2 optimizations, perhaps 32 for AVX2)
2822 // filter edges between columns, Y plane (e.g. block1 | block2)
// Two 8px rows are handled per iteration so vertically adjacent edges
// can be merged into a single 16-high filter call.
2823 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
2824 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
2825 uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
// hm*: union of all filter-width masks; [3] is the inner-4px mask.
2826 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
2827 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
2828 unsigned hm = hm1 | hm2 | hm13 | hm23;
2830 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
// L holds two 4-bit levels? No — L is the full level byte, H its high
// nibble (hev threshold input) — see lim_lut indexing below.
2832 int L = *l, H = L >> 4;
2833 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2836 if (hmask1[0] & x) {
2837 if (hmask2[0] & x) {
2838 av_assert2(l[8] == L);
// Both rows want the widest filter: one 16-wide call.
2839 s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
2841 s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
2843 } else if (hm2 & x) {
// Merge this row's edge with the next row's (levels packed into the
// second byte of E/I/H) into a mix2 call.
2846 E |= s->filter.mblim_lut[L] << 8;
2847 I |= s->filter.lim_lut[L] << 8;
2848 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
2850 [0](ptr, ls_y, E, I, H);
2852 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
2853 [0](ptr, ls_y, E, I, H);
2856 } else if (hm2 & x) {
// Only the lower of the two rows has this edge.
2857 int L = l[8], H = L >> 4;
2858 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2861 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
2862 [0](ptr + 8 * ls_y, ls_y, E, I, H);
// Inner 4px edges (mask index 3), offset by 4 from the block edge.
2866 int L = *l, H = L >> 4;
2867 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2872 E |= s->filter.mblim_lut[L] << 8;
2873 I |= s->filter.lim_lut[L] << 8;
2874 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
2876 s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
2878 } else if (hm23 & x) {
2879 int L = l[8], H = L >> 4;
2880 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2882 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
2888 // filter edges between rows, Y plane (e.g. ------)
2890 dst = s->f->data[0] + yoff;
2892 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
2893 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
2894 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
// Horizontally adjacent edges are merged, hence the stride-2 walk.
2896 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
2899 int L = *l, H = L >> 4;
2900 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2903 if (vmask[0] & (x << 1)) {
2904 av_assert2(l[1] == L);
2905 s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
2907 s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
2909 } else if (vm & (x << 1)) {
2912 E |= s->filter.mblim_lut[L] << 8;
2913 I |= s->filter.lim_lut[L] << 8;
2914 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
2915 [!!(vmask[1] & (x << 1))]
2916 [1](ptr, ls_y, E, I, H);
2918 s->dsp.loop_filter_8[!!(vmask[1] & x)]
2919 [1](ptr, ls_y, E, I, H);
2921 } else if (vm & (x << 1)) {
2922 int L = l[1], H = L >> 4;
2923 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2925 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
2926 [1](ptr + 8, ls_y, E, I, H);
// Inner 4px row edges, 4 lines below the block edge.
2930 int L = *l, H = L >> 4;
2931 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2933 if (vm3 & (x << 1)) {
2936 E |= s->filter.mblim_lut[L] << 8;
2937 I |= s->filter.lim_lut[L] << 8;
2938 s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
2940 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
2942 } else if (vm3 & (x << 1)) {
2943 int L = l[1], H = L >> 4;
2944 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2946 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
2951 // same principle but for U/V planes
2952 for (p = 0; p < 2; p++) {
2954 dst = s->f->data[1 + p] + uvoff;
// Chroma column edges: 4 subsampled rows per iteration; level pointer
// steps by 2 per 8px because of the 2x subsampling.
2955 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
2956 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
2957 uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
2958 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
2959 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
2961 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
2964 int L = *l, H = L >> 4;
2965 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2967 if (hmask1[0] & x) {
2968 if (hmask2[0] & x) {
2969 av_assert2(l[16] == L);
2970 s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
2972 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
2974 } else if (hm2 & x) {
2977 E |= s->filter.mblim_lut[L] << 8;
2978 I |= s->filter.lim_lut[L] << 8;
2979 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
2981 [0](ptr, ls_uv, E, I, H);
2983 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
2984 [0](ptr, ls_uv, E, I, H);
2986 } else if (hm2 & x) {
2987 int L = l[16], H = L >> 4;
2988 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
2990 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
2991 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
// Chroma row edges.
2999 dst = s->f->data[1 + p] + uvoff;
3000 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
3001 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
3002 unsigned vm = vmask[0] | vmask[1] | vmask[2];
3004 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
3007 int L = *l, H = L >> 4;
3008 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3011 if (vmask[0] & (x << 2)) {
3012 av_assert2(l[2] == L);
3013 s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
3015 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
3017 } else if (vm & (x << 2)) {
3020 E |= s->filter.mblim_lut[L] << 8;
3021 I |= s->filter.lim_lut[L] << 8;
3022 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3023 [!!(vmask[1] & (x << 2))]
3024 [1](ptr, ls_uv, E, I, H);
3026 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3027 [1](ptr, ls_uv, E, I, H);
3029 } else if (vm & (x << 2)) {
3030 int L = l[2], H = L >> 4;
3031 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3033 s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
3034 [1](ptr + 8, ls_uv, E, I, H);
/**
 * Compute the pixel range covered by one tile.
 *
 * The frame is divided into n superblocks along this dimension and split
 * into 2^log2_n tiles; tile idx spans superblocks [idx*n >> log2_n,
 * (idx+1)*n >> log2_n).  Both bounds are clamped to n and converted to
 * pixels (one superblock unit = 8 pixels, hence the << 3).
 *
 * @param start  receives the tile's first pixel offset (inclusive)
 * @param end    receives the tile's end pixel offset (exclusive)
 * @param idx    tile index within this dimension
 * @param log2_n log2 of the tile count
 * @param n      dimension size in superblock units
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first_sb = (idx * n) >> log2_n;
    int last_sb  = ((idx + 1) * n) >> log2_n;

    if (first_sb > n)
        first_sb = n;
    if (last_sb > n)
        last_sb = n;
    *start = first_sb << 3;
    *end   = last_sb << 3;
}
// Backward probability adaptation: merge the branch counts (ct0 taken /
// ct1 not-taken) observed in the decoded frame into the stored
// probability *p.  p2 is the empirical probability from the counts
// (clipped to [1,255]); the count is capped at max_count and scales the
// blend factor, so rarely-seen contexts move less.
// NOTE(review): the zero-count early return and the read of the prior
// probability into p1 are not visible in this excerpt — confirm against
// upstream vp9.c before relying on the exact data flow.
3052 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3053 int max_count, int update_factor)
3055 unsigned ct = ct0 + ct1, p2, p1;
// Rounded empirical probability of the "0" branch, in 1/256 units.
3061 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3062 p2 = av_clip(p2, 1, 255);
3063 ct = FFMIN(ct, max_count);
3064 update_factor = FASTDIV(update_factor * ct, max_count);
3066 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3067 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
// End-of-frame backward adaptation of the entropy-coding probabilities:
// fold the symbol counts gathered during decoding (s->counts.*) into the
// frame context s->prob_ctx[s->framectxid], group by group.  Keyframes /
// intra-only frames adapt only the coefficient probabilities; everything
// else (inter modes, mvs, partitioning, tx sizes, filters) is skipped by
// copying the current values and returning early.
// NOTE(review): a few brace/return lines are missing from this excerpt.
3070 static void adapt_probs(VP9Context *s)
3073 prob_context *p = &s->prob_ctx[s->framectxid].p;
// Coefficients adapt faster (112) after key/intra/reset frames.
3074 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
// coefficient probabilities, per tx size / plane / inter / band / context
3077 for (i = 0; i < 4; i++)
3078 for (j = 0; j < 2; j++)
3079 for (k = 0; k < 2; k++)
3080 for (l = 0; l < 6; l++)
3081 for (m = 0; m < 6; m++) {
3082 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3083 unsigned *e = s->counts.eob[i][j][k][l][m];
3084 unsigned *c = s->counts.coef[i][j][k][l][m];
3086 if (l == 0 && m >= 3) // dc only has 3 pt
3089 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3090 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3091 adapt_prob(&pp[2], c[1], c[2], 24, uf);
// Intra frames: keep the remaining groups unchanged and stop here.
3094 if (s->keyframe || s->intraonly) {
3095 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3096 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3097 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3098 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
// skip flag
3103 for (i = 0; i < 3; i++)
3104 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
// intra/inter flag
3107 for (i = 0; i < 4; i++)
3108 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
// compound prediction mode
3111 if (s->comppredmode == PRED_SWITCHABLE) {
3112 for (i = 0; i < 5; i++)
3113 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
// reference frame selection (compound and single)
3117 if (s->comppredmode != PRED_SINGLEREF) {
3118 for (i = 0; i < 5; i++)
3119 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3120 s->counts.comp_ref[i][1], 20, 128);
3123 if (s->comppredmode != PRED_COMPREF) {
3124 for (i = 0; i < 5; i++) {
3125 uint8_t *pp = p->single_ref[i];
3126 unsigned (*c)[2] = s->counts.single_ref[i];
3128 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3129 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3133 // block partitioning
3134 for (i = 0; i < 4; i++)
3135 for (j = 0; j < 4; j++) {
3136 uint8_t *pp = p->partition[i][j];
3137 unsigned *c = s->counts.partition[i][j];
3139 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3140 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3141 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// tx size, per size-availability context
3145 if (s->txfmmode == TX_SWITCHABLE) {
3146 for (i = 0; i < 2; i++) {
3147 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3149 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3150 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3151 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3152 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3153 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3154 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3158 // interpolation filter
3159 if (s->filtermode == FILTER_SWITCHABLE) {
3160 for (i = 0; i < 4; i++) {
3161 uint8_t *pp = p->filter[i];
3162 unsigned *c = s->counts.filter[i];
3164 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3165 adapt_prob(&pp[1], c[1], c[2], 20, 128);
// inter mode (tree over ZEROMV/NEARESTMV/NEARMV/NEWMV counts)
3170 for (i = 0; i < 7; i++) {
3171 uint8_t *pp = p->mv_mode[i];
3172 unsigned *c = s->counts.mv_mode[i];
3174 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3175 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3176 adapt_prob(&pp[2], c[1], c[3], 20, 128);
// mv joint distribution (which of x/y are nonzero)
3181 uint8_t *pp = p->mv_joint;
3182 unsigned *c = s->counts.mv_joint;
3184 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3185 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3186 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// mv components (one set each for x and y)
3190 for (i = 0; i < 2; i++) {
3192 unsigned *c, (*c2)[2], sum;
3194 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3195 s->counts.mv_comp[i].sign[1], 20, 128);
// magnitude class tree (11 classes)
3197 pp = p->mv_comp[i].classes;
3198 c = s->counts.mv_comp[i].classes;
3199 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3200 adapt_prob(&pp[0], c[0], sum, 20, 128);
3202 adapt_prob(&pp[1], c[1], sum, 20, 128);
3204 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3205 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3207 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3208 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3210 adapt_prob(&pp[6], c[6], sum, 20, 128);
3211 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3212 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3213 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3215 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3216 s->counts.mv_comp[i].class0[1], 20, 128);
// per-bit probabilities for the integer magnitude
3217 pp = p->mv_comp[i].bits;
3218 c2 = s->counts.mv_comp[i].bits;
3219 for (j = 0; j < 10; j++)
3220 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
// fractional-pel trees (class0 and general)
3222 for (j = 0; j < 2; j++) {
3223 pp = p->mv_comp[i].class0_fp[j];
3224 c = s->counts.mv_comp[i].class0_fp[j];
3225 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3226 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3227 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3229 pp = p->mv_comp[i].fp;
3230 c = s->counts.mv_comp[i].fp;
3231 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3232 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3233 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// 1/8-pel (high-precision) bits only adapt when the frame used them
3235 if (s->highprecisionmvs) {
3236 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3237 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3238 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3239 s->counts.mv_comp[i].hp[1], 20, 128);
// luma intra mode, per block-size group; tree pruned top-down, so sum
// is reduced as each mode's count is peeled off
3244 for (i = 0; i < 4; i++) {
3245 uint8_t *pp = p->y_mode[i];
3246 unsigned *c = s->counts.y_mode[i], sum, s2;
3248 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3249 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3250 sum -= c[TM_VP8_PRED];
3251 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3252 sum -= c[VERT_PRED];
3253 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3254 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3256 adapt_prob(&pp[3], s2, sum, 20, 128);
3258 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3259 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3260 sum -= c[DIAG_DOWN_LEFT_PRED];
3261 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3262 sum -= c[VERT_LEFT_PRED];
3263 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3264 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// chroma intra mode, conditioned on the luma mode (same tree)
3268 for (i = 0; i < 10; i++) {
3269 uint8_t *pp = p->uv_mode[i];
3270 unsigned *c = s->counts.uv_mode[i], sum, s2;
3272 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3273 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3274 sum -= c[TM_VP8_PRED];
3275 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3276 sum -= c[VERT_PRED];
3277 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3278 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3280 adapt_prob(&pp[3], s2, sum, 20, 128);
3282 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3283 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3284 sum -= c[DIAG_DOWN_LEFT_PRED];
3285 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3286 sum -= c[VERT_LEFT_PRED];
3287 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3288 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// Free all codec-private state: the 8 reference frames, then the
// dynamically allocated context buffers (av_freep also NULLs the
// pointers).  NOTE(review): the remaining av_freep calls and the return
// statement continue beyond this excerpt.
3292 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3294 VP9Context *s = ctx->priv_data;
3297 for (i = 0; i < 8; i++)
3298 av_frame_free(&s->refs[i]);
3299 av_freep(&s->above_partition_ctx);
/* Decode a single VP9 frame (not a superframe) from `data`/`size`.
 * Flow: parse the frame header; handle the show-existing-frame case;
 * allocate the output buffer; decode all tiles one superblock row at a
 * time (loop-filtering each finished row); then perform probability
 * maintenance and reference-slot updates.
 * Returns 0 or a sub-frame size on success, <0 (AVERROR) on failure.
 * NOTE(review): this excerpt is truncated — braces, some declarations
 * (e.g. `tile_size`, `j,k,l,m`) and error-path lines are missing. */
3306 static int vp9_decode_frame(AVCodecContext *ctx, AVFrame *frame,
3307 int *got_frame, const uint8_t *data, int size)
3309 VP9Context *s = ctx->priv_data;
3310 int res, tile_row, tile_col, i, ref, row, col;
3311 ptrdiff_t yoff = 0, uvoff = 0;
3312 //AVFrame *prev_frame = s->f; // for segmentation map

// res == 0 from the header parser means "show an existing reference
// frame": no coded data follows, just return a ref to slot `ref`.
3314 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3316 } else if (res == 0) {
3317 if (!s->refs[ref]->buf[0]) {
3318 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3319 return AVERROR_INVALIDDATA;
3321 if ((res = av_frame_ref(frame, s->refs[ref])) < 0)

// Request a reference-capable buffer only if this frame will be kept
// in one of the 8 reference slots afterwards.
3330 if ((res = ff_get_buffer(ctx, s->f,
3331 s->refreshrefmask ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
3333 s->f->key_frame = s->keyframe;
3334 s->f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;

3336 // main tile decode loop
// Reset the top-row ("above") prediction contexts before any tile.
// Keyframes/intra-only frames seed the mode context with DC_PRED,
// inter frames with NEARESTMV.
3337 memset(s->above_partition_ctx, 0, s->cols);
3338 memset(s->above_skip_ctx, 0, s->cols);
3339 if (s->keyframe || s->intraonly) {
3340 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
3342 memset(s->above_mode_ctx, NEARESTMV, s->cols);
3344 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
3345 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
3346 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
3347 memset(s->above_segpred_ctx, 0, s->cols);

// Set up one range decoder per tile column (c_b[tile_col]).
3348 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
3349 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
3350 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
3351 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {

// Every tile except the last is prefixed by a 32-bit big-endian size;
// the last tile implicitly spans the rest of the packet.
3354 if (tile_col == s->tiling.tile_cols - 1 &&
3355 tile_row == s->tiling.tile_rows - 1) {
3358 tile_size = AV_RB32(data);
3362 if (tile_size > size)
3363 return AVERROR_INVALIDDATA;
3364 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
3365 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) // marker bit
3366 return AVERROR_INVALIDDATA;

// Walk superblock rows: each step covers 64 luma / 32 chroma lines
// (row counts in units of 8x8 blocks, hence row += 8).
3371 for (row = s->tiling.tile_row_start;
3372 row < s->tiling.tile_row_end;
3373 row += 8, yoff += s->f->linesize[0] * 64,
3374 uvoff += s->f->linesize[1] * 32) {
3375 struct VP9Filter *lflvl_ptr = s->lflvl;
3376 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

3378 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3379 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
3380 tile_col, s->tiling.log2_tile_cols, s->sb_cols);

// Reset the "left" contexts at the start of each tile column,
// mirroring the "above" reset at frame start.
3382 memset(s->left_partition_ctx, 0, 8);
3383 memset(s->left_skip_ctx, 0, 8);
3384 if (s->keyframe || s->intraonly) {
3385 memset(s->left_mode_ctx, DC_PRED, 16);
3387 memset(s->left_mode_ctx, NEARESTMV, 8);
3389 memset(s->left_y_nnz_ctx, 0, 16);
3390 memset(s->left_uv_nnz_ctx, 0, 16);
3391 memset(s->left_segpred_ctx, 0, 8);

// Swap this tile column's range-decoder state into the shared s->c
// for decode_sb(), then save it back after the row is done.
3393 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
3394 for (col = s->tiling.tile_col_start;
3395 col < s->tiling.tile_col_end;
3396 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3397 // FIXME integrate with lf code (i.e. zero after each
3398 // use, similar to invtxfm coefficients, or similar)
3399 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));

3401 if ((res = decode_sb(ctx, row, col, lflvl_ptr,
3402 yoff2, uvoff2, BL_64X64)) < 0)
3405 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));

3408 // backup pre-loopfilter reconstruction data for intra
3409 // prediction of next row of sb64s
3410 if (row + 8 < s->rows) {
3411 memcpy(s->intra_pred_data[0],
3412 s->f->data[0] + yoff + 63 * s->f->linesize[0],
3414 memcpy(s->intra_pred_data[1],
3415 s->f->data[1] + uvoff + 31 * s->f->linesize[1],
3417 memcpy(s->intra_pred_data[2],
3418 s->f->data[2] + uvoff + 31 * s->f->linesize[2],

3422 // loopfilter one row
3423 if (s->filter.level) {
3426 lflvl_ptr = s->lflvl;
3427 for (col = 0; col < s->cols;
3428 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3429 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);

3435 // bw adaptivity (or in case of parallel decoding mode, fw adaptivity
3436 // probability maintenance between frames)
3437 if (s->refreshctx) {
3438 if (s->parallelmode) {
// Parallel mode: copy the forward-updated probabilities straight into
// the selected frame context; no backward adaptation happens.
3441 for (i = 0; i < 4; i++)
3442 for (j = 0; j < 2; j++)
3443 for (k = 0; k < 2; k++)
3444 for (l = 0; l < 6; l++)
3445 for (m = 0; m < 6; m++)
3446 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
3447 s->prob.coef[i][j][k][l][m], 3);
3448 s->prob_ctx[s->framectxid].p = s->prob.p;

// Keep this frame's MV buffer as "previous frame MVs" for the next
// frame's MV prediction.
3453 FFSWAP(struct VP9mvrefPair *, s->mv[0], s->mv[1]);

// Store the decoded frame in every reference slot selected by the
// header's refresh mask.
3456 for (i = 0; i < 8; i++)
3457 if (s->refreshrefmask & (1 << i)) {
3458 av_frame_unref(s->refs[i]);
3459 if ((res = av_frame_ref(s->refs[i], s->f)) < 0)
3464 av_frame_unref(s->f);
/* Packet entry point. A VP9 packet may be a "superframe": several
 * concatenated frames followed by an index whose marker byte
 * (0b110xxxxx) appears at both the start and the end of the index.
 * If a valid index is found, each sub-frame is decoded in turn;
 * otherwise the whole packet is decoded as one frame.
 * NOTE(review): excerpt is truncated — the `case_n(1, *idx)` case,
 * the switch over `nbytes`, and macro interior lines are missing. */
3472 static int vp9_decode_packet(AVCodecContext *avctx, AVFrame *frame,
3473 int *got_frame, AVPacket *avpkt)
3475 const uint8_t *data = avpkt->data;
3476 int size = avpkt->size, marker, res;

3478 // read superframe index - this is a collection of individual frames that
3479 // together lead to one visible frame
3480 av_assert1(size > 0); // without CODEC_CAP_DELAY, this is implied
3481 marker = data[size - 1];
3482 if ((marker & 0xe0) == 0xc0) {
// Marker layout: 110 | (bytes_per_framesize - 1) << 3 | (n_frames - 1)
3483 int nbytes = 1 + ((marker >> 3) & 0x3);
3484 int n_frames = 1 + (marker & 0x7), idx_sz = 2 + n_frames * nbytes;

// A superframe index is only valid if the same marker byte also
// precedes it (one copy at each end of the index).
3486 if (size >= idx_sz && data[size - idx_sz] == marker) {
3487 const uint8_t *idx = data + size + 1 - idx_sz;

// case_n(a, rd): read each of the n_frames sub-frame sizes as an
// a-byte little-endian value via `rd`, bounds-check it against the
// remaining packet, and decode the sub-frame.
3489 #define case_n(a, rd) \
3491 while (n_frames--) { \
3495 av_log(avctx, AV_LOG_ERROR, \
3496 "Superframe packet size too big: %d > %d\n", \
3498 return AVERROR_INVALIDDATA; \
3500 res = vp9_decode_frame(avctx, frame, got_frame, \

3509 case_n(2, AV_RL16(idx));
3510 case_n(3, AV_RL24(idx));
3511 case_n(4, AV_RL32(idx));

3516 // if we get here, there was no valid superframe index, i.e. this is just
3517 // one whole single frame - decode it as such from the complete input buf
3518 if ((res = vp9_decode_frame(avctx, frame, got_frame, data, size)) < 0)
/* Flush callback (seek/reset): drop every reference-frame slot so no
 * stale frame can be used for inter prediction after a discontinuity.
 * The AVFrame holders themselves stay allocated; only their buffer
 * references are released. */
3523 static void vp9_decode_flush(AVCodecContext *ctx)
3525 VP9Context *s = ctx->priv_data;

3528 for (i = 0; i < 8; i++)
3529 av_frame_unref(s->refs[i]);
/* One-time decoder init: fix the pixel format, initialize the DSP
 * function tables, and allocate the 8 reference-frame holders.
 * On allocation failure, everything allocated so far is released via
 * vp9_decode_free() and ENOMEM is returned. */
3533 static av_cold int vp9_decode_init(AVCodecContext *ctx)
3535 VP9Context *s = ctx->priv_data;

3538 ctx->pix_fmt = AV_PIX_FMT_YUV420P; // this decoder outputs 8-bit 4:2:0 only
3539 ff_vp9dsp_init(&s->dsp);
3540 ff_videodsp_init(&s->vdsp, 8); // 8 = bits per component
3541 for (i = 0; i < 8; i++) {
3542 s->refs[i] = av_frame_alloc();
// NOTE(review): the `if (!s->refs[i])` null check is missing from this
// excerpt; the cleanup below is the allocation-failure path.
3544 vp9_decode_free(ctx);
3545 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
3546 return AVERROR(ENOMEM);

// -1 is a sentinel: presumably forces the sharpness-dependent filter
// limit LUTs to be (re)built on the first frame — verify against the
// loopfilter setup code.
3549 s->filter.sharpness = -1;
3554 AVCodec ff_vp9_decoder = {
3556 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
3557 .type = AVMEDIA_TYPE_VIDEO,
3558 .id = AV_CODEC_ID_VP9,
3559 .priv_data_size = sizeof(VP9Context),
3560 .init = vp9_decode_init,
3561 .close = vp9_decode_free,
3562 .decode = vp9_decode_packet,
3563 .capabilities = CODEC_CAP_DR1,
3564 .flush = vp9_decode_flush,