 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "libavutil/avassert.h"

#define VP9_SYNCCODE 0x498342
    uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
                              [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
typedef struct VP9Block {
    uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
    enum FilterMode filter;
    VP56mv mv[4 /* b_idx */][2 /* ref */];
    enum TxfmMode tx, uvtx;
    int row, row7, col, col7;
    ptrdiff_t y_stride, uv_stride;
typedef struct VP9Context {
    uint8_t keyframe, last_keyframe;
    uint8_t use_last_frame_mvs;
    uint8_t refreshrefmask;
    uint8_t highprecisionmvs;
    enum FilterMode filtermode;
    uint8_t allowcompinter;
    uint8_t parallelmode;
    uint8_t varcompref[2];
    AVFrame *refs[8], *f, *fb[10];
    uint8_t mblim_lut[64];
    int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
    uint8_t absolute_vals;
    uint8_t skip_enabled;
    unsigned log2_tile_cols, log2_tile_rows;
    unsigned tile_cols, tile_rows;
    unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    unsigned sb_cols, sb_rows, rows, cols;
    uint8_t coef[4][2][2][6][6][3];
    uint8_t coef[4][2][2][6][6][11];
    unsigned y_mode[4][10];
    unsigned uv_mode[10][10];
    unsigned filter[4][3];
    unsigned mv_mode[7][4];
    unsigned intra[4][2];
    unsigned single_ref[5][2][2];
    unsigned comp_ref[5][2];
    unsigned tx32p[2][4];
    unsigned tx16p[2][3];
    unsigned mv_joint[4];
    unsigned classes[11];
    unsigned bits[10][2];
    unsigned class0_fp[2][4];
    unsigned class0_hp[2];
    unsigned partition[4][4][4];
    unsigned coef[4][2][2][6][6][3];
    unsigned eob[4][2][2][6][6][2];
    enum TxfmMode txfmmode;
    enum CompPredMode comppredmode;
    // contextual (left/above) cache
    uint8_t left_partition_ctx[8], *above_partition_ctx;
    uint8_t left_mode_ctx[16], *above_mode_ctx;
    // FIXME maybe merge some of the below in a flags field?
    uint8_t left_y_nnz_ctx[16], *above_y_nnz_ctx;
    uint8_t left_uv_nnz_ctx[2][8], *above_uv_nnz_ctx[2];
    uint8_t left_skip_ctx[8], *above_skip_ctx; // 1bit
    uint8_t left_txfm_ctx[8], *above_txfm_ctx; // 2bit
    uint8_t left_segpred_ctx[8], *above_segpred_ctx; // 1bit
    uint8_t left_intra_ctx[8], *above_intra_ctx; // 1bit
    uint8_t left_comp_ctx[8], *above_comp_ctx; // 1bit
    uint8_t left_ref_ctx[8], *above_ref_ctx; // 2bit
    uint8_t left_filter_ctx[8], *above_filter_ctx;
    VP56mv left_mv_ctx[16][2], (*above_mv_ctx)[2];
    uint8_t *intra_pred_data[3];
    uint8_t *segmentation_map;
    struct VP9mvrefPair *mv[2];
    struct VP9Filter *lflvl;
    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71*80];

    // block reconstruction intermediates
    DECLARE_ALIGNED(32, int16_t, block)[4096];
    DECLARE_ALIGNED(32, int16_t, uvblock)[2][1024];
    uint8_t uveob[2][64];
    VP56mv min_mv, max_mv;
    DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
    DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
    {
        { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
        { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
    }, {
        { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
        { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
    }
};
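/* Illustrative note (not in the original source): row 0 appears to give each
 * block size in 4x4-block units and row 1 in 8x8-block units; e.g. assuming
 * BS_64x64 is index 0, bwh_tab[0][BS_64x64] is { 16, 16 } while
 * bwh_tab[1][BS_64x64] is { 8, 8 }. */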
static int update_size(AVCodecContext *ctx, int w, int h)
{
    VP9Context *s = ctx->priv_data;
    uint8_t *p;

    av_assert0(w > 0 && h > 0);

    if (s->above_partition_ctx && w == ctx->width && h == ctx->height)
        return 0;

    s->sb_cols = (w + 63) >> 6;
    s->sb_rows = (h + 63) >> 6;
    s->cols = (w + 7) >> 3;
    s->rows = (h + 7) >> 3;

#define assign(var, type, n) var = (type) p; p += s->sb_cols * n * sizeof(*var)
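    /* Sketch of one expansion (illustrative only): assign(s->above_skip_ctx,
     * uint8_t *, 8) becomes
     *     s->above_skip_ctx = (uint8_t *) p;
     *     p += s->sb_cols * 8 * sizeof(*s->above_skip_ctx);
     * i.e. each call below carves a per-superblock-column slice out of the
     * single allocation. */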
    av_freep(&s->above_partition_ctx);
    p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx) +
                                64 * s->sb_rows * (1 + sizeof(*s->mv[0]) * 2)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->above_partition_ctx, uint8_t *, 8);
    assign(s->above_skip_ctx, uint8_t *, 8);
    assign(s->above_txfm_ctx, uint8_t *, 8);
    assign(s->above_mode_ctx, uint8_t *, 16);
    assign(s->above_y_nnz_ctx, uint8_t *, 16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
    assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
    assign(s->intra_pred_data[0], uint8_t *, 64);
    assign(s->intra_pred_data[1], uint8_t *, 32);
    assign(s->intra_pred_data[2], uint8_t *, 32);
    assign(s->above_segpred_ctx, uint8_t *, 8);
    assign(s->above_intra_ctx, uint8_t *, 8);
    assign(s->above_comp_ctx, uint8_t *, 8);
    assign(s->above_ref_ctx, uint8_t *, 8);
    assign(s->above_filter_ctx, uint8_t *, 8);
    assign(s->lflvl, struct VP9Filter *, 1);
    assign(s->above_mv_ctx, VP56mv(*)[2], 16);
    assign(s->segmentation_map, uint8_t *, 64 * s->sb_rows);
    assign(s->mv[0], struct VP9mvrefPair *, 64 * s->sb_rows);
    assign(s->mv[1], struct VP9mvrefPair *, 64 * s->sb_rows);
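    /* Illustrative note: the 240 in the allocation above is the sum of the
     * per-sb_cols sizes of the plain uint8_t contexts assigned here:
     * 10 * 8 + 2 * 16 + 64 + 32 + 32 = 240 bytes. */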
// for some reason the sign bit is at the end, not the start, of a bit sequence
static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
{
    int v = get_bits(gb, n);
    return get_bits1(gb) ? -v : v;
}
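/* Worked example (illustrative only): with n = 4 and the next bits being
 * 1010 followed by a sign bit of 1, v = 10 and the result is -10; a sign
 * bit of 0 would yield +10. */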
static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
}
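/* Worked example (illustrative only): for m = 5, v = 0, 1, 2, 3, 4 maps to
 * 5, 4, 6, 3, 7 - small v alternates tightly around m, and once v > 2 * m
 * the alternation is impossible so v is returned unchanged. */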
// differential forward probability updates
static int update_prob(VP56RangeCoder *c, int p)
{
    static const int inv_map_table[254] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253,
    };
    int d;
    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65)
            d = (d << 1) - 65 + vp8_rac_get(c);
        d += 64;
    }

    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
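/* Worked example (illustrative only): with p = 100 and the first branch
 * giving d = 3, inv_map_table[3] = 46; since p <= 128 the result is
 * 1 + inv_recenter_nonneg(46, 99) = 1 + (99 + 23) = 123. */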
static int decode_frame_header(AVCodecContext *ctx,
                               const uint8_t *data, int size, int *ref)
{
    VP9Context *s = ctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
    int last_invisible;
    const uint8_t *data2;

    if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
        return res;
    }
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    }
    s->profile = get_bits1(&s->gb);
    if (get_bits1(&s->gb)) { // reserved bit
        av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
        return AVERROR_INVALIDDATA;
    }
    if (get_bits1(&s->gb)) {
        *ref = get_bits(&s->gb, 3);
        return 0;
    }
    s->last_keyframe = s->keyframe;
    s->keyframe = !get_bits1(&s->gb);
    last_invisible = s->invisible;
    s->invisible = !get_bits1(&s->gb);
    s->errorres = get_bits1(&s->gb);
    // FIXME disable this upon resolution change
    s->use_last_frame_mvs = !s->errorres && !last_invisible;
    if (s->keyframe) {
        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        }
        s->colorspace = get_bits(&s->gb, 3);
        if (s->colorspace == 7) { // RGB = profile 1
            av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
            return AVERROR_INVALIDDATA;
        }
        s->fullrange = get_bits1(&s->gb);
        // for profile 1, here follows the subsampling bits
        s->refreshrefmask = 0xff;
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
    } else {
        s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
        s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
        if (s->intraonly) {
            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            }
            s->refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
        } else {
            s->refreshrefmask = get_bits(&s->gb, 8);
            s->refidx[0] = get_bits(&s->gb, 3);
            s->signbias[0] = get_bits1(&s->gb);
            s->refidx[1] = get_bits(&s->gb, 3);
            s->signbias[1] = get_bits1(&s->gb);
            s->refidx[2] = get_bits(&s->gb, 3);
            s->signbias[2] = get_bits1(&s->gb);
            if (!s->refs[s->refidx[0]] || !s->refs[s->refidx[1]] ||
                !s->refs[s->refidx[2]]) {
                av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            }
            if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[0]]->width;
                h = s->refs[s->refidx[0]]->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[1]]->width;
                h = s->refs[s->refidx[1]]->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[2]]->width;
                h = s->refs[s->refidx[2]]->height;
            } else {
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            }
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->highprecisionmvs = get_bits1(&s->gb);
            s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
                            get_bits(&s->gb, 2);
            s->allowcompinter = s->signbias[0] != s->signbias[1] ||
                                s->signbias[0] != s->signbias[2];
            if (s->allowcompinter) {
                if (s->signbias[0] == s->signbias[1]) {
                    s->fixcompref = 2;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 1;
                } else if (s->signbias[0] == s->signbias[2]) {
                    s->fixcompref = 1;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 2;
                } else {
                    s->fixcompref = 0;
                    s->varcompref[0] = 1;
                    s->varcompref[1] = 2;
                }
            }
        }
    }
    s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
    s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
    s->framectxid = c = get_bits(&s->gb, 2);

    /* loopfilter header data */
    s->filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
    // the old cache values since they are still valid
    if (s->filter.sharpness != sharp)
        memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
    s->filter.sharpness = sharp;
    if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
        if (get_bits1(&s->gb)) {
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
        }
    } else {
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }
    /* quantization header data */
    s->yac_qi = get_bits(&s->gb, 8);
    s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
                  s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;

    /* segmentation header info */
    if ((s->segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->prob.seg[i] = get_bits1(&s->gb) ?
                                 get_bits(&s->gb, 8) : 255;
            if ((s->segmentation.temporal = get_bits1(&s->gb)))
                for (i = 0; i < 3; i++)
                    s->prob.segpred[i] = get_bits1(&s->gb) ?
                                         get_bits(&s->gb, 8) : 255;
        }

        if (get_bits1(&s->gb)) {
            s->segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
                if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
                if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
            }
        }
    } else {
        s->segmentation.feat[0].q_enabled = 0;
        s->segmentation.feat[0].lf_enabled = 0;
        s->segmentation.feat[0].skip_enabled = 0;
        s->segmentation.feat[0].ref_enabled = 0;
    }
    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->segmentation.feat[i].q_enabled) {
            if (s->segmentation.absolute_vals)
                qyac = s->segmentation.feat[i].q_val;
            else
                qyac = s->yac_qi + s->segmentation.feat[i].q_val;
        } else {
            qyac = s->yac_qi;
        }
        qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
        qyac = av_clip_uintp2(qyac, 8);

        s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
        s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
        s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
        s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];

        sh = s->filter.level >= 32;
        if (s->segmentation.feat[i].lf_enabled) {
            if (s->segmentation.absolute_vals)
                lflvl = s->segmentation.feat[i].lf_val;
            else
                lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
        } else {
            lflvl = s->filter.level;
        }
        s->segmentation.feat[i].lflvl[0][0] =
        s->segmentation.feat[i].lflvl[0][1] =
            av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
        for (j = 1; j < 4; j++) {
            s->segmentation.feat[i].lflvl[j][0] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[0]) << sh), 6);
            s->segmentation.feat[i].lflvl[j][1] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[1]) << sh), 6);
        }
    }
    if ((res = update_size(ctx, w, h)) < 0) {
        av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
        return res;
    }
    for (s->tiling.log2_tile_cols = 0;
         (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
         s->tiling.log2_tile_cols++) ;
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->tiling.log2_tile_cols++;
        else
            break;
    }
    s->tiling.log2_tile_rows = decode012(&s->gb);
    s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
    if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
        s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
        s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
                                 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
        if (!s->c_b) {
            av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
            return AVERROR(ENOMEM);
        }
    }
    if (s->keyframe || s->errorres || s->intraonly) {
        s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
        s->prob_ctx[3].p = vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
               sizeof(vp9_default_coef_probs));
    }
    // the next 16 bits are the size of the rest of the header (arith-coded)
    size2 = get_bits(&s->gb, 16);
    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    }
    ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;
    }
    if (s->keyframe || s->intraonly) {
        memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
    } else {
        memset(&s->counts, 0, sizeof(s->counts));
    }

    // FIXME is it faster to not copy here, but do it down in the fw updates
    // as explicit copies if the fw update is missing (and skip the copy upon
    s->prob.p = s->prob_ctx[c].p;
    // txfm updates
    if (s->lossless) {
        s->txfmmode = TX_4X4;
    } else {
        s->txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->txfmmode == 3)
            s->txfmmode += vp8_rac_get(&s->c);

        if (s->txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);
        }
    }
    for (i = 0; i < 4; i++) {
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                                break;
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252)) {
                                    p[n] = update_prob(&s->c, r[n]);
                                } else {
                                    p[n] = r[n];
                                }
                            }
                            p[3] = 0;
                        }
        } else {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m > 3 && l == 0) // dc only has 3 pt
                                break;
                            memcpy(p, r, 3);
                            p[3] = 0;
                        }
        }
        if (s->txfmmode == i)
            break;
    }
    // mode updates
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
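    // Illustrative note: each forward update here and below is gated by a
    // bit whose probability of being zero is 252/256, i.e. individual
    // probability updates are expected to be rare in the bitstream.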
    if (!s->keyframe && !s->intraonly) {
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);

        if (s->allowcompinter) {
            s->comppredmode = vp8_rac_get(&s->c);
            if (s->comppredmode)
                s->comppredmode += vp8_rac_get(&s->c);
            if (s->comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.comp[i] =
                            update_prob(&s->c, s->prob.p.comp[i]);
        } else {
            s->comppredmode = PRED_SINGLEREF;
        }
        if (s->comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);
            }
        }

        if (s->comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);
        }
        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);

        // mv fields don't use the update_prob subexp model for some reason
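        // Illustrative note: the raw 7-bit value v read below is stored as
        // 2 * v + 1, i.e. always an odd number in [1, 255], so an updated MV
        // probability can never collapse to zero.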
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        if (s->highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
            }
        }
    }

    return (data2 - data) + size2;
}
static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
                                      VP9Context *s)
{
    dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
    dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
}
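/* Illustrative note: min_mv/max_mv are assumed to hold the MV bounds for the
 * current block, so e.g. src->x = -300 with min_mv.x = -128 clamps to
 * dst->x = -128. */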
static void find_ref_mvs(VP9Context *s,
                         VP56mv *pmv, int ref, int z, int idx, int sb)
{
    static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
        [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
                      { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
        [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
                      { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
        [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
                      { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
        [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
                      { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
        [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
                      { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
        [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
                      { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
        [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
                      { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
        [BS_16x8]  = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
                      { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
        [BS_8x16]  = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
                      { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
        [BS_8x8]   = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
                      { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_8x4]   = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
                      { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_4x8]   = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
                      { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
        [BS_4x4]   = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
                      { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
    };
    VP9Block *const b = &s->b;
    int row = b->row, col = b->col, row7 = b->row7;
    const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
#define INVALID_MV 0x80008000U
    uint32_t mem = INVALID_MV;
    int i;

#define RETURN_DIRECT_MV(mv) \
    do { \
        uint32_t m = AV_RN32A(&mv); \
        if (!idx) { \
            AV_WN32A(pmv, m); \
            return; \
        } else if (mem == INVALID_MV) { \
            mem = m; \
        } else if (m != mem) { \
            AV_WN32A(pmv, m); \
            return; \
        } \
    } while (0)

    if (sb >= 0) {
        if (sb == 2 || sb == 1) {
            RETURN_DIRECT_MV(b->mv[0][z]);
        } else if (sb == 3) {
            RETURN_DIRECT_MV(b->mv[2][z]);
            RETURN_DIRECT_MV(b->mv[1][z]);
            RETURN_DIRECT_MV(b->mv[0][z]);
        }
#define RETURN_MV(mv) \
    do { \
        if (sb > 0) { \
            VP56mv tmp; \
            uint32_t m; \
            clamp_mv(&tmp, &mv, s); \
            m = AV_RN32A(&tmp); \
            if (!idx) { \
                AV_WN32A(pmv, m); \
                return; \
            } else if (mem == INVALID_MV) { \
                mem = m; \
            } else if (m != mem) { \
                AV_WN32A(pmv, m); \
                return; \
            } \
        } else { \
            uint32_t m = AV_RN32A(&mv); \
            if (!idx) { \
                clamp_mv(pmv, &mv, s); \
                return; \
            } else if (mem == INVALID_MV) { \
                mem = m; \
            } else if (m != mem) { \
                clamp_mv(pmv, &mv, s); \
                return; \
            } \
        } \
    } while (0)

        if (row > 0) {
            struct VP9mvrefPair *mv = &s->mv[0][(row - 1) * s->sb_cols * 8 + col];
            if (mv->ref[0] == ref) {
                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
            }
        }
        if (col > s->tiling.tile_col_start) {
            struct VP9mvrefPair *mv = &s->mv[0][row * s->sb_cols * 8 + col - 1];
            if (mv->ref[0] == ref) {
                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
            }
        }
        i = 2;
    } else {
        i = 0;
    }
    // previously coded MVs in this neighbourhood, using same reference frame
    for (; i < 8; i++) {
        int c = p[i][0] + col, r = p[i][1] + row;

        if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
            struct VP9mvrefPair *mv = &s->mv[0][r * s->sb_cols * 8 + c];

            if (mv->ref[0] == ref) {
                RETURN_MV(mv->mv[0]);
            } else if (mv->ref[1] == ref) {
                RETURN_MV(mv->mv[1]);
            }
        }
    }

    // MV at this position in previous frame, using same reference frame
    if (s->use_last_frame_mvs) {
        struct VP9mvrefPair *mv = &s->mv[1][row * s->sb_cols * 8 + col];

        if (mv->ref[0] == ref) {
            RETURN_MV(mv->mv[0]);
        } else if (mv->ref[1] == ref) {
            RETURN_MV(mv->mv[1]);
        }
    }
#define RETURN_SCALE_MV(mv, scale) \
    do { \
        if (scale) { \
            VP56mv mv_temp = { -mv.x, -mv.y }; \
            RETURN_MV(mv_temp); \
        } else { \
            RETURN_MV(mv); \
        } \
    } while (0)

    // previously coded MVs in this neighbourhood, using different reference frame
    for (i = 0; i < 8; i++) {
        int c = p[i][0] + col, r = p[i][1] + row;

        if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
            struct VP9mvrefPair *mv = &s->mv[0][r * s->sb_cols * 8 + c];

            if (mv->ref[0] != ref && mv->ref[0] >= 0) {
                RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
            }
            if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
                // BUG - libvpx has this condition regardless of whether
                // we used the first ref MV and pre-scaling
                AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
                RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
            }
        }
    }

    // MV at this position in previous frame, using different reference frame
    if (s->use_last_frame_mvs) {
        struct VP9mvrefPair *mv = &s->mv[1][row * s->sb_cols * 8 + col];

        if (mv->ref[0] != ref && mv->ref[0] >= 0) {
            RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
        }
        if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
            // BUG - libvpx has this condition regardless of whether
            // we used the first ref MV and pre-scaling
            AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
            RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
        }
    }

    AV_ZERO32(pmv);
    clamp_mv(pmv, pmv, s);
#undef RETURN_MV
#undef RETURN_SCALE_MV
}
static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
{
    int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
    int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
                                s->prob.p.mv_comp[idx].classes);

    s->counts.mv_comp[idx].sign[sign]++;
    s->counts.mv_comp[idx].classes[c]++;
    if (c) {
        int m;

        for (n = 0, m = 0; m < c; m++) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
            n |= bit << m;
            s->counts.mv_comp[idx].bits[m][bit]++;
        }
        n <<= 3;
        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
        n |= bit << 1;
        s->counts.mv_comp[idx].fp[bit]++;
        if (hp) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
            s->counts.mv_comp[idx].hp[bit]++;
            n |= bit;
        } else {
            n |= 1;
            // bug in libvpx - we count for bw entropy purposes even if the
            // bit wasn't coded
            s->counts.mv_comp[idx].hp[1]++;
        }
        n += 8 << c;
    } else {
        n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
        s->counts.mv_comp[idx].class0[n]++;
        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
                               s->prob.p.mv_comp[idx].class0_fp[n]);
        s->counts.mv_comp[idx].class0_fp[n][bit]++;
        n = (n << 3) | (bit << 1);
        if (hp) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
            s->counts.mv_comp[idx].class0_hp[bit]++;
            n |= bit;
        } else {
            n |= 1;
            // bug in libvpx - we count for bw entropy purposes even if the
            // bit wasn't coded
            s->counts.mv_comp[idx].class0_hp[1]++;
        }
    }

    return sign ? -(n + 1) : (n + 1);
}
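/* Illustrative note (assumption based on the VP9 MV component layout): n
 * packs the integer magnitude in the upper bits, the 2-bit fractional part
 * at bits 2-1 and the high-precision bit at bit 0, so the decoded component
 * sign ? -(n + 1) : (n + 1) is expressed in eighth-pel units. */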
static void fill_mv(VP9Context *s,
                    VP56mv *mv, int mode, int sb)
{
    VP9Block *const b = &s->b;

    if (mode == ZEROMV) {
        memset(mv, 0, sizeof(*mv) * 2);
    } else {
        int hp;

        // FIXME cache this value and reuse for other subblocks
        find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
                     mode == NEWMV ? -1 : sb);
        // FIXME maybe move this code into find_ref_mvs()
        if ((mode == NEWMV || sb == -1) &&
            !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
            if (mv[0].y & 1) {
                if (mv[0].y < 0)
                    mv[0].y++;
                else
                    mv[0].y--;
            }
            if (mv[0].x & 1) {
                if (mv[0].x < 0)
                    mv[0].x++;
                else
                    mv[0].x--;
            }
        }
        if (mode == NEWMV) {
            enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
                                              s->prob.p.mv_joint);

            s->counts.mv_joint[j]++;
            if (j >= MV_JOINT_V)
                mv[0].y += read_mv_component(s, 0, hp);
            if (j & 1)
                mv[0].x += read_mv_component(s, 1, hp);
        }

        if (b->comp) {
            // FIXME cache this value and reuse for other subblocks
            find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
                         mode == NEWMV ? -1 : sb);
            if ((mode == NEWMV || sb == -1) &&
                !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
                if (mv[1].y & 1) {
                    if (mv[1].y < 0)
                        mv[1].y++;
                    else
                        mv[1].y--;
                }
                if (mv[1].x & 1) {
                    if (mv[1].x < 0)
                        mv[1].x++;
                    else
                        mv[1].x--;
                }
            }
            if (mode == NEWMV) {
                enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
                                                  s->prob.p.mv_joint);

                s->counts.mv_joint[j]++;
                if (j >= MV_JOINT_V)
                    mv[1].y += read_mv_component(s, 0, hp);
                if (j & 1)
                    mv[1].x += read_mv_component(s, 1, hp);
            }
        }
    }
}
static void decode_mode(AVCodecContext *ctx)
{
    static const uint8_t left_ctx[N_BS_SIZES] = {
        0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
    };
    static const uint8_t above_ctx[N_BS_SIZES] = {
        0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
    };
    static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
        TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
        TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
    };
    VP9Context *s = ctx->priv_data;
    VP9Block *const b = &s->b;
    int row = b->row, col = b->col, row7 = b->row7;
    enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
    int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
    int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
    int have_a = row > 0, have_l = col > s->tiling.tile_col_start;

    if (!s->segmentation.enabled) {
        b->seg_id = 0;
    } else if (s->keyframe || s->intraonly) {
        b->seg_id = s->segmentation.update_map ?
            vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg) : 0;
    } else if (!s->segmentation.update_map ||
               (s->segmentation.temporal &&
                vp56_rac_get_prob_branchy(&s->c,
                    s->prob.segpred[s->above_segpred_ctx[col] +
                                    s->left_segpred_ctx[row7]]))) {
        int pred = 8, x;

        for (y = 0; y < h4; y++)
            for (x = 0; x < w4; x++)
                pred = FFMIN(pred, s->segmentation_map[(y + row) * 8 * s->sb_cols + x + col]);
        av_assert1(pred < 8);
        b->seg_id = pred;

        memset(&s->above_segpred_ctx[col], 1, w4);
        memset(&s->left_segpred_ctx[row7], 1, h4);
    } else {
        b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
                                     s->prob.seg);

        memset(&s->above_segpred_ctx[col], 0, w4);
        memset(&s->left_segpred_ctx[row7], 0, h4);
    }
    if ((s->segmentation.enabled && s->segmentation.update_map) || s->keyframe) {
        for (y = 0; y < h4; y++)
            memset(&s->segmentation_map[(y + row) * 8 * s->sb_cols + col],
                   b->seg_id, w4);
    }

    b->skip = s->segmentation.enabled &&
              s->segmentation.feat[b->seg_id].skip_enabled;
    if (!b->skip) {
        int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
        b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
        s->counts.skip[c][b->skip]++;
    }
    if (s->keyframe || s->intraonly) {
        b->intra = 1;
    } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
        b->intra = !s->segmentation.feat[b->seg_id].ref_val;
    } else {
        int c, bit;

        if (have_a && have_l) {
            c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
            c += (c == 2);
        } else {
            c = have_a ? 2 * s->above_intra_ctx[col] :
                have_l ? 2 * s->left_intra_ctx[row7] : 0;
        }
        bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
        s->counts.intra[c][bit]++;
        b->intra = !bit;
    }

    if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
        int c;

        if (have_a && have_l) {
            c = (s->above_skip_ctx[col] ? max_tx :
                 s->above_txfm_ctx[col]) +
                (s->left_skip_ctx[row7] ? max_tx :
                 s->left_txfm_ctx[row7]) > max_tx;
        } else if (have_a) {
            c = s->above_skip_ctx[col] ? 1 :
                (s->above_txfm_ctx[col] * 2 > max_tx);
        } else if (have_l) {
            c = s->left_skip_ctx[row7] ? 1 :
                (s->left_txfm_ctx[row7] * 2 > max_tx);
        } else {
            c = 1;
        }
        switch (max_tx) {
        case TX_32X32:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
            if (b->tx)
                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
            if (b->tx == 2)
                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
            s->counts.tx32p[c][b->tx]++;
            break;
        case TX_16X16:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
            if (b->tx)
                b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
            s->counts.tx16p[c][b->tx]++;
            break;
        case TX_8X8:
            b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
            s->counts.tx8p[c][b->tx]++;
            break;
        case TX_4X4:
            b->tx = TX_4X4;
            break;
        }
    } else {
        b->tx = FFMIN(max_tx, s->txfmmode);
    }
    if (s->keyframe || s->intraonly) {
        uint8_t *a = &s->above_mode_ctx[col * 2];
        uint8_t *l = &s->left_mode_ctx[(row7) << 1];

        b->comp = 0;
        if (b->bs > BS_8x8) {
            // FIXME the memory storage intermediates here aren't really
            // necessary, they're just there to make the code slightly
            // simpler
            b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                 vp9_default_kf_ymode_probs[a[0]][l[0]]);
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
                l[0] = a[1] = b->mode[1];
            } else {
                l[0] = a[1] = b->mode[1] = b->mode[0];
            }
            if (b->bs != BS_4x8) {
                b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                     vp9_default_kf_ymode_probs[a[0]][l[1]]);
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                  vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
                    l[1] = a[1] = b->mode[3];
                } else {
                    l[1] = a[1] = b->mode[3] = b->mode[2];
                }
            } else {
                b->mode[2] = b->mode[0];
                l[1] = a[1] = b->mode[3] = b->mode[1];
            }
        } else {
            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          vp9_default_kf_ymode_probs[*a][*l]);
            b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
            // FIXME this can probably be optimized
            memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
            memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
        }
        b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                     vp9_default_kf_uvmode_probs[b->mode[3]]);
    } else if (b->intra) {
        b->comp = 0;
        if (b->bs > BS_8x8) {
            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          s->prob.p.y_mode[0]);
            s->counts.y_mode[0][b->mode[0]]++;
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                s->counts.y_mode[0][b->mode[1]]++;
            } else {
                b->mode[1] = b->mode[0];
            }
            if (b->bs != BS_4x8) {
                b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                s->counts.y_mode[0][b->mode[2]]++;
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                  s->prob.p.y_mode[0]);
                    s->counts.y_mode[0][b->mode[3]]++;
                } else {
                    b->mode[3] = b->mode[2];
                }
            } else {
                b->mode[2] = b->mode[0];
                b->mode[3] = b->mode[1];
            }
        } else {
            static const uint8_t size_group[10] = {
                3, 3, 3, 3, 2, 2, 2, 1, 1, 1
            };
            int sz = size_group[b->bs];

            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          s->prob.p.y_mode[sz]);
            b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
            s->counts.y_mode[sz][b->mode[3]]++;
        }
        b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                     s->prob.p.uv_mode[b->mode[3]]);
        s->counts.uv_mode[b->mode[3]][b->uvmode]++;
    } else {
        static const uint8_t inter_mode_ctx_lut[14][14] = {
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },
        };
        if (s->segmentation.feat[b->seg_id].ref_enabled) {
            av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
            b->comp = 0;
            b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
        } else {
            // read comp_pred flag
            if (s->comppredmode != PRED_SWITCHABLE) {
                b->comp = s->comppredmode == PRED_COMPREF;
            } else {
                int c;

                // FIXME add intra as ref=0xff (or -1) to make these easier?
                if (have_a) {
                    if (have_l) {
                        if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
                            c = 4;
                        } else if (s->above_comp_ctx[col]) {
                            c = 2 + (s->left_intra_ctx[row7] ||
                                     s->left_ref_ctx[row7] == s->fixcompref);
                        } else if (s->left_comp_ctx[row7]) {
                            c = 2 + (s->above_intra_ctx[col] ||
                                     s->above_ref_ctx[col] == s->fixcompref);
                        } else {
                            c = (!s->above_intra_ctx[col] &&
                                 s->above_ref_ctx[col] == s->fixcompref) ^
                                (!s->left_intra_ctx[row7] &&
                                 s->left_ref_ctx[row & 7] == s->fixcompref);
                        }
                    } else {
                        c = s->above_comp_ctx[col] ? 3 :
                            (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
                    }
                } else if (have_l) {
                    c = s->left_comp_ctx[row7] ? 3 :
                        (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
                } else {
                    c = 1;
                }
                b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
                s->counts.comp[c][b->comp]++;
            }
            // read actual references
            // FIXME probably cache a few variables here to prevent repetitive
            // memory accesses below
            if (b->comp) /* two references */ {
                int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;

                b->ref[fix_idx] = s->fixcompref;
                // FIXME can this codeblob be replaced by some sort of LUT?
                if (have_a) {
                    if (have_l) {
                        if (s->above_intra_ctx[col]) {
                            if (s->left_intra_ctx[row7]) {
                                c = 2;
                            } else {
                                c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
                            }
                        } else if (s->left_intra_ctx[row7]) {
                            c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
                        } else {
                            int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];

                            if (refl == refa && refa == s->varcompref[1]) {
                                c = 0;
                            } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
                                if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
                                    (refl == s->fixcompref && refa == s->varcompref[0])) {
                                    c = 4;
                                } else {
                                    c = (refa == refl) ? 3 : 1;
                                }
                            } else if (!s->left_comp_ctx[row7]) {
                                if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
                                    c = 1;
                                } else {
                                    c = (refl == s->varcompref[1] &&
                                         refa != s->varcompref[1]) ? 2 : 4;
                                }
                            } else if (!s->above_comp_ctx[col]) {
                                if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
                                    c = 1;
                                } else {
                                    c = (refa == s->varcompref[1] &&
                                         refl != s->varcompref[1]) ? 2 : 4;
                                }
                            } else {
                                c = (refl == refa) ? 4 : 2;
                            }
                        }
                    } else {
                        if (s->above_intra_ctx[col]) {
                            c = 2;
                        } else if (s->above_comp_ctx[col]) {
                            c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
                        } else {
                            c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
                        }
                    }
                } else if (have_l) {
                    if (s->left_intra_ctx[row7]) {
                        c = 2;
                    } else if (s->left_comp_ctx[row7]) {
                        c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
                    } else {
                        c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
                    }
                } else {
                    c = 2;
                }
                bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
                b->ref[var_idx] = s->varcompref[bit];
                s->counts.comp_ref[c][bit]++;
            } else /* single reference */ {
                int bit, c;

                if (have_a && !s->above_intra_ctx[col]) {
                    if (have_l && !s->left_intra_ctx[row7]) {
                        if (s->left_comp_ctx[row7]) {
                            if (s->above_comp_ctx[col]) {
                                c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
                                         !s->above_ref_ctx[col]);
                            } else {
                                c = (3 * !s->above_ref_ctx[col]) +
                                    (!s->fixcompref || !s->left_ref_ctx[row7]);
                            }
                        } else if (s->above_comp_ctx[col]) {
                            c = (3 * !s->left_ref_ctx[row7]) +
                                (!s->fixcompref || !s->above_ref_ctx[col]);
                        } else {
                            c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
                        }
                    } else if (s->above_intra_ctx[col]) {
                        c = 2;
                    } else if (s->above_comp_ctx[col]) {
                        c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
                    } else {
                        c = 4 * (!s->above_ref_ctx[col]);
                    }
                } else if (have_l && !s->left_intra_ctx[row7]) {
                    if (s->left_intra_ctx[row7]) {
                        c = 2;
                    } else if (s->left_comp_ctx[row7]) {
                        c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
                    } else {
                        c = 4 * (!s->left_ref_ctx[row7]);
                    }
                } else {
                    c = 2;
                }
                bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
                s->counts.single_ref[c][0][bit]++;
                if (!bit) {
                    b->ref[0] = 0;
                } else {
                    // FIXME can this codeblob be replaced by some sort of LUT?
                    if (have_a) {
                        if (have_l) {
                            if (s->left_intra_ctx[row7]) {
                                if (s->above_intra_ctx[col]) {
                                    c = 2;
                                } else if (s->above_comp_ctx[col]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->above_ref_ctx[col] == 1);
                                } else if (!s->above_ref_ctx[col]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->above_ref_ctx[col] == 1);
                                }
                            } else if (s->above_intra_ctx[col]) {
                                if (s->left_intra_ctx[row7]) {
                                    c = 2;
                                } else if (s->left_comp_ctx[row7]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                } else if (!s->left_ref_ctx[row7]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->left_ref_ctx[row7] == 1);
                                }
                            } else if (s->above_comp_ctx[col]) {
                                if (s->left_comp_ctx[row7]) {
                                    if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
                                        c = 3 * (s->fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                    } else {
                                        c = 2;
                                    }
                                } else if (!s->left_ref_ctx[row7]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->above_ref_ctx[col] == 1);
                                } else {
                                    c = 3 * (s->left_ref_ctx[row7] == 1) +
                                        (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
                                }
                            } else if (s->left_comp_ctx[row7]) {
                                if (!s->above_ref_ctx[col]) {
                                    c = 1 + 2 * (s->fixcompref == 1 ||
                                                 s->left_ref_ctx[row7] == 1);
                                } else {
                                    c = 3 * (s->above_ref_ctx[col] == 1) +
                                        (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
                                }
                            } else if (!s->above_ref_ctx[col]) {
                                if (!s->left_ref_ctx[row7]) {
                                    c = 3;
                                } else {
                                    c = 4 * (s->left_ref_ctx[row7] == 1);
                                }
                            } else if (!s->left_ref_ctx[row7]) {
                                c = 4 * (s->above_ref_ctx[col] == 1);
                            } else {
                                c = 2 * (s->left_ref_ctx[row7] == 1) +
                                    2 * (s->above_ref_ctx[col] == 1);
                            }
                        } else {
                            if (s->above_intra_ctx[col] ||
                                (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
                                c = 2;
                            } else if (s->above_comp_ctx[col]) {
                                c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
                            } else {
                                c = 4 * (s->above_ref_ctx[col] == 1);
                            }
                        }
                    } else if (have_l) {
                        if (s->left_intra_ctx[row7] ||
                            (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
                            c = 2;
                        } else if (s->left_comp_ctx[row7]) {
                            c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
                        } else {
                            c = 4 * (s->left_ref_ctx[row7] == 1);
                        }
                    } else {
                        c = 2;
                    }
                    bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
                    s->counts.single_ref[c][1][bit]++;
                    b->ref[0] = 1 + bit;
                }
            }
        }
        if (b->bs <= BS_8x8) {
            if (s->segmentation.feat[b->seg_id].skip_enabled) {
                b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
            } else {
                static const uint8_t off[10] = {
                    3, 0, 0, 1, 0, 0, 0, 0, 0, 0
                };

                // FIXME this needs to use the LUT tables from find_ref_mvs
                // because not all are -1,0/0,-1
                int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
                                          [s->left_mode_ctx[row7 + off[b->bs]]];

                b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
                s->counts.mv_mode[c][b->mode[0] - 10]++;
            }
        }

        if (s->filtermode == FILTER_SWITCHABLE) {
            int c;

            if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
                if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
                    c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
                        s->left_filter_ctx[row7] : 3;
                } else {
                    c = s->above_filter_ctx[col];
                }
            } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
                c = s->left_filter_ctx[row7];
            } else {
                c = 3;
            }

            b->filter = vp8_rac_get_tree(&s->c, vp9_filter_tree,
                                         s->prob.p.filter[c]);
            s->counts.filter[c][b->filter]++;
        } else {
            b->filter = s->filtermode;
        }
        if (b->bs > BS_8x8) {
            int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];

            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                          s->prob.p.mv_mode[c]);
            s->counts.mv_mode[c][b->mode[0] - 10]++;
            fill_mv(s, b->mv[0], b->mode[0], 0);

            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                s->counts.mv_mode[c][b->mode[1] - 10]++;
                fill_mv(s, b->mv[1], b->mode[1], 1);
            } else {
                b->mode[1] = b->mode[0];
                AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
                AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
            }

            if (b->bs != BS_4x8) {
                b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                s->counts.mv_mode[c][b->mode[2] - 10]++;
                fill_mv(s, b->mv[2], b->mode[2], 2);

                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                                  s->prob.p.mv_mode[c]);
                    s->counts.mv_mode[c][b->mode[3] - 10]++;
                    fill_mv(s, b->mv[3], b->mode[3], 3);
                } else {
                    b->mode[3] = b->mode[2];
                    AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
                    AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
                }
            } else {
                b->mode[2] = b->mode[0];
                AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
                AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
                b->mode[3] = b->mode[1];
                AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
                AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
            }
        } else {
            fill_mv(s, b->mv[0], b->mode[0], -1);
            AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
            AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
            AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
        }
    }
    // FIXME this can probably be optimized
    memset(&s->above_skip_ctx[col], b->skip, w4);
    memset(&s->left_skip_ctx[row7], b->skip, h4);
    memset(&s->above_txfm_ctx[col], b->tx, w4);
    memset(&s->left_txfm_ctx[row7], b->tx, h4);
    memset(&s->above_partition_ctx[col], above_ctx[b->bs], w4);
    memset(&s->left_partition_ctx[row7], left_ctx[b->bs], h4);
    if (!s->keyframe && !s->intraonly) {
        memset(&s->above_intra_ctx[col], b->intra, w4);
        memset(&s->left_intra_ctx[row7], b->intra, h4);
        memset(&s->above_comp_ctx[col], b->comp, w4);
        memset(&s->left_comp_ctx[row7], b->comp, h4);
        memset(&s->above_mode_ctx[col], b->mode[3], w4);
        memset(&s->left_mode_ctx[row7], b->mode[3], h4);
        if (s->filtermode == FILTER_SWITCHABLE && !b->intra) {
            memset(&s->above_filter_ctx[col], b->filter, w4);
            memset(&s->left_filter_ctx[row7], b->filter, h4);
            b->filter = vp9_filter_lut[b->filter];
        }
        if (b->bs > BS_8x8) {
            int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);

            AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
            AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
            AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
            AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
        } else {
            int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);

            for (n = 0; n < w4 * 2; n++) {
                AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
                AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
            }
            for (n = 0; n < h4 * 2; n++) {
                AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
                AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);
            }
        }

        if (!b->intra) { // FIXME write 0xff or -1 if intra, so we can use this
            // as a direct check in above branches
            int vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];

            memset(&s->above_ref_ctx[col], vref, w4);
            memset(&s->left_ref_ctx[row7], vref, h4);
        }
    }
    for (y = 0; y < h4; y++) {
        int x, o = (row + y) * s->sb_cols * 8 + col;

        if (b->intra) {
            for (x = 0; x < w4; x++) {
                s->mv[0][o + x].ref[0] =
                s->mv[0][o + x].ref[1] = -1;
            }
        } else if (b->comp) {
            for (x = 0; x < w4; x++) {
                s->mv[0][o + x].ref[0] = b->ref[0];
                s->mv[0][o + x].ref[1] = b->ref[1];
                AV_COPY32(&s->mv[0][o + x].mv[0], &b->mv[3][0]);
                AV_COPY32(&s->mv[0][o + x].mv[1], &b->mv[3][1]);
            }
        } else {
            for (x = 0; x < w4; x++) {
                s->mv[0][o + x].ref[0] = b->ref[0];
                s->mv[0][o + x].ref[1] = -1;
                AV_COPY32(&s->mv[0][o + x].mv[0], &b->mv[3][0]);
            }
        }
    }
}
// FIXME remove tx argument, and merge cnt/eob arguments?
static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
                           enum TxfmMode tx, unsigned (*cnt)[6][3],
                           unsigned (*eob)[6][2], uint8_t (*p)[6][11],
                           int nnz, const int16_t *scan, const int16_t (*nb)[2],
                           const int16_t *band_counts, const int16_t *qmul)
{
    int i = 0, band = 0, band_left = band_counts[band];
    uint8_t *tp = p[0][nnz];
    uint8_t cache[1024];
    int val, rc;

    do {
        val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
        eob[band][nnz][val]++;
        if (!val)
            break;

    skip_eob:
        if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
            cnt[band][nnz][0]++;
            if (!--band_left)
                band_left = band_counts[++band];
            cache[scan[i]] = 0;
            nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
            tp = p[band][nnz];
            if (++i == n_coeffs)
                break; //invalid input; blocks should end with EOB
            goto skip_eob;
        }

        rc = scan[i];
        if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
            cnt[band][nnz][1]++;
            val = 1;
            cache[rc] = 1;
        } else {
            // fill in p[3-10] (model fill) - only once per frame for each pos
            if (!tp[3])
                memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);

            cnt[band][nnz][2]++;
            if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
                if (!vp56_rac_get_prob_branchy(c, tp[4])) {
                    cache[rc] = val = 2;
                } else {
                    val = 3 + vp56_rac_get_prob(c, tp[5]);
                    cache[rc] = 3;
                }
            } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
                cache[rc] = 4;
                if (!vp56_rac_get_prob_branchy(c, tp[7])) {
                    val = 5 + vp56_rac_get_prob(c, 159);
                } else {
                    val = 7 + (vp56_rac_get_prob(c, 165) << 1);
                    val += vp56_rac_get_prob(c, 145);
                }
            } else { // cat 3-6
                cache[rc] = 5;
                if (!vp56_rac_get_prob_branchy(c, tp[8])) {
                    if (!vp56_rac_get_prob_branchy(c, tp[9])) {
                        val = 11 + (vp56_rac_get_prob(c, 173) << 2);
                        val += (vp56_rac_get_prob(c, 148) << 1);
                        val += vp56_rac_get_prob(c, 140);
                    } else {
                        val = 19 + (vp56_rac_get_prob(c, 176) << 3);
                        val += (vp56_rac_get_prob(c, 155) << 2);
                        val += (vp56_rac_get_prob(c, 140) << 1);
                        val += vp56_rac_get_prob(c, 135);
                    }
                } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
                    val = 35 + (vp56_rac_get_prob(c, 180) << 4);
                    val += (vp56_rac_get_prob(c, 157) << 3);
                    val += (vp56_rac_get_prob(c, 141) << 2);
                    val += (vp56_rac_get_prob(c, 134) << 1);
                    val += vp56_rac_get_prob(c, 130);
                } else {
                    val = 67 + (vp56_rac_get_prob(c, 254) << 13);
                    val += (vp56_rac_get_prob(c, 254) << 12);
                    val += (vp56_rac_get_prob(c, 254) << 11);
                    val += (vp56_rac_get_prob(c, 252) << 10);
                    val += (vp56_rac_get_prob(c, 249) << 9);
                    val += (vp56_rac_get_prob(c, 243) << 8);
                    val += (vp56_rac_get_prob(c, 230) << 7);
                    val += (vp56_rac_get_prob(c, 196) << 6);
                    val += (vp56_rac_get_prob(c, 177) << 5);
                    val += (vp56_rac_get_prob(c, 153) << 4);
                    val += (vp56_rac_get_prob(c, 140) << 3);
                    val += (vp56_rac_get_prob(c, 133) << 2);
                    val += (vp56_rac_get_prob(c, 130) << 1);
                    val += vp56_rac_get_prob(c, 129);
                }
            }
        }
        if (!--band_left)
            band_left = band_counts[++band];
        if (tx == TX_32X32) // FIXME slow
            coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
        else
            coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
        nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
        tp = p[band][nnz];
    } while (++i < n_coeffs);

    return i;
}
static int decode_coeffs(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;
    VP9Block *const b = &s->b;
    int row = b->row, col = b->col;
    uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
    unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
    unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
    int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);
    int n, pl, x, y, step1d = 1 << b->tx, step = 1 << (b->tx * 2);
    int uvstep1d = 1 << b->uvtx, uvstep = 1 << (b->uvtx * 2), res;
    int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
    int tx = 4 * s->lossless + b->tx;
    const int16_t * const *yscans = vp9_scans[tx];
    const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
    const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
    const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
    uint8_t *a = &s->above_y_nnz_ctx[col * 2];
    uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
    static const int16_t band_counts[4][8] = {
        { 1, 2, 3, 4,  3,   16 - 13 },
        { 1, 2, 3, 4, 11,   64 - 21 },
        { 1, 2, 3, 4, 11,  256 - 21 },
        { 1, 2, 3, 4, 11, 1024 - 21 },
    };
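    /* Illustrative note: each row sums to the coefficient count of its TX
     * size, e.g. 1 + 2 + 3 + 4 + 3 + (16 - 13) = 16 for TX_4X4 and
     * 1 + 2 + 3 + 4 + 11 + (64 - 21) = 64 for TX_8X8. */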
1928 const int16_t *y_band_counts = band_counts[b->tx];
1929 const int16_t *uv_band_counts = band_counts[b->uvtx];
1932 if (b->tx > TX_4X4) { // FIXME slow
1933 for (y = 0; y < end_y; y += step1d)
1934 for (x = 1; x < step1d; x++)
1936 for (x = 0; x < end_x; x += step1d)
1937 for (y = 1; y < step1d; y++)
1940 for (n = 0, y = 0; y < end_y; y += step1d) {
1941 for (x = 0; x < end_x; x += step1d, n += step) {
1942 enum TxfmType txtp = vp9_intra_txfm_type[b->mode[b->tx == TX_4X4 &&
1945 int nnz = a[x] + l[y];
1946 if ((res = decode_coeffs_b(&s->c, s->block + 16 * n, 16 * step,
1947 b->tx, c, e, p, nnz, yscans[txtp],
1948 ynbs[txtp], y_band_counts, qmul[0])) < 0)
1950 a[x] = l[y] = !!res;
1951 if (b->tx > TX_8X8) {
1952 AV_WN16A(&s->eob[n], res);
1958 if (b->tx > TX_4X4) { // FIXME slow
1959 for (y = 0; y < end_y; y += step1d)
1960 memset(&l[y + 1], l[y], FFMIN(end_y - y - 1, step1d - 1));
1961 for (x = 0; x < end_x; x += step1d)
1962 memset(&a[x + 1], a[x], FFMIN(end_x - x - 1, step1d - 1));
1965 p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
1966 c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
1967 e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
1972 for (pl = 0; pl < 2; pl++) {
1973 a = &s->above_uv_nnz_ctx[pl][col];
1974 l = &s->left_uv_nnz_ctx[pl][row & 7];
1975 if (b->uvtx > TX_4X4) { // FIXME slow
1976 for (y = 0; y < end_y; y += uvstep1d)
1977 for (x = 1; x < uvstep1d; x++)
1979 for (x = 0; x < end_x; x += uvstep1d)
1980 for (y = 1; y < uvstep1d; y++)
1983 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
1984 for (x = 0; x < end_x; x += uvstep1d, n += uvstep) {
1985 int nnz = a[x] + l[y];
1986 if ((res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n,
1987 16 * uvstep, b->uvtx, c, e, p, nnz,
1988 uvscan, uvnb, uv_band_counts,
1991 a[x] = l[y] = !!res;
1992 if (b->uvtx > TX_8X8) {
1993 AV_WN16A(&s->uveob[pl][n], res);
1995 s->uveob[pl][n] = res;
1999 if (b->uvtx > TX_4X4) { // FIXME slow
2000 for (y = 0; y < end_y; y += uvstep1d)
2001 memset(&l[y + 1], l[y], FFMIN(end_y - y - 1, uvstep1d - 1));
2002 for (x = 0; x < end_x; x += uvstep1d)
2003 memset(&a[x + 1], a[x], FFMIN(end_x - x - 1, uvstep1d - 1));
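
/*
 * Fix up the requested intra prediction mode and edge pixels for blocks
 * on frame/tile borders. mode_conv[] swaps in a DC variant when the
 * required neighbours are missing; e.g. VERT_PRED without a top row
 * degrades to DC_127_PRED, i.e. prediction from the constant 127.
 */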
static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
                                             uint8_t *dst_edge, ptrdiff_t stride_edge,
                                             uint8_t *dst_inner, ptrdiff_t stride_inner,
                                             uint8_t *l, int col, int x, int w,
                                             int row, int y, enum TxfmMode tx,
                                             int p)
{
    int have_top = row > 0 || y > 0;
    int have_left = col > s->tiling.tile_col_start || x > 0;
    int have_right = x < w - 1;
    static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
        [VERT_PRED]            = { { DC_127_PRED,          VERT_PRED },
                                   { DC_127_PRED,          VERT_PRED } },
        [HOR_PRED]             = { { DC_129_PRED,          DC_129_PRED },
                                   { HOR_PRED,             HOR_PRED } },
        [DC_PRED]              = { { DC_128_PRED,          TOP_DC_PRED },
                                   { LEFT_DC_PRED,         DC_PRED } },
        [DIAG_DOWN_LEFT_PRED]  = { { DC_127_PRED,          DIAG_DOWN_LEFT_PRED },
                                   { DC_127_PRED,          DIAG_DOWN_LEFT_PRED } },
        [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
                                   { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
        [VERT_RIGHT_PRED]      = { { VERT_RIGHT_PRED,      VERT_RIGHT_PRED },
                                   { VERT_RIGHT_PRED,      VERT_RIGHT_PRED } },
        [HOR_DOWN_PRED]        = { { HOR_DOWN_PRED,        HOR_DOWN_PRED },
                                   { HOR_DOWN_PRED,        HOR_DOWN_PRED } },
        [VERT_LEFT_PRED]       = { { DC_127_PRED,          VERT_LEFT_PRED },
                                   { DC_127_PRED,          VERT_LEFT_PRED } },
        [HOR_UP_PRED]          = { { DC_129_PRED,          DC_129_PRED },
                                   { HOR_UP_PRED,          HOR_UP_PRED } },
        [TM_VP8_PRED]          = { { DC_129_PRED,          VERT_PRED },
                                   { HOR_PRED,             TM_VP8_PRED } },
    };
    static const struct {
        uint8_t needs_left:1;
        uint8_t needs_top:1;
        uint8_t needs_topleft:1;
        uint8_t needs_topright:1;
    } edges[N_INTRA_PRED_MODES] = {
        [VERT_PRED]            = { .needs_top  = 1 },
        [HOR_PRED]             = { .needs_left = 1 },
        [DC_PRED]              = { .needs_top  = 1, .needs_left = 1 },
        [DIAG_DOWN_LEFT_PRED]  = { .needs_top  = 1, .needs_topright = 1 },
        [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
        [VERT_RIGHT_PRED]      = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
        [HOR_DOWN_PRED]        = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
        [VERT_LEFT_PRED]       = { .needs_top  = 1, .needs_topright = 1 },
        [HOR_UP_PRED]          = { .needs_left = 1 },
        [TM_VP8_PRED]          = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
        [LEFT_DC_PRED]         = { .needs_left = 1 },
        [TOP_DC_PRED]          = { .needs_top  = 1 },
        [DC_128_PRED]          = { 0 },
        [DC_127_PRED]          = { 0 },
        [DC_129_PRED]          = { 0 }
    };

    av_assert2(mode >= 0 && mode < 10);
    mode = mode_conv[mode][have_left][have_top];
    if (edges[mode].needs_top) {
        uint8_t *top, *topleft;
        int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
        int n_px_need_tr = 0;

        if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
            n_px_need_tr = 4;

        // if top of sb64-row, use s->intra_pred_data[] instead of
        // dst[-stride] for intra prediction (it contains pre- instead of
        // post-loopfilter data)
        if (have_top) {
            top = !(row & 7) && !y ?
                s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
                y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];

            topleft = !(row & 7) && !y ?
                s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
                y == 0 || x == 0 ? &dst_edge[-stride_edge] :
                &dst_inner[-stride_inner];
        }

        if (have_top &&
            (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
            (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
            n_px_need + n_px_need_tr <= n_px_have) {
            *a = top;
        } else {
            if (have_top) {
                if (n_px_need <= n_px_have) {
                    memcpy(*a, top, n_px_need);
                } else {
                    memcpy(*a, top, n_px_have);
                    memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
                           n_px_need - n_px_have);
                }
            } else {
                memset(*a, 127, n_px_need);
            }
            if (edges[mode].needs_topleft) {
                if (have_left && have_top) {
                    (*a)[-1] = topleft[-1];
                } else {
                    (*a)[-1] = have_top ? 129 : 127;
                }
            }
            if (tx == TX_4X4 && edges[mode].needs_topright) {
                if (have_top && have_right &&
                    n_px_need + n_px_need_tr <= n_px_have) {
                    memcpy(&(*a)[4], &top[4], 4);
                } else {
                    memset(&(*a)[4], (*a)[3], 4);
                }
            }
        }
    }
    if (edges[mode].needs_left) {
        if (have_left) {
            int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
            uint8_t *dst = x == 0 ? dst_edge : dst_inner;
            ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;

            if (n_px_need <= n_px_have) {
                for (i = 0; i < n_px_need; i++)
                    l[i] = dst[i * stride - 1];
            } else {
                for (i = 0; i < n_px_have; i++)
                    l[i] = dst[i * stride - 1];
                memset(&l[i], l[i - 1], n_px_need - n_px_have);
            }
        } else {
            memset(l, 129, 4 << tx);
        }
    }

    return mode;
}
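
/*
 * Per-plane intra reconstruction: predict from edge pixels, then add the
 * inverse transform of the decoded residual. Note the two destination
 * pointers: ptr_r points into the visible frame (used to read
 * neighbouring edge pixels), while ptr may point at a temporary buffer
 * when the block overhangs the frame edge (see decode_b()).
 */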
static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
{
    VP9Context *s = ctx->priv_data;
    VP9Block *const b = &s->b;
    int row = b->row, col = b->col;
    int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
    int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);
    int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
    int uvstep1d = 1 << b->uvtx, p;
    uint8_t *dst = b->dst[0], *dst_r = s->f->data[0] + y_off;

    for (n = 0, y = 0; y < end_y; y += step1d) {
        uint8_t *ptr = dst, *ptr_r = dst_r;
        for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
             ptr_r += 4 * step1d, n += step) {
            int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
                               y * 2 + x : 0];
            LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
            uint8_t *a = &a_buf[16], l[32];
            enum TxfmType txtp = vp9_intra_txfm_type[mode];
            int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];

            mode = check_intra_mode(s, mode, &a, ptr_r, s->f->linesize[0],
                                    ptr, b->y_stride, l,
                                    col, x, w4, row, y, b->tx, 0);
            s->dsp.intra_pred[b->tx][mode](ptr, b->y_stride, l, a);
            if (eob)
                s->dsp.itxfm_add[tx][txtp](ptr, b->y_stride,
                                           s->block + 16 * n, eob);
        }
        dst_r += 4 * s->f->linesize[0] * step1d;
        dst += 4 * b->y_stride * step1d;
    }

    // U/V
    h4 >>= 1;
    w4 >>= 1;
    end_x >>= 1;
    end_y >>= 1;
    step = 1 << (b->uvtx * 2);
    for (p = 0; p < 2; p++) {
        dst = b->dst[1 + p];
        dst_r = s->f->data[1 + p] + uv_off;
        for (n = 0, y = 0; y < end_y; y += uvstep1d) {
            uint8_t *ptr = dst, *ptr_r = dst_r;
            for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
                 ptr_r += 4 * uvstep1d, n += step) {
                int mode = b->uvmode;
                LOCAL_ALIGNED_16(uint8_t, a_buf, [48]);
                uint8_t *a = &a_buf[16], l[32];
                int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];

                mode = check_intra_mode(s, mode, &a, ptr_r, s->f->linesize[1],
                                        ptr, b->uv_stride, l,
                                        col, x, w4, row, y, b->uvtx, p + 1);
                s->dsp.intra_pred[b->uvtx][mode](ptr, b->uv_stride, l, a);
                if (eob)
                    s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, b->uv_stride,
                                                    s->uvblock[p] + 16 * n, eob);
            }
            dst_r += 4 * uvstep1d * s->f->linesize[1];
            dst += 4 * uvstep1d * b->uv_stride;
        }
    }
}
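
/*
 * Luma motion compensation. Motion vectors are in 1/8-pel units: e.g.
 * mv->x == 13 adds 13 >> 3 == 1 to the integer column and leaves
 * mx == 13 & 7 == 5 as the subpel phase, passed to the filter as
 * mx << 1 since the mc functions take 1/16-pel phases (shared with
 * chroma, which works in 1/16-pel directly). The emulated-edge path
 * widens the fetch by 3 pixels left/top and 4 right/bottom - the
 * support of the 8-tap filter - whenever the read would leave the
 * reference frame, and redirects it through edge_emu_buffer (80-byte
 * stride).
 */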
static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
                                         uint8_t *dst, ptrdiff_t dst_stride,
                                         const uint8_t *ref, ptrdiff_t ref_stride,
                                         ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
                                         int bw, int bh, int w, int h)
{
    int mx = mv->x, my = mv->y;

    y += my >> 3;
    x += mx >> 3;
    ref += y * ref_stride + x;
    mx &= 7;
    my &= 7;
    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
    if (x < !!mx * 3 || y < !!my * 3 ||
        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
                                 ref - !!my * 3 * ref_stride - !!mx * 3,
                                 ref_stride,
                                 bw + !!mx * 7, bh + !!my * 7,
                                 x - !!mx * 3, y - !!my * 3, w, h);
        ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
        ref_stride = 80;
    }
    mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
}
static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
                                           uint8_t *dst_u, uint8_t *dst_v,
                                           ptrdiff_t dst_stride,
                                           const uint8_t *ref_u, ptrdiff_t src_stride_u,
                                           const uint8_t *ref_v, ptrdiff_t src_stride_v,
                                           ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
                                           int bw, int bh, int w, int h)
{
    int mx = mv->x, my = mv->y;

    y += my >> 4;
    x += mx >> 4;
    ref_u += y * src_stride_u + x;
    ref_v += y * src_stride_v + x;
    mx &= 15;
    my &= 15;
    // FIXME bilinear filter only needs 0/1 pixels, not 3/4
    if (x < !!mx * 3 || y < !!my * 3 ||
        x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
                                 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
                                 src_stride_u,
                                 bw + !!mx * 7, bh + !!my * 7,
                                 x - !!mx * 3, y - !!my * 3, w, h);
        ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
        mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);

        s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
                                 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
                                 src_stride_v,
                                 bw + !!mx * 7, bh + !!my * 7,
                                 x - !!mx * 3, y - !!my * 3, w, h);
        ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
        mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
    } else {
        mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
        mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
    }
}
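
/*
 * Inter reconstruction: motion compensation from one or two reference
 * frames, then residual add. For sub-8x8 partitions, chroma is predicted
 * as one block whose motion vector is the rounded average of the four
 * luma sub-block vectors (the ROUNDED_DIV by 4 below).
 */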
static void inter_recon(AVCodecContext *ctx)
{
    static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
        { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
        { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
    };
    VP9Context *s = ctx->priv_data;
    VP9Block *const b = &s->b;
    int row = b->row, col = b->col;
    AVFrame *ref1 = s->refs[s->refidx[b->ref[0]]];
    AVFrame *ref2 = b->comp ? s->refs[s->refidx[b->ref[1]]] : NULL;
    int w = ctx->width, h = ctx->height;
    ptrdiff_t ls_y = b->y_stride, ls_uv = b->uv_stride;

    // y inter pred
    if (b->bs > BS_8x8) {
        if (b->bs == BS_8x4) {
            mc_luma_dir(s, s->dsp.mc[3][b->filter][0], b->dst[0], ls_y,
                        ref1->data[0], ref1->linesize[0],
                        row << 3, col << 3, &b->mv[0][0], 8, 4, w, h);
            mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
                        b->dst[0] + 4 * ls_y, ls_y,
                        ref1->data[0], ref1->linesize[0],
                        (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w, h);

            if (b->comp) {
                mc_luma_dir(s, s->dsp.mc[3][b->filter][1], b->dst[0], ls_y,
                            ref2->data[0], ref2->linesize[0],
                            row << 3, col << 3, &b->mv[0][1], 8, 4, w, h);
                mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
                            b->dst[0] + 4 * ls_y, ls_y,
                            ref2->data[0], ref2->linesize[0],
                            (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w, h);
            }
        } else if (b->bs == BS_4x8) {
            mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0], ls_y,
                        ref1->data[0], ref1->linesize[0],
                        row << 3, col << 3, &b->mv[0][0], 4, 8, w, h);
            mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0] + 4, ls_y,
                        ref1->data[0], ref1->linesize[0],
                        row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w, h);

            if (b->comp) {
                mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0], ls_y,
                            ref2->data[0], ref2->linesize[0],
                            row << 3, col << 3, &b->mv[0][1], 4, 8, w, h);
                mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0] + 4, ls_y,
                            ref2->data[0], ref2->linesize[0],
                            row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w, h);
            }
        } else {
            av_assert2(b->bs == BS_4x4);

            // FIXME if two horizontally adjacent blocks have the same MV,
            // do a w8 instead of a w4 call
            mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0], ls_y,
                        ref1->data[0], ref1->linesize[0],
                        row << 3, col << 3, &b->mv[0][0], 4, 4, w, h);
            mc_luma_dir(s, s->dsp.mc[4][b->filter][0], b->dst[0] + 4, ls_y,
                        ref1->data[0], ref1->linesize[0],
                        row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w, h);
            mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
                        b->dst[0] + 4 * ls_y, ls_y,
                        ref1->data[0], ref1->linesize[0],
                        (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w, h);
            mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
                        b->dst[0] + 4 * ls_y + 4, ls_y,
                        ref1->data[0], ref1->linesize[0],
                        (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w, h);

            if (b->comp) {
                mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0], ls_y,
                            ref2->data[0], ref2->linesize[0],
                            row << 3, col << 3, &b->mv[0][1], 4, 4, w, h);
                mc_luma_dir(s, s->dsp.mc[4][b->filter][1], b->dst[0] + 4, ls_y,
                            ref2->data[0], ref2->linesize[0],
                            row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w, h);
                mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
                            b->dst[0] + 4 * ls_y, ls_y,
                            ref2->data[0], ref2->linesize[0],
                            (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w, h);
                mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
                            b->dst[0] + 4 * ls_y + 4, ls_y,
                            ref2->data[0], ref2->linesize[0],
                            (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w, h);
            }
        }
    } else {
        int bwl = bwlog_tab[0][b->bs];
        int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;

        mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], b->dst[0], ls_y,
                    ref1->data[0], ref1->linesize[0],
                    row << 3, col << 3, &b->mv[0][0], bw, bh, w, h);

        if (b->comp)
            mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], b->dst[0], ls_y,
                        ref2->data[0], ref2->linesize[0],
                        row << 3, col << 3, &b->mv[0][1], bw, bh, w, h);
    }

    // uv inter pred
    {
        int bwl = bwlog_tab[1][b->bs];
        int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
        VP56mv mvuv;

        w = (w + 1) >> 1;
        h = (h + 1) >> 1;
        if (b->bs > BS_8x8) {
            mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
            mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
        } else {
            mvuv = b->mv[0][0];
        }

        mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
                      b->dst[1], b->dst[2], ls_uv,
                      ref1->data[1], ref1->linesize[1],
                      ref1->data[2], ref1->linesize[2],
                      row << 2, col << 2, &mvuv, bw, bh, w, h);

        if (b->comp) {
            if (b->bs > BS_8x8) {
                mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
                mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
            } else {
                mvuv = b->mv[0][1];
            }
            mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
                          b->dst[1], b->dst[2], ls_uv,
                          ref2->data[1], ref2->linesize[1],
                          ref2->data[2], ref2->linesize[2],
                          row << 2, col << 2, &mvuv, bw, bh, w, h);
        }
    }

    if (!b->skip) {
        /* mostly copied intra_recon() */

        int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
        int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
        int end_x = FFMIN(2 * (s->cols - col), w4);
        int end_y = FFMIN(2 * (s->rows - row), h4);
        int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
        int uvstep1d = 1 << b->uvtx, p;
        uint8_t *dst = b->dst[0];

        // y itxfm add
        for (n = 0, y = 0; y < end_y; y += step1d) {
            uint8_t *ptr = dst;
            for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
                int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];

                if (eob)
                    s->dsp.itxfm_add[tx][DCT_DCT](ptr, b->y_stride,
                                                  s->block + 16 * n, eob);
            }
            dst += 4 * b->y_stride * step1d;
        }

        // uv itxfm add
        h4 >>= 1;
        w4 >>= 1;
        end_x >>= 1;
        end_y >>= 1;
        step = 1 << (b->uvtx * 2);
        for (p = 0; p < 2; p++) {
            dst = b->dst[p + 1];
            for (n = 0, y = 0; y < end_y; y += uvstep1d) {
                uint8_t *ptr = dst;
                for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
                    int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];

                    if (eob)
                        s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, b->uv_stride,
                                                        s->uvblock[p] + 16 * n, eob);
                }
                dst += 4 * uvstep1d * b->uv_stride;
            }
        }
    }
}
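
/*
 * Given one decoded block, set the per-superblock loopfilter bitmasks
 * that loopfilter_sb() consumes. Each bit represents one 8-pixel edge
 * segment, and the last array index selects how wide a filter will be
 * applied across that segment.
 */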
static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
                                        int row_and_7, int col_and_7,
                                        int w, int h, int col_end, int row_end,
                                        enum TxfmMode tx, int skip_inter)
{
    // FIXME I'm pretty sure all loops can be replaced by a single LUT if
    // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
    // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
    // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)

    // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
    // edges. This means that for UV, we work on two subsampled blocks at
    // a time, and we only use the topleft block's mode information to set
    // things like block strength. Thus, for any block size smaller than
    // 16x16, ignore the odd portion of the block.
    if (tx == TX_4X4 && is_uv) {
        if (h == 1) {
            if (row_and_7 & 1)
                return;
            if (!row_end)
                h += 1;
        }
        if (w == 1) {
            if (col_and_7 & 1)
                return;
            if (!col_end)
                w += 1;
        }
    }

    if (tx == TX_4X4 && !skip_inter) {
        int t = 1 << col_and_7, m_col = (t << w) - t, y;
        int m_col_odd = (t << (w - 1)) - t;

        // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
        if (is_uv) {
            int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;

            for (y = row_and_7; y < h + row_and_7; y++) {
                int col_mask_id = 2 - !(y & 7);

                lflvl->mask[is_uv][0][y][1] |= m_row_8;
                lflvl->mask[is_uv][0][y][2] |= m_row_4;
                // for odd lines, if the odd col is not being filtered,
                // skip odd row also:
                // .---. <-- a
                // |   |
                // |___| <-- b
                // ^   ^
                // c   d
                //
                // if a/c are even row/col and b/d are odd, and d is skipped,
                // e.g. right edge of size-66x66.webm, then skip b also (bug)
                if ((col_end & 1) && (y & 1)) {
                    lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
                } else {
                    lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
                }
            }
        } else {
            int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;

            for (y = row_and_7; y < h + row_and_7; y++) {
                int col_mask_id = 2 - !(y & 3);

                lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
                lflvl->mask[is_uv][0][y][2] |= m_row_4;
                lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
                lflvl->mask[is_uv][0][y][3] |= m_col;
                lflvl->mask[is_uv][1][y][3] |= m_col;
            }
        }
    } else {
        int y, t = 1 << col_and_7, m_col = (t << w) - t;

        if (!skip_inter) {
            int mask_id = (tx == TX_8X8);
            int l2 = tx + is_uv - 1, step1d = 1 << l2;
            static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
            int m_row = m_col & masks[l2];

            // at odd UV col/row edges tx16/tx32 loopfilter edges, force
            // 8wd loopfilter to prevent going off the visible edge.
            if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
                int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
                int m_row_8 = m_row - m_row_16;

                for (y = row_and_7; y < h + row_and_7; y++) {
                    lflvl->mask[is_uv][0][y][0] |= m_row_16;
                    lflvl->mask[is_uv][0][y][1] |= m_row_8;
                }
            } else {
                for (y = row_and_7; y < h + row_and_7; y++)
                    lflvl->mask[is_uv][0][y][mask_id] |= m_row;
            }

            if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
                for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
                    lflvl->mask[is_uv][1][y][0] |= m_col;
                if (y - row_and_7 == h - 1)
                    lflvl->mask[is_uv][1][y][1] |= m_col;
            } else {
                for (y = row_and_7; y < h + row_and_7; y += step1d)
                    lflvl->mask[is_uv][1][y][mask_id] |= m_col;
            }
        } else if (tx != TX_4X4) {
            int mask_id;

            mask_id = (tx == TX_8X8) || (is_uv && h == 1);
            lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
            mask_id = (tx == TX_8X8) || (is_uv && w == 1);
            for (y = row_and_7; y < h + row_and_7; y++)
                lflvl->mask[is_uv][0][y][mask_id] |= t;
        } else if (is_uv) {
            int t8 = t & 0x01, t4 = t - t8;

            for (y = row_and_7; y < h + row_and_7; y++) {
                lflvl->mask[is_uv][0][y][2] |= t4;
                lflvl->mask[is_uv][0][y][1] |= t8;
            }
            lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
        } else {
            int t8 = t & 0x11, t4 = t - t8;

            for (y = row_and_7; y < h + row_and_7; y++) {
                lflvl->mask[is_uv][0][y][2] |= t4;
                lflvl->mask[is_uv][0][y][1] |= t8;
            }
            lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
        }
    }
}
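
/*
 * Decode a single block: mode/mv parsing, coefficient decoding, intra or
 * inter reconstruction, and loopfilter mask setup. The lim_lut[] block at
 * the bottom lazily caches, per filter level, the limit derived from the
 * sharpness setting; e.g. with lvl == 32 and sharpness == 0 it stores
 * lim_lut[32] = 32 and mblim_lut[32] = 2 * (32 + 2) + 32 = 100.
 */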
static int decode_b(AVCodecContext *ctx, int row, int col,
                    struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
                    enum BlockLevel bl, enum BlockPartition bp)
{
    VP9Context *s = ctx->priv_data;
    VP9Block *const b = &s->b;
    enum BlockSize bs = bl * 3 + bp;
    int res, y, w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
    int emu[2];

    b->row = row;
    b->row7 = row & 7;
    b->col = col;
    b->col7 = col & 7;
    s->min_mv.x = -(128 + col * 64);
    s->min_mv.y = -(128 + row * 64);
    s->max_mv.x = 128 + (s->cols - col - w4) * 64;
    s->max_mv.y = 128 + (s->rows - row - h4) * 64;
    b->bs = bs;
    decode_mode(ctx);
    b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));

    if (!b->skip) {
        if ((res = decode_coeffs(ctx)) < 0)
            return res;
    } else {
        int pl;

        memset(&s->above_y_nnz_ctx[col * 2], 0, w4 * 2);
        memset(&s->left_y_nnz_ctx[(row & 7) << 1], 0, h4 * 2);
        for (pl = 0; pl < 2; pl++) {
            memset(&s->above_uv_nnz_ctx[pl][col], 0, w4);
            memset(&s->left_uv_nnz_ctx[pl][row & 7], 0, h4);
        }
    }

    // emulated overhangs if the stride of the target buffer can't hold. This
    // allows to support emu-edge and so on even if we have large block
    // overhangs
    emu[0] = (col + w4) * 8 > s->f->linesize[0] ||
             (row + h4) > s->rows + 2 * !(ctx->flags & CODEC_FLAG_EMU_EDGE);
    emu[1] = (col + w4) * 4 > s->f->linesize[1] ||
             (row + h4) > s->rows + 2 * !(ctx->flags & CODEC_FLAG_EMU_EDGE);
    if (emu[0]) {
        b->dst[0] = s->tmp_y;
        b->y_stride = 64;
    } else {
        b->dst[0] = s->f->data[0] + yoff;
        b->y_stride = s->f->linesize[0];
    }
    if (emu[1]) {
        b->dst[1] = s->tmp_uv[0];
        b->dst[2] = s->tmp_uv[1];
        b->uv_stride = 32;
    } else {
        b->dst[1] = s->f->data[1] + uvoff;
        b->dst[2] = s->f->data[2] + uvoff;
        b->uv_stride = s->f->linesize[1];
    }
    if (b->intra) {
        intra_recon(ctx, yoff, uvoff);
    } else {
        inter_recon(ctx);
    }
    if (emu[0]) {
        int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;

        for (n = 0; o < w; n++) {
            int bw = 64 >> n;

            av_assert2(n <= 4);
            if (w & bw) {
                s->dsp.mc[n][0][0][0][0](s->f->data[0] + yoff + o, s->f->linesize[0],
                                         s->tmp_y + o, 64, h, 0, 0);
                o += bw;
            }
        }
    }
    if (emu[1]) {
        int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;

        for (n = 1; o < w; n++) {
            int bw = 64 >> n;

            av_assert2(n <= 4);
            if (w & bw) {
                s->dsp.mc[n][0][0][0][0](s->f->data[1] + uvoff + o, s->f->linesize[1],
                                         s->tmp_uv[0] + o, 32, h, 0, 0);
                s->dsp.mc[n][0][0][0][0](s->f->data[2] + uvoff + o, s->f->linesize[2],
                                         s->tmp_uv[1] + o, 32, h, 0, 0);
                o += bw;
            }
        }
    }

    // pick filter level and find edges to apply filter to
    if (s->filter.level &&
        (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
                                                    [b->mode[3] != ZEROMV]) > 0) {
        int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
        int skip_inter = !b->intra && b->skip;

        for (y = 0; y < h4; y++)
            memset(&lflvl->level[((row & 7) + y) * 8 + (col & 7)], lvl, w4);
        mask_edges(lflvl, 0, row & 7, col & 7, x_end, y_end, 0, 0, b->tx, skip_inter);
        mask_edges(lflvl, 1, row & 7, col & 7, x_end, y_end,
                   s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
                   s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
                   b->uvtx, skip_inter);

        if (!s->filter.lim_lut[lvl]) {
            int sharp = s->filter.sharpness;
            int limit = lvl;

            if (sharp > 0) {
                limit >>= (sharp + 3) >> 2;
                limit = FFMIN(limit, 9 - sharp);
            }
            limit = FFMAX(limit, 1);

            s->filter.lim_lut[lvl] = limit;
            s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
        }
    }

    return 0;
}
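
/*
 * Recursive superblock bisection. The partition context c packs the
 * above/left "was split at this level" bits into a 2-bit probability
 * index; on the frame's right/bottom edge only the partitions that fit
 * are coded, which is why the half-block cases below read a single
 * branch probability (p[1] or p[2]) instead of the full partition tree.
 */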
static int decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
                     ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = ctx->priv_data;
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1), res;
    const uint8_t *p = s->keyframe ? vp9_default_kf_partition_probs[bl][c] :
                                     s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl;

    if (bl == BL_8X8) {
        bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
        res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) {
        if (row + hbs < s->rows) {
            bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_H:
                if (!(res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp))) {
                    yoff += hbs * 8 * s->f->linesize[0];
                    uvoff += hbs * 4 * s->f->linesize[1];
                    res = decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
                }
                break;
            case PARTITION_V:
                if (!(res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp))) {
                    yoff += hbs * 8;
                    uvoff += hbs * 4;
                    res = decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
                }
                break;
            case PARTITION_SPLIT:
                if (!(res = decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1))) {
                    if (!(res = decode_sb(ctx, row, col + hbs, lflvl,
                                          yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1))) {
                        yoff += hbs * 8 * s->f->linesize[0];
                        uvoff += hbs * 4 * s->f->linesize[1];
                        if (!(res = decode_sb(ctx, row + hbs, col, lflvl,
                                              yoff, uvoff, bl + 1)))
                            res = decode_sb(ctx, row + hbs, col + hbs, lflvl,
                                            yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
                    }
                }
                break;
            default:
                av_assert0(0);
            }
        } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
            bp = PARTITION_SPLIT;
            if (!(res = decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1)))
                res = decode_sb(ctx, row, col + hbs, lflvl,
                                yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
        } else {
            bp = PARTITION_H;
            res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else if (row + hbs < s->rows) {
        if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
            bp = PARTITION_SPLIT;
            if (!(res = decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1))) {
                yoff += hbs * 8 * s->f->linesize[0];
                uvoff += hbs * 4 * s->f->linesize[1];
                res = decode_sb(ctx, row + hbs, col, lflvl,
                                yoff, uvoff, bl + 1);
            }
        } else {
            bp = PARTITION_V;
            res = decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else {
        bp = PARTITION_SPLIT;
        res = decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    s->counts.partition[bl][c][bp]++;

    return res;
}
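
/*
 * Apply the deblocking filter to one 64x64 superblock, driven entirely
 * by the bitmasks prepared in mask_edges(). L is the 6-bit filter level
 * for the current 8px unit and H is derived from its upper bits; E and I
 * come from the mblim/lim lookup tables filled in decode_b(). Pairs of
 * adjacent edges with independent levels are merged into a single
 * loop_filter_mix2 call by packing the second level's thresholds into
 * bits 8..15 of E, I and H.
 */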
static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
                          int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
{
    VP9Context *s = ctx->priv_data;
    uint8_t *dst = s->f->data[0] + yoff, *lvl = lflvl->level;
    ptrdiff_t ls_y = s->f->linesize[0], ls_uv = s->f->linesize[1];
    int y, x, p;

    // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
    // if you think of them as acting on a 8x8 block max, we can interleave
    // each v/h within the single x loop, but that only works if we work on
    // 8 pixel blocks, and we won't always do that (we want at least 16px
    // to use SSE2 optimizations, perhaps 32 for AVX2)

    // filter edges between columns, Y plane (e.g. block1 | block2)
    for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
        uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
        uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
        unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
        unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
        unsigned hm = hm1 | hm2 | hm13 | hm23;

        for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
            if (hm1 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (col || x > 1) {
                    if (hmask1[0] & x) {
                        if (hmask2[0] & x) {
                            av_assert2(l[8] == L);
                            s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
                        }
                    } else if (hm2 & x) {
                        L = l[8];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                               [!!(hmask2[1] & x)]
                                               [0](ptr, ls_y, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                            [0](ptr, ls_y, E, I, H);
                    }
                }
            } else if (hm2 & x) {
                int L = l[8], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (col || x > 1) {
                    s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                        [0](ptr + 8 * ls_y, ls_y, E, I, H);
                }
            }
            if (hm13 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (hm23 & x) {
                    L = l[8];
                    H |= (L >> 4) << 8;
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
                } else {
                    s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
                }
            } else if (hm23 & x) {
                int L = l[8], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
            }
        }
    }

    //                                          block1
    // filter edges between rows, Y plane (e.g. ------)
    //                                          block2
    dst = s->f->data[0] + yoff;
    lvl = lflvl->level;
    for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
        uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
        unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];

        for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
            if (row || y) {
                if (vm & x) {
                    int L = *l, H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    if (vmask[0] & x) {
                        if (vmask[0] & (x << 1)) {
                            av_assert2(l[1] == L);
                            s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
                        }
                    } else if (vm & (x << 1)) {
                        L = l[1];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
                                               [!!(vmask[1] & (x << 1))]
                                               [1](ptr, ls_y, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                            [1](ptr, ls_y, E, I, H);
                    }
                } else if (vm & (x << 1)) {
                    int L = l[1], H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
                                        [1](ptr + 8, ls_y, E, I, H);
                }
            }
            if (vm3 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (vm3 & (x << 1)) {
                    L = l[1];
                    H |= (L >> 4) << 8;
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
                } else {
                    s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
                }
            } else if (vm3 & (x << 1)) {
                int L = l[1], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
            }
        }
    }

    // same principle but for U/V planes
    for (p = 0; p < 2; p++) {
        lvl = lflvl->level;
        dst = s->f->data[1 + p] + uvoff;
        for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
            uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
            uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
            unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
            unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;

            for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
                if (col || x > 1) {
                    if (hm1 & x) {
                        int L = *l, H = L >> 4;
                        int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                        if (hmask1[0] & x) {
                            if (hmask2[0] & x) {
                                av_assert2(l[16] == L);
                                s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
                            } else {
                                s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
                            }
                        } else if (hm2 & x) {
                            L = l[16];
                            H |= (L >> 4) << 8;
                            E |= s->filter.mblim_lut[L] << 8;
                            I |= s->filter.lim_lut[L] << 8;
                            s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                                   [!!(hmask2[1] & x)]
                                                   [0](ptr, ls_uv, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                                [0](ptr, ls_uv, E, I, H);
                        }
                    } else if (hm2 & x) {
                        int L = l[16], H = L >> 4;
                        int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                        s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                            [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
                    }
                }
                if (x & 0xAA)
                    l += 2;
            }
        }
        lvl = lflvl->level;
        dst = s->f->data[1 + p] + uvoff;
        for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
            uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
            unsigned vm = vmask[0] | vmask[1] | vmask[2];

            for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
                if (row || y) {
                    if (vm & x) {
                        int L = *l, H = L >> 4;
                        int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                        if (vmask[0] & x) {
                            if (vmask[0] & (x << 2)) {
                                av_assert2(l[2] == L);
                                s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
                            } else {
                                s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
                            }
                        } else if (vm & (x << 2)) {
                            L = l[2];
                            H |= (L >> 4) << 8;
                            E |= s->filter.mblim_lut[L] << 8;
                            I |= s->filter.lim_lut[L] << 8;
                            s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
                                                   [!!(vmask[1] & (x << 2))]
                                                   [1](ptr, ls_uv, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                                [1](ptr, ls_uv, E, I, H);
                        }
                    } else if (vm & (x << 2)) {
                        int L = l[2], H = L >> 4;
                        int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                        s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
                                            [1](ptr + 8, ls_uv, E, I, H);
                    }
                }
            }
            if (y & 1)
                lvl += 16;
        }
    }
}
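
/*
 * Tile boundaries are computed in superblock units and then scaled to
 * 8px block units (<< 3). Worked example: n = 9 sb columns with
 * log2_n = 1 (two tile columns) gives tile 0 the sbs [0, 4) (block
 * columns 0..31) and tile 1 the sbs [4, 9) (block columns 32..71).
 */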
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = ( idx      * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;
    *start = FFMIN(sb_start, n) << 3;
    *end   = FFMIN(sb_end,   n) << 3;
}
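
/*
 * Backward probability adaptation, one probability at a time: p2 is the
 * probability observed from this frame's counts, and the stored value
 * moves from p1 towards p2 by update_factor/256, scaled by how many
 * counts were actually seen. Worked example with *p = 128, ct0 = 30,
 * ct1 = 10, max_count = 20, update_factor = 128: p2 = 7700 / 40 = 192,
 * ct saturates to 20 so update_factor stays 128, and *p becomes
 * 128 + (((192 - 128) * 128 + 128) >> 8) = 160.
 */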
static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
                                        int max_count, int update_factor)
{
    unsigned ct = ct0 + ct1, p2, p1;

    if (!ct)
        return;

    p1 = *p;
    p2 = ((ct0 << 8) + (ct >> 1)) / ct;
    p2 = av_clip(p2, 1, 255);
    ct = FFMIN(ct, max_count);
    update_factor = FASTDIV(update_factor * ct, max_count);

    // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
    *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
}
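
/*
 * Frame-level adaptation: fold this frame's symbol counts back into the
 * active probability context. Coefficient probabilities use the
 * frame-type dependent factor uf (112 after keyframes/intra-only frames,
 * 128 otherwise); all mode/mv probabilities use a fixed 20/128 pair.
 */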
static void adapt_probs(VP9Context *s)
{
    int i, j, k, l, m;
    prob_context *p = &s->prob_ctx[s->framectxid].p;
    int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;

    // coefficients
    for (i = 0; i < 4; i++)
        for (j = 0; j < 2; j++)
            for (k = 0; k < 2; k++)
                for (l = 0; l < 6; l++)
                    for (m = 0; m < 6; m++) {
                        uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
                        unsigned *e = s->counts.eob[i][j][k][l][m];
                        unsigned *c = s->counts.coef[i][j][k][l][m];

                        if (l == 0 && m >= 3) // dc only has 3 pt
                            break;

                        adapt_prob(&pp[0], e[0], e[1], 24, uf);
                        adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
                        adapt_prob(&pp[2], c[1], c[2], 24, uf);
                    }

    if (s->keyframe || s->intraonly) {
        memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
        memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
        memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
        memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
        return;
    }

    // skip flag
    for (i = 0; i < 3; i++)
        adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);

    // intra/inter flag
    for (i = 0; i < 4; i++)
        adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);

    // comppred flag
    if (s->comppredmode == PRED_SWITCHABLE) {
        for (i = 0; i < 5; i++)
            adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
    }

    // reference frames
    if (s->comppredmode != PRED_SINGLEREF) {
        for (i = 0; i < 5; i++)
            adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
                       s->counts.comp_ref[i][1], 20, 128);
    }

    if (s->comppredmode != PRED_COMPREF) {
        for (i = 0; i < 5; i++) {
            uint8_t *pp = p->single_ref[i];
            unsigned (*c)[2] = s->counts.single_ref[i];

            adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
            adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
        }
    }

    // block partitioning
    for (i = 0; i < 4; i++)
        for (j = 0; j < 4; j++) {
            uint8_t *pp = p->partition[i][j];
            unsigned *c = s->counts.partition[i][j];

            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
            adapt_prob(&pp[2], c[2], c[3], 20, 128);
        }

    // tx size
    if (s->txfmmode == TX_SWITCHABLE) {
        for (i = 0; i < 2; i++) {
            unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];

            adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
            adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
            adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
            adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
            adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
            adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
        }
    }

    // interpolation filter
    if (s->filtermode == FILTER_SWITCHABLE) {
        for (i = 0; i < 4; i++) {
            uint8_t *pp = p->filter[i];
            unsigned *c = s->counts.filter[i];

            adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
            adapt_prob(&pp[1], c[1], c[2], 20, 128);
        }
    }

    // mv modes
    for (i = 0; i < 7; i++) {
        uint8_t *pp = p->mv_mode[i];
        unsigned *c = s->counts.mv_mode[i];

        adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
        adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
        adapt_prob(&pp[2], c[1], c[3], 20, 128);
    }

    // mv joints
    {
        uint8_t *pp = p->mv_joint;
        unsigned *c = s->counts.mv_joint;

        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
        adapt_prob(&pp[2], c[2], c[3], 20, 128);
    }

    // mv components
    for (i = 0; i < 2; i++) {
        uint8_t *pp;
        unsigned *c, (*c2)[2], sum;

        adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
                   s->counts.mv_comp[i].sign[1], 20, 128);

        pp = p->mv_comp[i].classes;
        c = s->counts.mv_comp[i].classes;
        sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
        adapt_prob(&pp[0], c[0], sum, 20, 128);
        sum -= c[1];
        adapt_prob(&pp[1], c[1], sum, 20, 128);
        sum -= c[2] + c[3];
        adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
        adapt_prob(&pp[3], c[2], c[3], 20, 128);
        sum -= c[4] + c[5];
        adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
        adapt_prob(&pp[5], c[4], c[5], 20, 128);
        sum -= c[6];
        adapt_prob(&pp[6], c[6], sum, 20, 128);
        adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
        adapt_prob(&pp[8], c[7], c[8], 20, 128);
        adapt_prob(&pp[9], c[9], c[10], 20, 128);

        adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
                   s->counts.mv_comp[i].class0[1], 20, 128);
        pp = p->mv_comp[i].bits;
        c2 = s->counts.mv_comp[i].bits;
        for (j = 0; j < 10; j++)
            adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);

        for (j = 0; j < 2; j++) {
            pp = p->mv_comp[i].class0_fp[j];
            c = s->counts.mv_comp[i].class0_fp[j];
            adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
            adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
            adapt_prob(&pp[2], c[2], c[3], 20, 128);
        }
        pp = p->mv_comp[i].fp;
        c = s->counts.mv_comp[i].fp;
        adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
        adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
        adapt_prob(&pp[2], c[2], c[3], 20, 128);

        if (s->highprecisionmvs) {
            adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
                       s->counts.mv_comp[i].class0_hp[1], 20, 128);
            adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
                       s->counts.mv_comp[i].hp[1], 20, 128);
        }
    }

    // y intra modes
    for (i = 0; i < 4; i++) {
        uint8_t *pp = p->y_mode[i];
        unsigned *c = s->counts.y_mode[i], sum, s2;

        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
        adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
        sum -= c[TM_VP8_PRED];
        adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
        sum -= c[VERT_PRED];
        adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
        s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
        sum -= s2;
        adapt_prob(&pp[3], s2, sum, 20, 128);
        s2 -= c[HOR_PRED];
        adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
        adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
        sum -= c[DIAG_DOWN_LEFT_PRED];
        adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
        sum -= c[VERT_LEFT_PRED];
        adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
        adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
    }

    // uv intra modes
    for (i = 0; i < 10; i++) {
        uint8_t *pp = p->uv_mode[i];
        unsigned *c = s->counts.uv_mode[i], sum, s2;

        sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
        adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
        sum -= c[TM_VP8_PRED];
        adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
        sum -= c[VERT_PRED];
        adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
        s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
        sum -= s2;
        adapt_prob(&pp[3], s2, sum, 20, 128);
        s2 -= c[HOR_PRED];
        adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
        adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
        sum -= c[DIAG_DOWN_LEFT_PRED];
        adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
        sum -= c[VERT_LEFT_PRED];
        adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
        adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
    }
}
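
/*
 * Frame-level decode: header parsing, reference handling, the tile/row
 * decode loop, per-row loopfilter, and probability adaptation. Frame
 * buffers are recycled from the fixed fb[] pool: anything no longer
 * referenced is unref'd, and the first empty slot becomes the new
 * target frame.
 */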
static int vp9_decode_frame(AVCodecContext *ctx, void *out_pic,
                            int *got_frame, const uint8_t *data, int size)
{
    VP9Context *s = ctx->priv_data;
    int res, tile_row, tile_col, i, ref, row, col;
    ptrdiff_t yoff = 0, uvoff = 0;
    //AVFrame *prev_frame = s->f; // for segmentation map

    if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
        return res;
    } else if (res == 0) {
        if (!s->refs[ref]) {
            av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        }
        if ((res = av_frame_ref(out_pic, s->refs[ref])) < 0)
            return res;
        *got_frame = 1;
        return 0;
    }
    data += res;
    size -= res;

    // discard old references
    for (i = 0; i < 10; i++) {
        AVFrame *f = s->fb[i];
        if (f->data[0] && f != s->f &&
            f != s->refs[0] && f != s->refs[1] &&
            f != s->refs[2] && f != s->refs[3] &&
            f != s->refs[4] && f != s->refs[5] &&
            f != s->refs[6] && f != s->refs[7])
            av_frame_unref(f);
    }

    // find unused reference
    for (i = 0; i < 10; i++)
        if (!s->fb[i]->data[0])
            break;
    av_assert0(i < 10);
    s->f = s->fb[i];
    if ((res = ff_get_buffer(ctx, s->f,
                             s->refreshrefmask ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return res;
    s->f->key_frame = s->keyframe;
    s->f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;

    // main tile decode loop
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->keyframe || s->intraonly) {
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
    } else {
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    }
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
    memset(s->above_segpred_ctx, 0, s->cols);
    for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
        set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
                        tile_row, s->tiling.log2_tile_rows, s->sb_rows);

        for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
            unsigned tile_size;

            if (tile_col == s->tiling.tile_cols - 1 &&
                tile_row == s->tiling.tile_rows - 1) {
                tile_size = size;
            } else {
                tile_size = AV_RB32(data);
                data += 4;
                size -= 4;
            }
            if (tile_size > size)
                return AVERROR_INVALIDDATA;
            ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
            if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) // marker bit
                return AVERROR_INVALIDDATA;
            data += tile_size;
            size -= tile_size;
        }

        for (row = s->tiling.tile_row_start;
             row < s->tiling.tile_row_end;
             row += 8, yoff += s->f->linesize[0] * 64,
             uvoff += s->f->linesize[1] * 32) {
            struct VP9Filter *lflvl_ptr = s->lflvl;
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

            for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
                set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
                                tile_col, s->tiling.log2_tile_cols, s->sb_cols);

                memset(s->left_partition_ctx, 0, 8);
                memset(s->left_skip_ctx, 0, 8);
                if (s->keyframe || s->intraonly) {
                    memset(s->left_mode_ctx, DC_PRED, 16);
                } else {
                    memset(s->left_mode_ctx, NEARESTMV, 8);
                }
                memset(s->left_y_nnz_ctx, 0, 16);
                memset(s->left_uv_nnz_ctx, 0, 16);
                memset(s->left_segpred_ctx, 0, 8);

                memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
                for (col = s->tiling.tile_col_start;
                     col < s->tiling.tile_col_end;
                     col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
                    // FIXME integrate with lf code (i.e. zero after each
                    // use, similar to invtxfm coefficients, or similar)
                    memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));

                    if ((res = decode_sb(ctx, row, col, lflvl_ptr,
                                         yoff2, uvoff2, BL_64X64)) < 0)
                        return res;
                }
                memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
            }

            // backup pre-loopfilter reconstruction data for intra
            // prediction of next row of sb64s
            if (row + 8 < s->rows) {
                memcpy(s->intra_pred_data[0],
                       s->f->data[0] + yoff + 63 * s->f->linesize[0],
                       8 * s->cols);
                memcpy(s->intra_pred_data[1],
                       s->f->data[1] + uvoff + 31 * s->f->linesize[1],
                       4 * s->cols);
                memcpy(s->intra_pred_data[2],
                       s->f->data[2] + uvoff + 31 * s->f->linesize[2],
                       4 * s->cols);
            }

            // loopfilter one row
            if (s->filter.level) {
                yoff2 = yoff;
                uvoff2 = uvoff;
                lflvl_ptr = s->lflvl;
                for (col = 0; col < s->cols;
                     col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
                    loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
                }
            }
        }
    }

    // bw adaptivity (or in case of parallel decoding mode, fw adaptivity
    // probability maintenance between frames)
    if (s->refreshctx) {
        if (s->parallelmode) {
            int j, k, l, m;

            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    for (k = 0; k < 2; k++)
                        for (l = 0; l < 6; l++)
                            for (m = 0; m < 6; m++)
                                memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
                                       s->prob.coef[i][j][k][l][m], 3);
            s->prob_ctx[s->framectxid].p = s->prob.p;
        } else {
            adapt_probs(s);
        }
    }
    FFSWAP(struct VP9mvrefPair *, s->mv[0], s->mv[1]);

    // ref frame setup
    for (i = 0; i < 8; i++)
        if (s->refreshrefmask & (1 << i))
            s->refs[i] = s->f;

    if (!s->invisible) {
        if ((res = av_frame_ref(out_pic, s->f)) < 0)
            return res;
        *got_frame = 1;
    }

    return 0;
}
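
/*
 * Packet-level entry point; splits VP9 "superframes". Layout, assuming a
 * marker byte of e.g. 0xc9: the low 3 bits give n_frames - 1 (here 2
 * frames) and bits 3..4 give the size-field width minus 1 (here 2
 * bytes), so the index is the marker, 2 little-endian 16-bit sizes, and
 * a repeated marker byte: idx_sz = 2 + 2 * 2 = 6 bytes at the end of
 * the packet.
 */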
static int vp9_decode_packet(AVCodecContext *avctx, void *out_pic,
                             int *got_frame, AVPacket *avpkt)
{
    const uint8_t *data = avpkt->data;
    int size = avpkt->size, marker, res;

    // read superframe index - this is a collection of individual frames that
    // together lead to one visible frame
    av_assert1(size > 0); // without CODEC_CAP_DELAY, this is implied
    marker = data[size - 1];
    if ((marker & 0xe0) == 0xc0) {
        int nbytes = 1 + ((marker >> 3) & 0x3);
        int n_frames = 1 + (marker & 0x7), idx_sz = 2 + n_frames * nbytes;

        if (size >= idx_sz && data[size - idx_sz] == marker) {
            const uint8_t *idx = data + size + 1 - idx_sz;
            switch (nbytes) {
#define case_n(a, rd) \
            case a: \
                while (n_frames--) { \
                    int sz = rd; \
                    idx += a; \
                    if (sz > size) { \
                        av_log(avctx, AV_LOG_ERROR, \
                               "Superframe packet size too big: %d > %d\n", \
                               sz, size); \
                        return AVERROR_INVALIDDATA; \
                    } \
                    res = vp9_decode_frame(avctx, out_pic, got_frame, \
                                           data, sz); \
                    if (res < 0) \
                        return res; \
                    data += sz; \
                    size -= sz; \
                } \
                break;
            case_n(1, *idx);
            case_n(2, AV_RL16(idx));
            case_n(3, AV_RL24(idx));
            case_n(4, AV_RL32(idx));
            }
            return avpkt->size;
        }
    }
    // if we get here, there was no valid superframe index, i.e. this is just
    // one whole single frame - decode it as such from the complete input buf
    if ((res = vp9_decode_frame(avctx, out_pic, got_frame, data, size)) < 0)
        return res;
    return avpkt->size;
}
static void vp9_decode_flush(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;
    int i;

    for (i = 0; i < 10; i++)
        if (s->fb[i]->data[0])
            av_frame_unref(s->fb[i]);
    for (i = 0; i < 8; i++)
        s->refs[i] = NULL;
    s->f = NULL;
}

static av_cold int vp9_decode_init(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;
    int i;

    ctx->pix_fmt = AV_PIX_FMT_YUV420P;
    ff_vp9dsp_init(&s->dsp);
    ff_videodsp_init(&s->vdsp, 8);
    for (i = 0; i < 10; i++) {
        s->fb[i] = av_frame_alloc();
        if (!s->fb[i]) {
            av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
            return AVERROR(ENOMEM);
        }
    }
    s->filter.sharpness = -1;

    return 0;
}

static av_cold int vp9_decode_free(AVCodecContext *ctx)
{
    VP9Context *s = ctx->priv_data;
    int i;

    for (i = 0; i < 10; i++) {
        if (s->fb[i]->data[0])
            av_frame_unref(s->fb[i]);
        av_frame_free(&s->fb[i]);
    }
    av_freep(&s->above_partition_ctx);
    av_freep(&s->c_b);

    return 0;
}

AVCodec ff_vp9_decoder = {
    .name           = "vp9",
    .long_name      = NULL_IF_CONFIG_SMALL("Google VP9"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_VP9,
    .priv_data_size = sizeof(VP9Context),
    .init           = vp9_decode_init,
    .close          = vp9_decode_free,
    .decode         = vp9_decode_packet,
    .capabilities   = CODEC_CAP_DR1,
    .flush          = vp9_decode_flush,
};