2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
33 #include "libavutil/avassert.h"
35 #define VP9_SYNCCODE 0x498342
// Per-frame state: the decoded picture plus side data that later frames
// consult for prediction. segmentation_map and mv both live inside the
// single refcounted 'extradata' buffer (see vp9_alloc_frame).
72 typedef struct VP9Frame {
74 AVBufferRef *extradata;
75 uint8_t *segmentation_map;
76 struct VP9mvrefPair *mv;
// NOTE(review): the mask[][] below appears to belong to the loopfilter
// struct (cf. s->lflvl / struct VP9Filter), whose opening line is not
// visible in this listing — confirm against the full file.
81 uint8_t /* bit=col */ mask[2 /* 0=y, 1=uv */][2 /* 0=col, 1=row */]
82 [8 /* rows */][4 /* 0=16, 1=8, 2=4, 3=inner4 */];
// Per-block decode state filled in by the mode/motion-vector parser and
// consumed by reconstruction and the loopfilter.
85 typedef struct VP9Block {
86 uint8_t seg_id, intra, comp, ref[2], mode[4], uvmode, skip;
87 enum FilterMode filter;
// up to 4 sub-block MVs (b_idx), each with up to 2 references (compound)
88 VP56mv mv[4 /* b_idx */][2 /* ref */];
90 enum TxfmMode tx, uvtx;
92 enum BlockPartition bp;
// Main decoder context. Groups below: per-pass block state, frame header
// fields, tiling, probability model + counts, left/above context caches,
// and reconstruction scratch buffers.
95 typedef struct VP9Context {
102 VP9Block *b_base, *b;
103 int pass, uses_2pass, last_uses_2pass;
104 int row, row7, col, col7;
106 ptrdiff_t y_stride, uv_stride;
// bitstream header state (parsed in decode_frame_header)
110 uint8_t keyframe, last_keyframe;
112 uint8_t use_last_frame_mvs;
118 uint8_t refreshrefmask;
119 uint8_t highprecisionmvs;
120 enum FilterMode filtermode;
121 uint8_t allowcompinter;
124 uint8_t parallelmode;
// the two variable compound references (set from signbias[] comparisons)
128 uint8_t varcompref[2];
129 ThreadFrame refs[8], next_refs[8];
138 uint8_t mblim_lut[64];
146 int8_t ydc_qdelta, uvdc_qdelta, uvac_qdelta;
151 uint8_t absolute_vals;
157 uint8_t skip_enabled;
// tiling layout derived from the header
166 unsigned log2_tile_cols, log2_tile_rows;
167 unsigned tile_cols, tile_rows;
168 unsigned tile_row_start, tile_row_end, tile_col_start, tile_col_end;
170 unsigned sb_cols, sb_rows, rows, cols;
// NOTE(review): the two coef[] members below have different innermost
// sizes (3 vs 11); they appear to belong to two different nested structs
// (saved model vs. working model) whose opening lines are elided — confirm.
173 uint8_t coef[4][2][2][6][6][3];
177 uint8_t coef[4][2][2][6][6][11];
// symbol counts gathered for backward probability adaptation
182 unsigned y_mode[4][10];
183 unsigned uv_mode[10][10];
184 unsigned filter[4][3];
185 unsigned mv_mode[7][4];
186 unsigned intra[4][2];
188 unsigned single_ref[5][2][2];
189 unsigned comp_ref[5][2];
190 unsigned tx32p[2][4];
191 unsigned tx16p[2][3];
194 unsigned mv_joint[4];
197 unsigned classes[11];
199 unsigned bits[10][2];
200 unsigned class0_fp[2][4];
202 unsigned class0_hp[2];
205 unsigned partition[4][4][4];
206 unsigned coef[4][2][2][6][6][3];
207 unsigned eob[4][2][2][6][6][2];
209 enum TxfmMode txfmmode;
210 enum CompPredMode comppredmode;
212 // contextual (left/above) cache
213 DECLARE_ALIGNED(16, uint8_t, left_y_nnz_ctx)[16];
214 DECLARE_ALIGNED(16, uint8_t, left_mode_ctx)[16];
215 DECLARE_ALIGNED(16, VP56mv, left_mv_ctx)[16][2];
216 DECLARE_ALIGNED(8, uint8_t, left_uv_nnz_ctx)[2][8];
217 DECLARE_ALIGNED(8, uint8_t, left_partition_ctx)[8];
218 DECLARE_ALIGNED(8, uint8_t, left_skip_ctx)[8];
219 DECLARE_ALIGNED(8, uint8_t, left_txfm_ctx)[8];
220 DECLARE_ALIGNED(8, uint8_t, left_segpred_ctx)[8];
221 DECLARE_ALIGNED(8, uint8_t, left_intra_ctx)[8];
222 DECLARE_ALIGNED(8, uint8_t, left_comp_ctx)[8];
223 DECLARE_ALIGNED(8, uint8_t, left_ref_ctx)[8];
224 DECLARE_ALIGNED(8, uint8_t, left_filter_ctx)[8];
// above-row context pointers; all carved out of one pool in update_size()
225 uint8_t *above_partition_ctx;
226 uint8_t *above_mode_ctx;
227 // FIXME maybe merge some of the below in a flags field?
228 uint8_t *above_y_nnz_ctx;
229 uint8_t *above_uv_nnz_ctx[2];
230 uint8_t *above_skip_ctx; // 1bit
231 uint8_t *above_txfm_ctx; // 2bit
232 uint8_t *above_segpred_ctx; // 1bit
233 uint8_t *above_intra_ctx; // 1bit
234 uint8_t *above_comp_ctx; // 1bit
235 uint8_t *above_ref_ctx; // 2bit
236 uint8_t *above_filter_ctx;
237 VP56mv (*above_mv_ctx)[2];
// whole-frame cache
240 uint8_t *intra_pred_data[3];
241 struct VP9Filter *lflvl;
242 DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[71*80];
244 // block reconstruction intermediates
245 int block_alloc_using_2pass;
246 int16_t *block_base, *block, *uvblock_base[2], *uvblock[2];
247 uint8_t *eob_base, *uveob_base[2], *eob, *uveob[2];
248 struct { int x, y; } min_mv, max_mv;
249 DECLARE_ALIGNED(32, uint8_t, tmp_y)[64*64];
250 DECLARE_ALIGNED(32, uint8_t, tmp_uv)[2][32*32];
// Block {width, height} per block size, in two unit scales; judging from
// the values (64x64 -> {16,16} resp. {8,8}), row [0] is in 4x4 units and
// row [1] in 8x8 units — NOTE(review): confirm against callers.
253 static const uint8_t bwh_tab[2][N_BS_SIZES][2] = {
255 { 16, 16 }, { 16, 8 }, { 8, 16 }, { 8, 8 }, { 8, 4 }, { 4, 8 },
256 { 4, 4 }, { 4, 2 }, { 2, 4 }, { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 },
258 { 8, 8 }, { 8, 4 }, { 4, 8 }, { 4, 4 }, { 4, 2 }, { 2, 4 },
259 { 2, 2 }, { 2, 1 }, { 1, 2 }, { 1, 1 }, { 1, 1 }, { 1, 1 }, { 1, 1 },
// Allocate a frame: get the picture via the frame-threading buffer pool,
// then one zeroed refcounted buffer holding both the segmentation map
// (sz bytes, one per 8x8 block) and the per-block mv/ref array.
// Returns 0 on success, a negative AVERROR on failure.
263 static int vp9_alloc_frame(AVCodecContext *ctx, VP9Frame *f)
265 VP9Context *s = ctx->priv_data;
268 if ((ret = ff_thread_get_buffer(ctx, &f->tf, AV_GET_BUFFER_FLAG_REF)) < 0)
// 64 8x8 blocks per 64x64 superblock
270 sz = 64 * s->sb_cols * s->sb_rows;
271 if (!(f->extradata = av_buffer_allocz(sz * (1 + sizeof(struct VP9mvrefPair))))) {
// undo the picture allocation so no buffer leaks on the error path
272 ff_thread_release_buffer(ctx, &f->tf);
273 return AVERROR(ENOMEM);
276 f->segmentation_map = f->extradata->data;
277 f->mv = (struct VP9mvrefPair *) (f->extradata->data + sz);
279 // retain segmentation map if it doesn't update
280 if (s->segmentation.enabled && !s->segmentation.update_map &&
281 !s->intraonly && !s->keyframe) {
282 memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, sz);
// Release both halves of a VP9Frame: the threaded picture buffer and the
// refcounted extradata (segmentation map + mv array).
288 static void vp9_unref_frame(AVCodecContext *ctx, VP9Frame *f)
290 ff_thread_release_buffer(ctx, &f->tf);
291 av_buffer_unref(&f->extradata);
// Make 'dst' an additional reference to 'src' (picture + extradata).
// On extradata failure the already-taken picture ref is dropped again so
// 'dst' is left clean. Returns 0 or a negative AVERROR.
294 static int vp9_ref_frame(AVCodecContext *ctx, VP9Frame *dst, VP9Frame *src)
298 if ((res = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0) {
300 } else if (!(dst->extradata = av_buffer_ref(src->extradata))) {
301 vp9_unref_frame(ctx, dst);
302 return AVERROR(ENOMEM);
// NOTE(review): only segmentation_map is re-pointed here; dst->mv does
// not appear to be copied in this listing — confirm in the full file.
305 dst->segmentation_map = src->segmentation_map;
// (Re)compute frame geometry and (re)allocate the per-column "above"
// context arrays for the new dimensions. No-op if the size is unchanged
// and the context pool already exists. Returns 0 or AVERROR(ENOMEM).
311 static int update_size(AVCodecContext *ctx, int w, int h)
313 VP9Context *s = ctx->priv_data;
316 av_assert0(w > 0 && h > 0);
318 if (s->intra_pred_data[0] && w == ctx->width && h == ctx->height)
// geometry: superblocks are 64x64, block grid is 8x8
323 s->sb_cols = (w + 63) >> 6;
324 s->sb_rows = (h + 63) >> 6;
325 s->cols = (w + 7) >> 3;
326 s->rows = (h + 7) >> 3;
// carve all above-context arrays out of one allocation; the per-sb64
// byte counts passed to assign() must sum to the 240 in the malloc below
328 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
329 av_freep(&s->intra_pred_data[0]);
330 p = av_malloc(s->sb_cols * (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
332 return AVERROR(ENOMEM);
333 assign(s->intra_pred_data[0], uint8_t *, 64);
334 assign(s->intra_pred_data[1], uint8_t *, 32);
335 assign(s->intra_pred_data[2], uint8_t *, 32);
336 assign(s->above_y_nnz_ctx, uint8_t *, 16);
337 assign(s->above_mode_ctx, uint8_t *, 16);
338 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
339 assign(s->above_partition_ctx, uint8_t *, 8);
340 assign(s->above_skip_ctx, uint8_t *, 8);
341 assign(s->above_txfm_ctx, uint8_t *, 8);
342 assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
343 assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
344 assign(s->above_segpred_ctx, uint8_t *, 8);
345 assign(s->above_intra_ctx, uint8_t *, 8);
346 assign(s->above_comp_ctx, uint8_t *, 8);
347 assign(s->above_ref_ctx, uint8_t *, 8);
348 assign(s->above_filter_ctx, uint8_t *, 8);
349 assign(s->lflvl, struct VP9Filter *, 1);
352 // these will be re-allocated a little later
353 av_freep(&s->b_base);
354 av_freep(&s->block_base);
// (Re)allocate the block-reconstruction scratch buffers. In 2-pass mode
// a whole frame's worth (per superblock) is kept; in single-pass mode one
// superblock's worth suffices and is reused. Reallocates only when the
// pass mode changed or nothing is allocated yet.
359 static int update_block_buffers(AVCodecContext *ctx)
361 VP9Context *s = ctx->priv_data;
363 if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->uses_2pass)
367 av_free(s->block_base);
369 int sbs = s->sb_cols * s->sb_rows;
371 s->b_base = av_malloc(sizeof(VP9Block) * s->cols * s->rows);
// 64*64 luma + 2*32*32 chroma coeffs (*2 bytes = the *3 int16 factor),
// plus 128 eob bytes, per superblock
372 s->block_base = av_mallocz((64 * 64 + 128) * sbs * 3);
373 if (!s->b_base || !s->block_base)
374 return AVERROR(ENOMEM);
375 s->uvblock_base[0] = s->block_base + sbs * 64 * 64;
376 s->uvblock_base[1] = s->uvblock_base[0] + sbs * 32 * 32;
377 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * 32 * 32);
378 s->uveob_base[0] = s->eob_base + 256 * sbs;
379 s->uveob_base[1] = s->uveob_base[0] + 64 * sbs;
// single-pass: same layout, sized for one superblock only
381 s->b_base = av_malloc(sizeof(VP9Block));
382 s->block_base = av_mallocz((64 * 64 + 128) * 3);
383 if (!s->b_base || !s->block_base)
384 return AVERROR(ENOMEM);
385 s->uvblock_base[0] = s->block_base + 64 * 64;
386 s->uvblock_base[1] = s->uvblock_base[0] + 32 * 32;
387 s->eob_base = (uint8_t *) (s->uvblock_base[1] + 32 * 32);
388 s->uveob_base[0] = s->eob_base + 256;
389 s->uveob_base[1] = s->uveob_base[0] + 64;
// remember which mode these buffers were sized for
391 s->block_alloc_using_2pass = s->uses_2pass;
396 // for some reason the sign bit is at the end, not the start, of a bit sequence
// Read an n-bit magnitude followed by a sign bit (1 = negative).
397 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
399 int v = get_bits(gb, n);
400 return get_bits1(gb) ? -v : v;
// Inverse of the "recenter" mapping used by the subexponential prob
// update: fold the coded offset v back around the pivot m. Odd v maps
// below m, even v above m, and v > 2*m passes through unchanged.
403 static av_always_inline int inv_recenter_nonneg(int v, int m)
405 return v > 2 * m ? v : v & 1 ? m - ((v + 1) >> 1) : m + (v >> 1);
408 // differential forward probability updates
// Decode a subexponentially-coded delta and apply it to probability p
// (range [1,255]); returns the updated probability, mirrored around 128
// so the delta magnitude is symmetric for high and low p.
409 static int update_prob(VP56RangeCoder *c, int p)
// maps the decoded VLC index to the actual delta; the first 20 entries
// are the coarse "cheap" steps (see the comment block below)
411 static const int inv_map_table[254] = {
412 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
413 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
414 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
415 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
416 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
417 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
418 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
419 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
420 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
421 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
422 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
423 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
424 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
425 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
426 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
427 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
428 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
429 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
434 /* This code is trying to do a differential probability update. For a
435 * current probability A in the range [1, 255], the difference to a new
436 * probability of any value can be expressed differentially as 1-A,255-A
437 * where some part of this (absolute range) exists both in positive as
438 * well as the negative part, whereas another part only exists in one
439 * half. We're trying to code this shared part differentially, i.e.
440 * times two where the value of the lowest bit specifies the sign, and
441 * the single part is then coded on top of this. This absolute difference
442 * then again has a value of [0,254], but a bigger value in this range
443 * indicates that we're further away from the original value A, so we
444 * can code this as a VLC code, since higher values are increasingly
445 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
446 * updates vs. the 'fine, exact' updates further down the range, which
447 * adds one extra dimension to this differential update model. */
// four VLC size classes: 4+4+5-bit fixed ranges, then a 7-bit escape
449 if (!vp8_rac_get(c)) {
450 d = vp8_rac_get_uint(c, 4) + 0;
451 } else if (!vp8_rac_get(c)) {
452 d = vp8_rac_get_uint(c, 4) + 16;
453 } else if (!vp8_rac_get(c)) {
454 d = vp8_rac_get_uint(c, 5) + 32;
456 d = vp8_rac_get_uint(c, 7);
458 d = (d << 1) - 65 + vp8_rac_get(c);
// mirror around 128 so the same table serves both halves of the range
462 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
463 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
// Parse the VP9 uncompressed frame header (bit-exact GetBitContext reads)
// followed by the arith-coded compressed header (probability updates).
// On success returns the byte offset of the tile data (uncompressed
// header size + size2); on error a negative AVERROR. Also fills *ref for
// show-existing-frame packets.
466 static int decode_frame_header(AVCodecContext *ctx,
467 const uint8_t *data, int size, int *ref)
469 VP9Context *s = ctx->priv_data;
470 int c, i, j, k, l, m, n, w, h, max, size2, res, sharp;
472 const uint8_t *data2;
475 if ((res = init_get_bits8(&s->gb, data, size)) < 0) {
476 av_log(ctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
479 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
480 av_log(ctx, AV_LOG_ERROR, "Invalid frame marker\n");
481 return AVERROR_INVALIDDATA;
483 s->profile = get_bits1(&s->gb);
484 if (get_bits1(&s->gb)) { // reserved bit
485 av_log(ctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
486 return AVERROR_INVALIDDATA;
// show-existing-frame: output a previously decoded reference directly
488 if (get_bits1(&s->gb)) {
489 *ref = get_bits(&s->gb, 3);
492 s->last_uses_2pass = s->uses_2pass;
493 s->last_keyframe = s->keyframe;
494 s->keyframe = !get_bits1(&s->gb);
495 last_invisible = s->invisible;
496 s->invisible = !get_bits1(&s->gb);
497 s->errorres = get_bits1(&s->gb);
498 s->use_last_frame_mvs = !s->errorres && !last_invisible;
// --- keyframe path: sync code, colorspace, size ---
500 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
501 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
502 return AVERROR_INVALIDDATA;
504 s->colorspace = get_bits(&s->gb, 3);
505 if (s->colorspace == 7) { // RGB = profile 1
506 av_log(ctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
507 return AVERROR_INVALIDDATA;
509 s->fullrange = get_bits1(&s->gb);
510 // for profile 1, here follows the subsampling bits
511 s->refreshrefmask = 0xff;
512 w = get_bits(&s->gb, 16) + 1;
513 h = get_bits(&s->gb, 16) + 1;
514 if (get_bits1(&s->gb)) // display size
515 skip_bits(&s->gb, 32);
// --- inter / intra-only path ---
517 s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
518 s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
520 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
521 av_log(ctx, AV_LOG_ERROR, "Invalid sync code\n");
522 return AVERROR_INVALIDDATA;
524 s->refreshrefmask = get_bits(&s->gb, 8);
525 w = get_bits(&s->gb, 16) + 1;
526 h = get_bits(&s->gb, 16) + 1;
527 if (get_bits1(&s->gb)) // display size
528 skip_bits(&s->gb, 32);
// inter frame: 3 active references, each with a sign bias
530 s->refreshrefmask = get_bits(&s->gb, 8);
531 s->refidx[0] = get_bits(&s->gb, 3);
532 s->signbias[0] = get_bits1(&s->gb);
533 s->refidx[1] = get_bits(&s->gb, 3);
534 s->signbias[1] = get_bits1(&s->gb);
535 s->refidx[2] = get_bits(&s->gb, 3);
536 s->signbias[2] = get_bits1(&s->gb);
537 if (!s->refs[s->refidx[0]].f->data[0] ||
538 !s->refs[s->refidx[1]].f->data[0] ||
539 !s->refs[s->refidx[2]].f->data[0]) {
540 av_log(ctx, AV_LOG_ERROR, "Not all references are available\n");
541 return AVERROR_INVALIDDATA;
// frame size: either inherited from one of the references, or explicit
543 if (get_bits1(&s->gb)) {
544 w = s->refs[s->refidx[0]].f->width;
545 h = s->refs[s->refidx[0]].f->height;
546 } else if (get_bits1(&s->gb)) {
547 w = s->refs[s->refidx[1]].f->width;
548 h = s->refs[s->refidx[1]].f->height;
549 } else if (get_bits1(&s->gb)) {
550 w = s->refs[s->refidx[2]].f->width;
551 h = s->refs[s->refidx[2]].f->height;
553 w = get_bits(&s->gb, 16) + 1;
554 h = get_bits(&s->gb, 16) + 1;
556 // Note that in this code, "CUR_FRAME" is actually before we
557 // have formally allocated a frame, and thus actually represents
559 s->use_last_frame_mvs &= s->frames[CUR_FRAME].tf.f->width == w &&
560 s->frames[CUR_FRAME].tf.f->height == h;
561 if (get_bits1(&s->gb)) // display size
562 skip_bits(&s->gb, 32);
563 s->highprecisionmvs = get_bits1(&s->gb);
564 s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
// compound prediction is only possible when the references disagree in
// sign bias; varcompref[] selects the two variable refs accordingly
566 s->allowcompinter = s->signbias[0] != s->signbias[1] ||
567 s->signbias[0] != s->signbias[2];
568 if (s->allowcompinter) {
569 if (s->signbias[0] == s->signbias[1]) {
571 s->varcompref[0] = 0;
572 s->varcompref[1] = 1;
573 } else if (s->signbias[0] == s->signbias[2]) {
575 s->varcompref[0] = 0;
576 s->varcompref[1] = 2;
579 s->varcompref[0] = 1;
580 s->varcompref[1] = 2;
// error-resilient streams force no context refresh + parallel mode
585 s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
586 s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
587 s->framectxid = c = get_bits(&s->gb, 2);
589 /* loopfilter header data */
590 s->filter.level = get_bits(&s->gb, 6);
591 sharp = get_bits(&s->gb, 3);
592 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
593 // the old cache values since they are still valid
594 if (s->filter.sharpness != sharp)
595 memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
596 s->filter.sharpness = sharp;
597 if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
598 if (get_bits1(&s->gb)) {
599 for (i = 0; i < 4; i++)
600 if (get_bits1(&s->gb))
601 s->lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
602 for (i = 0; i < 2; i++)
603 if (get_bits1(&s->gb))
604 s->lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
607 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
610 /* quantization header data */
611 s->yac_qi = get_bits(&s->gb, 8);
612 s->ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
613 s->uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
614 s->uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
615 s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
616 s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;
618 /* segmentation header info */
619 if ((s->segmentation.enabled = get_bits1(&s->gb))) {
620 if ((s->segmentation.update_map = get_bits1(&s->gb))) {
621 for (i = 0; i < 7; i++)
622 s->prob.seg[i] = get_bits1(&s->gb) ?
623 get_bits(&s->gb, 8) : 255;
624 if ((s->segmentation.temporal = get_bits1(&s->gb))) {
625 for (i = 0; i < 3; i++)
626 s->prob.segpred[i] = get_bits1(&s->gb) ?
627 get_bits(&s->gb, 8) : 255;
// reusing last frame's segmap requires identical dimensions
630 if ((!s->segmentation.update_map || s->segmentation.temporal) &&
631 (w != s->frames[CUR_FRAME].tf.f->width ||
632 h != s->frames[CUR_FRAME].tf.f->height)) {
633 av_log(ctx, AV_LOG_ERROR,
634 "Reference segmap (temp=%d,update=%d) enabled on size-change!\n",
635 s->segmentation.temporal, s->segmentation.update_map);
636 return AVERROR_INVALIDDATA;
639 if (get_bits1(&s->gb)) {
640 s->segmentation.absolute_vals = get_bits1(&s->gb);
641 for (i = 0; i < 8; i++) {
642 if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
643 s->segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
644 if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
645 s->segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
646 if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
647 s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
648 s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
652 s->segmentation.feat[0].q_enabled = 0;
653 s->segmentation.feat[0].lf_enabled = 0;
654 s->segmentation.feat[0].skip_enabled = 0;
655 s->segmentation.feat[0].ref_enabled = 0;
658 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
659 for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
660 int qyac, qydc, quvac, quvdc, lflvl, sh;
662 if (s->segmentation.feat[i].q_enabled) {
663 if (s->segmentation.absolute_vals)
664 qyac = s->segmentation.feat[i].q_val;
666 qyac = s->yac_qi + s->segmentation.feat[i].q_val;
670 qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
671 quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
672 quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
673 qyac = av_clip_uintp2(qyac, 8);
675 s->segmentation.feat[i].qmul[0][0] = vp9_dc_qlookup[qydc];
676 s->segmentation.feat[i].qmul[0][1] = vp9_ac_qlookup[qyac];
677 s->segmentation.feat[i].qmul[1][0] = vp9_dc_qlookup[quvdc];
678 s->segmentation.feat[i].qmul[1][1] = vp9_ac_qlookup[quvac];
// precompute per-segment loopfilter levels, with ref/mode deltas scaled
// by sh (1 when base level >= 32)
680 sh = s->filter.level >= 32;
681 if (s->segmentation.feat[i].lf_enabled) {
682 if (s->segmentation.absolute_vals)
683 lflvl = s->segmentation.feat[i].lf_val;
685 lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
687 lflvl = s->filter.level;
689 s->segmentation.feat[i].lflvl[0][0] =
690 s->segmentation.feat[i].lflvl[0][1] =
691 av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
692 for (j = 1; j < 4; j++) {
693 s->segmentation.feat[i].lflvl[j][0] =
694 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
695 s->lf_delta.mode[0]) << sh), 6);
696 s->segmentation.feat[i].lflvl[j][1] =
697 av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
698 s->lf_delta.mode[1]) << sh), 6);
703 if ((res = update_size(ctx, w, h)) < 0) {
704 av_log(ctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d\n", w, h);
/* tiling info: columns clamped to [min required, max allowed] */
707 for (s->tiling.log2_tile_cols = 0;
708 (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
709 s->tiling.log2_tile_cols++) ;
710 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
711 max = FFMAX(0, max - 1);
712 while (max > s->tiling.log2_tile_cols) {
713 if (get_bits1(&s->gb))
714 s->tiling.log2_tile_cols++;
718 s->tiling.log2_tile_rows = decode012(&s->gb);
719 s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
// one range coder per tile column
720 if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
721 s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
722 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
723 sizeof(VP56RangeCoder) * s->tiling.tile_cols);
725 av_log(ctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
726 return AVERROR(ENOMEM);
// keyframes / error-resilient / intra-only frames reset all 4 saved
// probability contexts to the spec defaults
730 if (s->keyframe || s->errorres || s->intraonly) {
731 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
732 s->prob_ctx[3].p = vp9_default_probs;
733 memcpy(s->prob_ctx[0].coef, vp9_default_coef_probs,
734 sizeof(vp9_default_coef_probs));
735 memcpy(s->prob_ctx[1].coef, vp9_default_coef_probs,
736 sizeof(vp9_default_coef_probs));
737 memcpy(s->prob_ctx[2].coef, vp9_default_coef_probs,
738 sizeof(vp9_default_coef_probs));
739 memcpy(s->prob_ctx[3].coef, vp9_default_coef_probs,
740 sizeof(vp9_default_coef_probs));
743 // next 16 bits is size of the rest of the header (arith-coded)
744 size2 = get_bits(&s->gb, 16);
745 data2 = align_get_bits(&s->gb);
746 if (size2 > size - (data2 - data)) {
747 av_log(ctx, AV_LOG_ERROR, "Invalid compressed header size\n");
748 return AVERROR_INVALIDDATA;
750 ff_vp56_init_range_decoder(&s->c, data2, size2);
751 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
752 av_log(ctx, AV_LOG_ERROR, "Marker bit was set\n");
753 return AVERROR_INVALIDDATA;
// reset adaptation counters (coef/eob only for intra frames)
756 if (s->keyframe || s->intraonly) {
757 memset(s->counts.coef, 0, sizeof(s->counts.coef) + sizeof(s->counts.eob));
759 memset(&s->counts, 0, sizeof(s->counts));
761 // FIXME is it faster to not copy here, but do it down in the fw updates
762 // as explicit copies if the fw update is missing (and skip the copy upon
764 s->prob.p = s->prob_ctx[c].p;
// --- compressed header: txfm mode + forward probability updates ---
768 s->txfmmode = TX_4X4;
770 s->txfmmode = vp8_rac_get_uint(&s->c, 2);
771 if (s->txfmmode == 3)
772 s->txfmmode += vp8_rac_get(&s->c);
774 if (s->txfmmode == TX_SWITCHABLE) {
775 for (i = 0; i < 2; i++)
776 if (vp56_rac_get_prob_branchy(&s->c, 252))
777 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
778 for (i = 0; i < 2; i++)
779 for (j = 0; j < 2; j++)
780 if (vp56_rac_get_prob_branchy(&s->c, 252))
781 s->prob.p.tx16p[i][j] =
782 update_prob(&s->c, s->prob.p.tx16p[i][j]);
783 for (i = 0; i < 2; i++)
784 for (j = 0; j < 3; j++)
785 if (vp56_rac_get_prob_branchy(&s->c, 252))
786 s->prob.p.tx32p[i][j] =
787 update_prob(&s->c, s->prob.p.tx32p[i][j]);
// coefficient probabilities: per tx size, either updated from the
// bitstream or copied verbatim from the saved context
792 for (i = 0; i < 4; i++) {
793 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
794 if (vp8_rac_get(&s->c)) {
795 for (j = 0; j < 2; j++)
796 for (k = 0; k < 2; k++)
797 for (l = 0; l < 6; l++)
798 for (m = 0; m < 6; m++) {
799 uint8_t *p = s->prob.coef[i][j][k][l][m];
800 uint8_t *r = ref[j][k][l][m];
801 if (m >= 3 && l == 0) // dc only has 3 pt
803 for (n = 0; n < 3; n++) {
804 if (vp56_rac_get_prob_branchy(&s->c, 252)) {
805 p[n] = update_prob(&s->c, r[n]);
813 for (j = 0; j < 2; j++)
814 for (k = 0; k < 2; k++)
815 for (l = 0; l < 6; l++)
816 for (m = 0; m < 6; m++) {
817 uint8_t *p = s->prob.coef[i][j][k][l][m];
818 uint8_t *r = ref[j][k][l][m];
// NOTE(review): '>' here vs '>=' above — the copy branch also
// touches m==3 for l==0; in-bounds but asymmetric, matches libvpx
819 if (m > 3 && l == 0) // dc only has 3 pt
825 if (s->txfmmode == i)
// skip-flag probabilities
830 for (i = 0; i < 3; i++)
831 if (vp56_rac_get_prob_branchy(&s->c, 252))
832 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
// inter-only probability updates follow
833 if (!s->keyframe && !s->intraonly) {
834 for (i = 0; i < 7; i++)
835 for (j = 0; j < 3; j++)
836 if (vp56_rac_get_prob_branchy(&s->c, 252))
837 s->prob.p.mv_mode[i][j] =
838 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
840 if (s->filtermode == FILTER_SWITCHABLE)
841 for (i = 0; i < 4; i++)
842 for (j = 0; j < 2; j++)
843 if (vp56_rac_get_prob_branchy(&s->c, 252))
844 s->prob.p.filter[i][j] =
845 update_prob(&s->c, s->prob.p.filter[i][j]);
847 for (i = 0; i < 4; i++)
848 if (vp56_rac_get_prob_branchy(&s->c, 252))
849 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
851 if (s->allowcompinter) {
852 s->comppredmode = vp8_rac_get(&s->c);
854 s->comppredmode += vp8_rac_get(&s->c);
855 if (s->comppredmode == PRED_SWITCHABLE)
856 for (i = 0; i < 5; i++)
857 if (vp56_rac_get_prob_branchy(&s->c, 252))
859 update_prob(&s->c, s->prob.p.comp[i]);
861 s->comppredmode = PRED_SINGLEREF;
864 if (s->comppredmode != PRED_COMPREF) {
865 for (i = 0; i < 5; i++) {
866 if (vp56_rac_get_prob_branchy(&s->c, 252))
867 s->prob.p.single_ref[i][0] =
868 update_prob(&s->c, s->prob.p.single_ref[i][0]);
869 if (vp56_rac_get_prob_branchy(&s->c, 252))
870 s->prob.p.single_ref[i][1] =
871 update_prob(&s->c, s->prob.p.single_ref[i][1]);
875 if (s->comppredmode != PRED_SINGLEREF) {
876 for (i = 0; i < 5; i++)
877 if (vp56_rac_get_prob_branchy(&s->c, 252))
878 s->prob.p.comp_ref[i] =
879 update_prob(&s->c, s->prob.p.comp_ref[i]);
882 for (i = 0; i < 4; i++)
883 for (j = 0; j < 9; j++)
884 if (vp56_rac_get_prob_branchy(&s->c, 252))
885 s->prob.p.y_mode[i][j] =
886 update_prob(&s->c, s->prob.p.y_mode[i][j]);
888 for (i = 0; i < 4; i++)
889 for (j = 0; j < 4; j++)
890 for (k = 0; k < 3; k++)
891 if (vp56_rac_get_prob_branchy(&s->c, 252))
892 s->prob.p.partition[3 - i][j][k] =
893 update_prob(&s->c, s->prob.p.partition[3 - i][j][k]);
895 // mv fields don't use the update_prob subexp model for some reason
896 for (i = 0; i < 3; i++)
897 if (vp56_rac_get_prob_branchy(&s->c, 252))
898 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
900 for (i = 0; i < 2; i++) {
901 if (vp56_rac_get_prob_branchy(&s->c, 252))
902 s->prob.p.mv_comp[i].sign = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
904 for (j = 0; j < 10; j++)
905 if (vp56_rac_get_prob_branchy(&s->c, 252))
906 s->prob.p.mv_comp[i].classes[j] =
907 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
909 if (vp56_rac_get_prob_branchy(&s->c, 252))
910 s->prob.p.mv_comp[i].class0 = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
912 for (j = 0; j < 10; j++)
913 if (vp56_rac_get_prob_branchy(&s->c, 252))
914 s->prob.p.mv_comp[i].bits[j] =
915 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
918 for (i = 0; i < 2; i++) {
919 for (j = 0; j < 2; j++)
920 for (k = 0; k < 3; k++)
921 if (vp56_rac_get_prob_branchy(&s->c, 252))
922 s->prob.p.mv_comp[i].class0_fp[j][k] =
923 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
925 for (j = 0; j < 3; j++)
926 if (vp56_rac_get_prob_branchy(&s->c, 252))
927 s->prob.p.mv_comp[i].fp[j] =
928 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// high-precision MV probabilities only exist when the header enabled them
931 if (s->highprecisionmvs) {
932 for (i = 0; i < 2; i++) {
933 if (vp56_rac_get_prob_branchy(&s->c, 252))
934 s->prob.p.mv_comp[i].class0_hp =
935 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
937 if (vp56_rac_get_prob_branchy(&s->c, 252))
938 s->prob.p.mv_comp[i].hp =
939 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// total header size in bytes = uncompressed part + compressed part
944 return (data2 - data) + size2;
// Clamp an MV component-wise into the [s->min_mv, s->max_mv] window set
// up by the caller for the current block position.
947 static av_always_inline void clamp_mv(VP56mv *dst, const VP56mv *src,
950 dst->x = av_clip(src->x, s->min_mv.x, s->max_mv.x);
951 dst->y = av_clip(src->y, s->min_mv.y, s->max_mv.y);
// Build the MV predictor (*pmv) for reference 'ref' by scanning, in
// priority order: sub-block MVs already decoded in this block (sb/idx),
// spatial neighbours using the same reference, the co-located MV in the
// previous frame, then neighbours/co-located MVs using a *different*
// reference (sign-flipped when the sign biases disagree). The RETURN_*
// macros exit the function as soon as a suitable candidate is found.
954 static void find_ref_mvs(VP9Context *s,
955 VP56mv *pmv, int ref, int z, int idx, int sb)
// per-block-size neighbour offsets {col,row} to probe, nearest first
957 static const int8_t mv_ref_blk_off[N_BS_SIZES][8][2] = {
958 [BS_64x64] = {{ 3, -1 }, { -1, 3 }, { 4, -1 }, { -1, 4 },
959 { -1, -1 }, { 0, -1 }, { -1, 0 }, { 6, -1 }},
960 [BS_64x32] = {{ 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 },
961 { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 }},
962 [BS_32x64] = {{ -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 },
963 { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 }},
964 [BS_32x32] = {{ 1, -1 }, { -1, 1 }, { 2, -1 }, { -1, 2 },
965 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
966 [BS_32x16] = {{ 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 },
967 { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
968 [BS_16x32] = {{ -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 },
969 { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 }},
970 [BS_16x16] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, 1 },
971 { -1, -1 }, { 0, -3 }, { -3, 0 }, { -3, -3 }},
972 [BS_16x8] = {{ 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 },
973 { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 }},
974 [BS_8x16] = {{ -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 },
975 { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 }},
976 [BS_8x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
977 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
978 [BS_8x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
979 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
980 [BS_4x8] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
981 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
982 [BS_4x4] = {{ 0, -1 }, { -1, 0 }, { -1, -1 }, { 0, -2 },
983 { -2, 0 }, { -1, -2 }, { -2, -1 }, { -2, -2 }},
986 int row = s->row, col = s->col, row7 = s->row7;
987 const int8_t (*p)[2] = mv_ref_blk_off[b->bs];
// sentinel: no candidate collected yet
988 #define INVALID_MV 0x80008000U
989 uint32_t mem = INVALID_MV;
// return an already-decoded sub-block MV without clamping
992 #define RETURN_DIRECT_MV(mv) \
994 uint32_t m = AV_RN32A(&mv); \
998 } else if (mem == INVALID_MV) { \
1000 } else if (m != mem) { \
// for sub-block indices > 0, prefer MVs decoded earlier in this block
1007 if (sb == 2 || sb == 1) {
1008 RETURN_DIRECT_MV(b->mv[0][z]);
1009 } else if (sb == 3) {
1010 RETURN_DIRECT_MV(b->mv[2][z]);
1011 RETURN_DIRECT_MV(b->mv[1][z]);
1012 RETURN_DIRECT_MV(b->mv[0][z]);
// like RETURN_DIRECT_MV, but clamps the candidate to the MV window first
1015 #define RETURN_MV(mv) \
1020 clamp_mv(&tmp, &mv, s); \
1021 m = AV_RN32A(&tmp); \
1025 } else if (mem == INVALID_MV) { \
1027 } else if (m != mem) { \
1032 uint32_t m = AV_RN32A(&mv); \
1034 clamp_mv(pmv, &mv, s); \
1036 } else if (mem == INVALID_MV) { \
1038 } else if (m != mem) { \
1039 clamp_mv(pmv, &mv, s); \
// directly-adjacent above/left blocks, via the mv context caches
1046 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[(row - 1) * s->sb_cols * 8 + col];
1047 if (mv->ref[0] == ref) {
1048 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][0]);
1049 } else if (mv->ref[1] == ref) {
1050 RETURN_MV(s->above_mv_ctx[2 * col + (sb & 1)][1]);
1053 if (col > s->tiling.tile_col_start) {
1054 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[row * s->sb_cols * 8 + col - 1];
1055 if (mv->ref[0] == ref) {
1056 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][0]);
1057 } else if (mv->ref[1] == ref) {
1058 RETURN_MV(s->left_mv_ctx[2 * row7 + (sb >> 1)][1]);
1066 // previously coded MVs in this neighbourhood, using same reference frame
1067 for (; i < 8; i++) {
1068 int c = p[i][0] + col, r = p[i][1] + row;
// stay inside the tile column and the frame
1070 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1071 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1073 if (mv->ref[0] == ref) {
1074 RETURN_MV(mv->mv[0]);
1075 } else if (mv->ref[1] == ref) {
1076 RETURN_MV(mv->mv[1]);
1081 // MV at this position in previous frame, using same reference frame
1082 if (s->use_last_frame_mvs) {
1083 struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
// in 2-pass mode the last frame is fully decoded; otherwise wait for
// the decoder thread to pass this row
1085 if (!s->last_uses_2pass)
1086 ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
1087 if (mv->ref[0] == ref) {
1088 RETURN_MV(mv->mv[0]);
1089 } else if (mv->ref[1] == ref) {
1090 RETURN_MV(mv->mv[1]);
// candidate from a different reference: negate when sign biases differ
1094 #define RETURN_SCALE_MV(mv, scale) \
1097 VP56mv mv_temp = { -mv.x, -mv.y }; \
1098 RETURN_MV(mv_temp); \
1104 // previously coded MVs in this neighbourhood, using different reference frame
1105 for (i = 0; i < 8; i++) {
1106 int c = p[i][0] + col, r = p[i][1] + row;
1108 if (c >= s->tiling.tile_col_start && c < s->cols && r >= 0 && r < s->rows) {
1109 struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[r * s->sb_cols * 8 + c];
1111 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1112 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1114 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1115 // BUG - libvpx has this condition regardless of whether
1116 // we used the first ref MV and pre-scaling
1117 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1118 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1123 // MV at this position in previous frame, using different reference frame
1124 if (s->use_last_frame_mvs) {
1125 struct VP9mvrefPair *mv = &s->frames[LAST_FRAME].mv[row * s->sb_cols * 8 + col];
1127 // no need to await_progress, because we already did that above
1128 if (mv->ref[0] != ref && mv->ref[0] >= 0) {
1129 RETURN_SCALE_MV(mv->mv[0], s->signbias[mv->ref[0]] != s->signbias[ref]);
1131 if (mv->ref[1] != ref && mv->ref[1] >= 0 &&
1132 // BUG - libvpx has this condition regardless of whether
1133 // we used the first ref MV and pre-scaling
1134 AV_RN32A(&mv->mv[0]) != AV_RN32A(&mv->mv[1])) {
1135 RETURN_SCALE_MV(mv->mv[1], s->signbias[mv->ref[1]] != s->signbias[ref]);
1142 #undef RETURN_SCALE_MV
/**
 * Read one motion-vector component (row or column) from the range coder.
 *
 * @param s   decoder context (range coder + probability/count tables)
 * @param idx component index: 0 = vertical (y), 1 = horizontal (x)
 * @param hp  nonzero if the high-precision (eighth-pel) bit should be read
 * @return signed magnitude of the component delta; sign comes from a
 *         separately coded sign bit
 *
 * Also accumulates per-symbol statistics in s->counts.mv_comp[idx] for
 * backward probability adaptation.
 */
static av_always_inline int read_mv_component(VP9Context *s, int idx, int hp)
    int bit, sign = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].sign);
    // magnitude class selects how many extra bits follow
    int n, c = vp8_rac_get_tree(&s->c, vp9_mv_class_tree,
                                s->prob.p.mv_comp[idx].classes);
    s->counts.mv_comp[idx].sign[sign]++;
    s->counts.mv_comp[idx].classes[c]++;
        // classes >= 1: read c raw magnitude bits, then fraction + hp bits
        for (n = 0, m = 0; m < c; m++) {
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].bits[m]);
            s->counts.mv_comp[idx].bits[m][bit]++;
        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree, s->prob.p.mv_comp[idx].fp);
        s->counts.mv_comp[idx].fp[bit]++;
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].hp);
            s->counts.mv_comp[idx].hp[bit]++;
            // bug in libvpx - we count for bw entropy purposes even if the
            s->counts.mv_comp[idx].hp[1]++;
        // class 0: single integer bit + class0-specific fraction/hp bits
        n = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0);
        s->counts.mv_comp[idx].class0[n]++;
        bit = vp8_rac_get_tree(&s->c, vp9_mv_fp_tree,
                               s->prob.p.mv_comp[idx].class0_fp[n]);
        s->counts.mv_comp[idx].class0_fp[n][bit]++;
        n = (n << 3) | (bit << 1);
            bit = vp56_rac_get_prob(&s->c, s->prob.p.mv_comp[idx].class0_hp);
            s->counts.mv_comp[idx].class0_hp[bit]++;
            // bug in libvpx - we count for bw entropy purposes even if the
            s->counts.mv_comp[idx].class0_hp[1]++;
    // apply sign; magnitude is stored off-by-one (n encodes value - 1)
    return sign ? -(n + 1) : (n + 1);
/**
 * Fill mv[0] (and mv[1] for compound prediction) for the current block or
 * sub-block, according to the inter mode.
 *
 * @param s    decoder context; reads the current block from s->b
 * @param mv   output array of one MV per reference (2 entries)
 * @param mode inter mode (ZEROMV / NEARESTMV / NEARMV / NEWMV)
 * @param sb   sub-block index, or -1 for whole-block prediction
 *
 * For NEWMV the predicted MV from find_ref_mvs() is refined with deltas
 * read via read_mv_component(). High-precision deltas are only used when
 * allowed by the header AND the predictor is small (|x|,|y| < 64).
 */
static void fill_mv(VP9Context *s,
                    VP56mv *mv, int mode, int sb)
    if (mode == ZEROMV) {
        // FIXME cache this value and reuse for other subblocks
        find_ref_mvs(s, &mv[0], b->ref[0], 0, mode == NEARMV,
                     mode == NEWMV ? -1 : sb);
        // FIXME maybe move this code into find_ref_mvs()
        if ((mode == NEWMV || sb == -1) &&
            !(hp = s->highprecisionmvs && abs(mv[0].x) < 64 && abs(mv[0].y) < 64)) {
        if (mode == NEWMV) {
            // joint symbol says which of the two components carry a delta
            enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
                                              s->prob.p.mv_joint);
            s->counts.mv_joint[j]++;
            if (j >= MV_JOINT_V)
                mv[0].y += read_mv_component(s, 0, hp);
                mv[0].x += read_mv_component(s, 1, hp);
        // second reference (compound prediction): same procedure for mv[1]
        // FIXME cache this value and reuse for other subblocks
        find_ref_mvs(s, &mv[1], b->ref[1], 1, mode == NEARMV,
                     mode == NEWMV ? -1 : sb);
        if ((mode == NEWMV || sb == -1) &&
            !(hp = s->highprecisionmvs && abs(mv[1].x) < 64 && abs(mv[1].y) < 64)) {
        if (mode == NEWMV) {
            enum MVJoint j = vp8_rac_get_tree(&s->c, vp9_mv_joint_tree,
                                              s->prob.p.mv_joint);
            s->counts.mv_joint[j]++;
            if (j >= MV_JOINT_V)
                mv[1].y += read_mv_component(s, 0, hp);
                mv[1].x += read_mv_component(s, 1, hp);
/**
 * Fill a w x h byte rectangle at ptr (row pitch = stride) with value v.
 *
 * Specialized per width: the byte value is replicated into 16/32/64-bit
 * words so each row can be written with a small number of aligned stores
 * instead of a per-byte loop.
 */
static av_always_inline void setctx_2d(uint8_t *ptr, int w, int h,
                                       ptrdiff_t stride, int v)
    int v16 = v * 0x0101;                       // v replicated into 2 bytes
    uint32_t v32 = v * 0x01010101;              // v replicated into 4 bytes
    uint64_t v64 = v * 0x0101010101010101ULL;   // v replicated into 8 bytes
    uint32_t v32 = v * 0x01010101;              // 32-bit fallback path
    AV_WN32A(ptr + 4, v32);
/**
 * Parse all per-block mode information for the current block (s->b):
 * segment id, skip flag, intra/inter decision, transform size, intra
 * prediction modes or reference frames + motion vectors, and finally
 * update the left/above context arrays and the frame-wide MV/ref store
 * used by later blocks and by the next frame's MV prediction.
 */
static void decode_mode(AVCodecContext *ctx)
    // partition contexts written into left/above ctx arrays, per block size
    static const uint8_t left_ctx[N_BS_SIZES] = {
        0x0, 0x8, 0x0, 0x8, 0xc, 0x8, 0xc, 0xe, 0xc, 0xe, 0xf, 0xe, 0xf
    static const uint8_t above_ctx[N_BS_SIZES] = {
        0x0, 0x0, 0x8, 0x8, 0x8, 0xc, 0xc, 0xc, 0xe, 0xe, 0xe, 0xf, 0xf
    // largest transform size allowed for each block size
    static const uint8_t max_tx_for_bl_bp[N_BS_SIZES] = {
        TX_32X32, TX_32X32, TX_32X32, TX_32X32, TX_16X16, TX_16X16,
        TX_16X16, TX_8X8, TX_8X8, TX_8X8, TX_4X4, TX_4X4, TX_4X4
    VP9Context *s = ctx->priv_data;
    int row = s->row, col = s->col, row7 = s->row7;
    enum TxfmMode max_tx = max_tx_for_bl_bp[b->bs];
    // w4/h4: block size in 8x8 units, clipped to the frame edge
    int w4 = FFMIN(s->cols - col, bwh_tab[1][b->bs][0]);
    int h4 = FFMIN(s->rows - row, bwh_tab[1][b->bs][1]), y;
    int have_a = row > 0, have_l = col > s->tiling.tile_col_start;
    int vref, filter_id;

    // --- segment id ---------------------------------------------------
    if (!s->segmentation.enabled) {
    } else if (s->keyframe || s->intraonly) {
        b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree, s->prob.seg);
    } else if (!s->segmentation.update_map ||
               (s->segmentation.temporal &&
                vp56_rac_get_prob_branchy(&s->c,
                    s->prob.segpred[s->above_segpred_ctx[col] +
                                    s->left_segpred_ctx[row7]]))) {
        // predict segment id from the co-located area of the last frame
        uint8_t *refsegmap = s->frames[LAST_FRAME].segmentation_map;
        if (!s->last_uses_2pass)
            ff_thread_await_progress(&s->frames[LAST_FRAME].tf, row >> 3, 0);
        for (y = 0; y < h4; y++)
            for (x = 0; x < w4; x++)
                pred = FFMIN(pred, refsegmap[(y + row) * 8 * s->sb_cols + x + col]);
        av_assert1(pred < 8);
        memset(&s->above_segpred_ctx[col], 1, w4);
        memset(&s->left_segpred_ctx[row7], 1, h4);
        b->seg_id = vp8_rac_get_tree(&s->c, vp9_segmentation_tree,
        memset(&s->above_segpred_ctx[col], 0, w4);
        memset(&s->left_segpred_ctx[row7], 0, h4);
    if (s->segmentation.enabled &&
        (s->segmentation.update_map || s->keyframe || s->intraonly)) {
        setctx_2d(&s->frames[CUR_FRAME].segmentation_map[row * 8 * s->sb_cols + col],
                  w4, h4, 8 * s->sb_cols, b->seg_id);

    // --- skip flag ----------------------------------------------------
    b->skip = s->segmentation.enabled &&
              s->segmentation.feat[b->seg_id].skip_enabled;
        int c = s->left_skip_ctx[row7] + s->above_skip_ctx[col];
        b->skip = vp56_rac_get_prob(&s->c, s->prob.p.skip[c]);
        s->counts.skip[c][b->skip]++;

    // --- intra/inter decision -----------------------------------------
    if (s->keyframe || s->intraonly) {
    } else if (s->segmentation.feat[b->seg_id].ref_enabled) {
        b->intra = !s->segmentation.feat[b->seg_id].ref_val;
        if (have_a && have_l) {
            c = s->above_intra_ctx[col] + s->left_intra_ctx[row7];
            c = have_a ? 2 * s->above_intra_ctx[col] :
                have_l ? 2 * s->left_intra_ctx[row7] : 0;
        bit = vp56_rac_get_prob(&s->c, s->prob.p.intra[c]);
        s->counts.intra[c][bit]++;

    // --- transform size -----------------------------------------------
    if ((b->intra || !b->skip) && s->txfmmode == TX_SWITCHABLE) {
            // context from neighbours' tx sizes (skip => assume max_tx)
            c = (s->above_skip_ctx[col] ? max_tx :
                 s->above_txfm_ctx[col]) +
                (s->left_skip_ctx[row7] ? max_tx :
                 s->left_txfm_ctx[row7]) > max_tx;
            c = s->above_skip_ctx[col] ? 1 :
                (s->above_txfm_ctx[col] * 2 > max_tx);
        } else if (have_l) {
            c = s->left_skip_ctx[row7] ? 1 :
                (s->left_txfm_ctx[row7] * 2 > max_tx);
        b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][0]);
        b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][1]);
        b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx32p[c][2]);
        s->counts.tx32p[c][b->tx]++;
        b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][0]);
        b->tx += vp56_rac_get_prob(&s->c, s->prob.p.tx16p[c][1]);
        s->counts.tx16p[c][b->tx]++;
        b->tx = vp56_rac_get_prob(&s->c, s->prob.p.tx8p[c]);
        s->counts.tx8p[c][b->tx]++;
        b->tx = FFMIN(max_tx, s->txfmmode);

    // --- intra modes (keyframe/intra-only: contextual default probs) ---
    if (s->keyframe || s->intraonly) {
        uint8_t *a = &s->above_mode_ctx[col * 2];
        uint8_t *l = &s->left_mode_ctx[(row7) << 1];
        if (b->bs > BS_8x8) {
            // FIXME the memory storage intermediates here aren't really
            // necessary, they're just there to make the code slightly
            b->mode[0] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                 vp9_default_kf_ymode_probs[a[0]][l[0]]);
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              vp9_default_kf_ymode_probs[a[1]][b->mode[0]]);
                l[0] = a[1] = b->mode[1];
                l[0] = a[1] = b->mode[1] = b->mode[0];
            if (b->bs != BS_4x8) {
                b->mode[2] = a[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                     vp9_default_kf_ymode_probs[a[0]][l[1]]);
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                  vp9_default_kf_ymode_probs[a[1]][b->mode[2]]);
                    l[1] = a[1] = b->mode[3];
                    l[1] = a[1] = b->mode[3] = b->mode[2];
                b->mode[2] = b->mode[0];
                l[1] = a[1] = b->mode[3] = b->mode[1];
            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          vp9_default_kf_ymode_probs[*a][*l]);
            b->mode[3] = b->mode[2] = b->mode[1] = b->mode[0];
            // FIXME this can probably be optimized
            memset(a, b->mode[0], bwh_tab[0][b->bs][0]);
            memset(l, b->mode[0], bwh_tab[0][b->bs][1]);
        b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                     vp9_default_kf_uvmode_probs[b->mode[3]]);

    // --- intra modes (inter frame: adaptive probs + counts) -------------
    } else if (b->intra) {
        if (b->bs > BS_8x8) {
            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          s->prob.p.y_mode[0]);
            s->counts.y_mode[0][b->mode[0]]++;
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                s->counts.y_mode[0][b->mode[1]]++;
                b->mode[1] = b->mode[0];
            if (b->bs != BS_4x8) {
                b->mode[2] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                              s->prob.p.y_mode[0]);
                s->counts.y_mode[0][b->mode[2]]++;
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                                  s->prob.p.y_mode[0]);
                    s->counts.y_mode[0][b->mode[3]]++;
                    b->mode[3] = b->mode[2];
                b->mode[2] = b->mode[0];
                b->mode[3] = b->mode[1];
            static const uint8_t size_group[10] = {
                3, 3, 3, 3, 2, 2, 2, 1, 1, 1
            int sz = size_group[b->bs];
            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                          s->prob.p.y_mode[sz]);
            b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
            s->counts.y_mode[sz][b->mode[3]]++;
        b->uvmode = vp8_rac_get_tree(&s->c, vp9_intramode_tree,
                                     s->prob.p.uv_mode[b->mode[3]]);
        s->counts.uv_mode[b->mode[3]][b->uvmode]++;

    // --- inter block: references, filter, modes, MVs ---------------------
        // context for the inter-mode symbol, indexed by both neighbours'
        // stored mode contexts
        static const uint8_t inter_mode_ctx_lut[14][14] = {
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 2, 2, 1, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 3 },
            { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 3, 3, 3, 4 },

        if (s->segmentation.feat[b->seg_id].ref_enabled) {
            av_assert2(s->segmentation.feat[b->seg_id].ref_val != 0);
            b->ref[0] = s->segmentation.feat[b->seg_id].ref_val - 1;
            // read comp_pred flag
            if (s->comppredmode != PRED_SWITCHABLE) {
                b->comp = s->comppredmode == PRED_COMPREF;
                // FIXME add intra as ref=0xff (or -1) to make these easier?
                    if (s->above_comp_ctx[col] && s->left_comp_ctx[row7]) {
                    } else if (s->above_comp_ctx[col]) {
                        c = 2 + (s->left_intra_ctx[row7] ||
                                 s->left_ref_ctx[row7] == s->fixcompref);
                    } else if (s->left_comp_ctx[row7]) {
                        c = 2 + (s->above_intra_ctx[col] ||
                                 s->above_ref_ctx[col] == s->fixcompref);
                        c = (!s->above_intra_ctx[col] &&
                             s->above_ref_ctx[col] == s->fixcompref) ^
                            (!s->left_intra_ctx[row7] &&
                             // NOTE: row & 7 == row7 here (see s->row7 setup)
                             s->left_ref_ctx[row & 7] == s->fixcompref);
                    c = s->above_comp_ctx[col] ? 3 :
                        (!s->above_intra_ctx[col] && s->above_ref_ctx[col] == s->fixcompref);
                } else if (have_l) {
                    c = s->left_comp_ctx[row7] ? 3 :
                        (!s->left_intra_ctx[row7] && s->left_ref_ctx[row7] == s->fixcompref);
                b->comp = vp56_rac_get_prob(&s->c, s->prob.p.comp[c]);
                s->counts.comp[c][b->comp]++;

            // read actual references
            // FIXME probably cache a few variables here to prevent repetitive
            // memory accesses below
            if (b->comp) /* two references */ {
                int fix_idx = s->signbias[s->fixcompref], var_idx = !fix_idx, c, bit;
                b->ref[fix_idx] = s->fixcompref;
                // FIXME can this codeblob be replaced by some sort of LUT?
                    if (s->above_intra_ctx[col]) {
                        if (s->left_intra_ctx[row7]) {
                            c = 1 + 2 * (s->left_ref_ctx[row7] != s->varcompref[1]);
                    } else if (s->left_intra_ctx[row7]) {
                        c = 1 + 2 * (s->above_ref_ctx[col] != s->varcompref[1]);
                        int refl = s->left_ref_ctx[row7], refa = s->above_ref_ctx[col];
                        if (refl == refa && refa == s->varcompref[1]) {
                        } else if (!s->left_comp_ctx[row7] && !s->above_comp_ctx[col]) {
                            if ((refa == s->fixcompref && refl == s->varcompref[0]) ||
                                (refl == s->fixcompref && refa == s->varcompref[0])) {
                                c = (refa == refl) ? 3 : 1;
                        } else if (!s->left_comp_ctx[row7]) {
                            if (refa == s->varcompref[1] && refl != s->varcompref[1]) {
                                c = (refl == s->varcompref[1] &&
                                     refa != s->varcompref[1]) ? 2 : 4;
                        } else if (!s->above_comp_ctx[col]) {
                            if (refl == s->varcompref[1] && refa != s->varcompref[1]) {
                                c = (refa == s->varcompref[1] &&
                                     refl != s->varcompref[1]) ? 2 : 4;
                            c = (refl == refa) ? 4 : 2;
                    if (s->above_intra_ctx[col]) {
                    } else if (s->above_comp_ctx[col]) {
                        c = 4 * (s->above_ref_ctx[col] != s->varcompref[1]);
                        c = 3 * (s->above_ref_ctx[col] != s->varcompref[1]);
                } else if (have_l) {
                    if (s->left_intra_ctx[row7]) {
                    } else if (s->left_comp_ctx[row7]) {
                        c = 4 * (s->left_ref_ctx[row7] != s->varcompref[1]);
                        c = 3 * (s->left_ref_ctx[row7] != s->varcompref[1]);
                bit = vp56_rac_get_prob(&s->c, s->prob.p.comp_ref[c]);
                b->ref[var_idx] = s->varcompref[bit];
                s->counts.comp_ref[c][bit]++;
            } else /* single reference */ {
                if (have_a && !s->above_intra_ctx[col]) {
                    if (have_l && !s->left_intra_ctx[row7]) {
                        if (s->left_comp_ctx[row7]) {
                            if (s->above_comp_ctx[col]) {
                                c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7] ||
                                         !s->above_ref_ctx[col]);
                                c = (3 * !s->above_ref_ctx[col]) +
                                    (!s->fixcompref || !s->left_ref_ctx[row7]);
                        } else if (s->above_comp_ctx[col]) {
                            c = (3 * !s->left_ref_ctx[row7]) +
                                (!s->fixcompref || !s->above_ref_ctx[col]);
                            c = 2 * !s->left_ref_ctx[row7] + 2 * !s->above_ref_ctx[col];
                    } else if (s->above_intra_ctx[col]) {
                    } else if (s->above_comp_ctx[col]) {
                        c = 1 + (!s->fixcompref || !s->above_ref_ctx[col]);
                        c = 4 * (!s->above_ref_ctx[col]);
                } else if (have_l && !s->left_intra_ctx[row7]) {
                    // NOTE(review): this inner intra check looks unreachable
                    // given the guard above — mirrors upstream structure
                    if (s->left_intra_ctx[row7]) {
                    } else if (s->left_comp_ctx[row7]) {
                        c = 1 + (!s->fixcompref || !s->left_ref_ctx[row7]);
                        c = 4 * (!s->left_ref_ctx[row7]);
                bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][0]);
                s->counts.single_ref[c][0][bit]++;
                    // FIXME can this codeblob be replaced by some sort of LUT?
                        if (s->left_intra_ctx[row7]) {
                            if (s->above_intra_ctx[col]) {
                            } else if (s->above_comp_ctx[col]) {
                                c = 1 + 2 * (s->fixcompref == 1 ||
                                             s->above_ref_ctx[col] == 1);
                            } else if (!s->above_ref_ctx[col]) {
                                c = 4 * (s->above_ref_ctx[col] == 1);
                        } else if (s->above_intra_ctx[col]) {
                            if (s->left_intra_ctx[row7]) {
                            } else if (s->left_comp_ctx[row7]) {
                                c = 1 + 2 * (s->fixcompref == 1 ||
                                             s->left_ref_ctx[row7] == 1);
                            } else if (!s->left_ref_ctx[row7]) {
                                c = 4 * (s->left_ref_ctx[row7] == 1);
                        } else if (s->above_comp_ctx[col]) {
                            if (s->left_comp_ctx[row7]) {
                                if (s->left_ref_ctx[row7] == s->above_ref_ctx[col]) {
                                    c = 3 * (s->fixcompref == 1 ||
                                             s->left_ref_ctx[row7] == 1);
                            } else if (!s->left_ref_ctx[row7]) {
                                c = 1 + 2 * (s->fixcompref == 1 ||
                                             s->above_ref_ctx[col] == 1);
                                c = 3 * (s->left_ref_ctx[row7] == 1) +
                                    (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
                        } else if (s->left_comp_ctx[row7]) {
                            if (!s->above_ref_ctx[col]) {
                                c = 1 + 2 * (s->fixcompref == 1 ||
                                             s->left_ref_ctx[row7] == 1);
                                c = 3 * (s->above_ref_ctx[col] == 1) +
                                    (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
                        } else if (!s->above_ref_ctx[col]) {
                            if (!s->left_ref_ctx[row7]) {
                                c = 4 * (s->left_ref_ctx[row7] == 1);
                        } else if (!s->left_ref_ctx[row7]) {
                            c = 4 * (s->above_ref_ctx[col] == 1);
                            c = 2 * (s->left_ref_ctx[row7] == 1) +
                                2 * (s->above_ref_ctx[col] == 1);
                        if (s->above_intra_ctx[col] ||
                            (!s->above_comp_ctx[col] && !s->above_ref_ctx[col])) {
                        } else if (s->above_comp_ctx[col]) {
                            c = 3 * (s->fixcompref == 1 || s->above_ref_ctx[col] == 1);
                            c = 4 * (s->above_ref_ctx[col] == 1);
                    } else if (have_l) {
                        if (s->left_intra_ctx[row7] ||
                            (!s->left_comp_ctx[row7] && !s->left_ref_ctx[row7])) {
                        } else if (s->left_comp_ctx[row7]) {
                            c = 3 * (s->fixcompref == 1 || s->left_ref_ctx[row7] == 1);
                            c = 4 * (s->left_ref_ctx[row7] == 1);
                    bit = vp56_rac_get_prob(&s->c, s->prob.p.single_ref[c][1]);
                    s->counts.single_ref[c][1][bit]++;
                    b->ref[0] = 1 + bit;

        // --- inter modes ------------------------------------------------
        if (b->bs <= BS_8x8) {
            if (s->segmentation.feat[b->seg_id].skip_enabled) {
                b->mode[0] = b->mode[1] = b->mode[2] = b->mode[3] = ZEROMV;
                static const uint8_t off[10] = {
                    3, 0, 0, 1, 0, 0, 0, 0, 0, 0
                // FIXME this needs to use the LUT tables from find_ref_mvs
                // because not all are -1,0/0,-1
                int c = inter_mode_ctx_lut[s->above_mode_ctx[col + off[b->bs]]]
                                          [s->left_mode_ctx[row7 + off[b->bs]]];
                b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                b->mode[1] = b->mode[2] = b->mode[3] = b->mode[0];
                s->counts.mv_mode[c][b->mode[0] - 10]++;

        // --- interpolation filter ---------------------------------------
        if (s->filtermode == FILTER_SWITCHABLE) {
            if (have_a && s->above_mode_ctx[col] >= NEARESTMV) {
                if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
                    c = s->above_filter_ctx[col] == s->left_filter_ctx[row7] ?
                        s->left_filter_ctx[row7] : 3;
                    c = s->above_filter_ctx[col];
            } else if (have_l && s->left_mode_ctx[row7] >= NEARESTMV) {
                c = s->left_filter_ctx[row7];
            filter_id = vp8_rac_get_tree(&s->c, vp9_filter_tree,
                                         s->prob.p.filter[c]);
            s->counts.filter[c][filter_id]++;
            b->filter = vp9_filter_lut[filter_id];
            b->filter = s->filtermode;

        // --- sub-block modes + MVs (4x4/4x8/8x4) -------------------------
        if (b->bs > BS_8x8) {
            int c = inter_mode_ctx_lut[s->above_mode_ctx[col]][s->left_mode_ctx[row7]];
            b->mode[0] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                          s->prob.p.mv_mode[c]);
            s->counts.mv_mode[c][b->mode[0] - 10]++;
            fill_mv(s, b->mv[0], b->mode[0], 0);
            if (b->bs != BS_8x4) {
                b->mode[1] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                s->counts.mv_mode[c][b->mode[1] - 10]++;
                fill_mv(s, b->mv[1], b->mode[1], 1);
                b->mode[1] = b->mode[0];
                AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
                AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
            if (b->bs != BS_4x8) {
                b->mode[2] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                              s->prob.p.mv_mode[c]);
                s->counts.mv_mode[c][b->mode[2] - 10]++;
                fill_mv(s, b->mv[2], b->mode[2], 2);
                if (b->bs != BS_8x4) {
                    b->mode[3] = vp8_rac_get_tree(&s->c, vp9_inter_mode_tree,
                                                  s->prob.p.mv_mode[c]);
                    s->counts.mv_mode[c][b->mode[3] - 10]++;
                    fill_mv(s, b->mv[3], b->mode[3], 3);
                    b->mode[3] = b->mode[2];
                    AV_COPY32(&b->mv[3][0], &b->mv[2][0]);
                    AV_COPY32(&b->mv[3][1], &b->mv[2][1]);
                b->mode[2] = b->mode[0];
                AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
                AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
                b->mode[3] = b->mode[1];
                AV_COPY32(&b->mv[3][0], &b->mv[1][0]);
                AV_COPY32(&b->mv[3][1], &b->mv[1][1]);
            // whole-block MV: read once and replicate to all four slots
            fill_mv(s, b->mv[0], b->mode[0], -1);
            AV_COPY32(&b->mv[1][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[2][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[3][0], &b->mv[0][0]);
            AV_COPY32(&b->mv[1][1], &b->mv[0][1]);
            AV_COPY32(&b->mv[2][1], &b->mv[0][1]);
            AV_COPY32(&b->mv[3][1], &b->mv[0][1]);
        vref = b->ref[b->comp ? s->signbias[s->varcompref[0]] : 0];

// SPLAT_CTX(var, val, n): store n copies of byte val at &var with one or
// two aligned stores; 64-bit variant used when HAVE_FAST_64BIT.
#define SPLAT_CTX(var, val, n) \
    case 1: var = val; break; \
    case 2: AV_WN16A(&var, val * 0x0101); break; \
    case 4: AV_WN32A(&var, val * 0x01010101); break; \
    case 8: AV_WN64A(&var, val * 0x0101010101010101ULL); break; \
        uint64_t v64 = val * 0x0101010101010101ULL; \
        AV_WN64A( &var, v64); \
        AV_WN64A(&((uint8_t *) &var)[8], v64); \
// 32-bit fallback variant of SPLAT_CTX
#define SPLAT_CTX(var, val, n) \
    case 1: var = val; break; \
    case 2: AV_WN16A(&var, val * 0x0101); break; \
    case 4: AV_WN32A(&var, val * 0x01010101); break; \
        uint32_t v32 = val * 0x01010101; \
        AV_WN32A( &var, v32); \
        AV_WN32A(&((uint8_t *) &var)[4], v32); \
        uint32_t v32 = val * 0x01010101; \
        AV_WN32A( &var, v32); \
        AV_WN32A(&((uint8_t *) &var)[4], v32); \
        AV_WN32A(&((uint8_t *) &var)[8], v32); \
        AV_WN32A(&((uint8_t *) &var)[12], v32); \

    // --- splat per-block decisions into left/above context arrays --------
    switch (bwh_tab[1][b->bs][0]) {
#define SET_CTXS(dir, off, n) \
        SPLAT_CTX(s->dir##_skip_ctx[off], b->skip, n); \
        SPLAT_CTX(s->dir##_txfm_ctx[off], b->tx, n); \
        SPLAT_CTX(s->dir##_partition_ctx[off], dir##_ctx[b->bs], n); \
        if (!s->keyframe && !s->intraonly) { \
            SPLAT_CTX(s->dir##_intra_ctx[off], b->intra, n); \
            SPLAT_CTX(s->dir##_comp_ctx[off], b->comp, n); \
            SPLAT_CTX(s->dir##_mode_ctx[off], b->mode[3], n); \
                SPLAT_CTX(s->dir##_ref_ctx[off], vref, n); \
                if (s->filtermode == FILTER_SWITCHABLE) { \
                    SPLAT_CTX(s->dir##_filter_ctx[off], filter_id, n); \
    case 1: SET_CTXS(above, col, 1); break;
    case 2: SET_CTXS(above, col, 2); break;
    case 4: SET_CTXS(above, col, 4); break;
    case 8: SET_CTXS(above, col, 8); break;
    switch (bwh_tab[1][b->bs][1]) {
    case 1: SET_CTXS(left, row7, 1); break;
    case 2: SET_CTXS(left, row7, 2); break;
    case 4: SET_CTXS(left, row7, 4); break;
    case 8: SET_CTXS(left, row7, 8); break;

    // --- store MVs into left/above MV context for neighbour prediction ---
    if (!s->keyframe && !s->intraonly) {
        if (b->bs > BS_8x8) {
            int mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
            AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][0], &b->mv[1][0]);
            AV_COPY32(&s->left_mv_ctx[row7 * 2 + 0][1], &b->mv[1][1]);
            AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][0], mv0);
            AV_WN32A(&s->left_mv_ctx[row7 * 2 + 1][1], mv1);
            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][0], &b->mv[2][0]);
            AV_COPY32(&s->above_mv_ctx[col * 2 + 0][1], &b->mv[2][1]);
            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][0], mv0);
            AV_WN32A(&s->above_mv_ctx[col * 2 + 1][1], mv1);
            int n, mv0 = AV_RN32A(&b->mv[3][0]), mv1 = AV_RN32A(&b->mv[3][1]);
            for (n = 0; n < w4 * 2; n++) {
                AV_WN32A(&s->above_mv_ctx[col * 2 + n][0], mv0);
                AV_WN32A(&s->above_mv_ctx[col * 2 + n][1], mv1);
            for (n = 0; n < h4 * 2; n++) {
                AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][0], mv0);
                AV_WN32A(&s->left_mv_ctx[row7 * 2 + n][1], mv1);

        // --- store refs/MVs into the frame-wide mvref buffer ---------------
        for (y = 0; y < h4; y++) {
            int x, o = (row + y) * s->sb_cols * 8 + col;
            struct VP9mvrefPair *mv = &s->frames[CUR_FRAME].mv[o];
                for (x = 0; x < w4; x++) {
            } else if (b->comp) {
                for (x = 0; x < w4; x++) {
                    mv[x].ref[0] = b->ref[0];
                    mv[x].ref[1] = b->ref[1];
                    AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
                    AV_COPY32(&mv[x].mv[1], &b->mv[3][1]);
                for (x = 0; x < w4; x++) {
                    mv[x].ref[0] = b->ref[0];
                    AV_COPY32(&mv[x].mv[0], &b->mv[3][0]);
2015 static av_always_inline int
2016 decode_coeffs_b_generic(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2017 int is_tx32x32, unsigned (*cnt)[6][3],
2018 unsigned (*eob)[6][2], uint8_t (*p)[6][11],
2019 int nnz, const int16_t *scan, const int16_t (*nb)[2],
2020 const int16_t *band_counts, const int16_t *qmul)
2022 int i = 0, band = 0, band_left = band_counts[band];
2023 uint8_t *tp = p[0][nnz];
2024 uint8_t cache[1024];
2029 val = vp56_rac_get_prob_branchy(c, tp[0]); // eob
2030 eob[band][nnz][val]++;
2035 if (!vp56_rac_get_prob_branchy(c, tp[1])) { // zero
2036 cnt[band][nnz][0]++;
2038 band_left = band_counts[++band];
2040 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2042 if (++i == n_coeffs)
2043 break; //invalid input; blocks should end with EOB
2048 if (!vp56_rac_get_prob_branchy(c, tp[2])) { // one
2049 cnt[band][nnz][1]++;
2053 // fill in p[3-10] (model fill) - only once per frame for each pos
2055 memcpy(&tp[3], vp9_model_pareto8[tp[2]], 8);
2057 cnt[band][nnz][2]++;
2058 if (!vp56_rac_get_prob_branchy(c, tp[3])) { // 2, 3, 4
2059 if (!vp56_rac_get_prob_branchy(c, tp[4])) {
2060 cache[rc] = val = 2;
2062 val = 3 + vp56_rac_get_prob(c, tp[5]);
2065 } else if (!vp56_rac_get_prob_branchy(c, tp[6])) { // cat1/2
2067 if (!vp56_rac_get_prob_branchy(c, tp[7])) {
2068 val = 5 + vp56_rac_get_prob(c, 159);
2070 val = 7 + (vp56_rac_get_prob(c, 165) << 1);
2071 val += vp56_rac_get_prob(c, 145);
2075 if (!vp56_rac_get_prob_branchy(c, tp[8])) {
2076 if (!vp56_rac_get_prob_branchy(c, tp[9])) {
2077 val = 11 + (vp56_rac_get_prob(c, 173) << 2);
2078 val += (vp56_rac_get_prob(c, 148) << 1);
2079 val += vp56_rac_get_prob(c, 140);
2081 val = 19 + (vp56_rac_get_prob(c, 176) << 3);
2082 val += (vp56_rac_get_prob(c, 155) << 2);
2083 val += (vp56_rac_get_prob(c, 140) << 1);
2084 val += vp56_rac_get_prob(c, 135);
2086 } else if (!vp56_rac_get_prob_branchy(c, tp[10])) {
2087 val = 35 + (vp56_rac_get_prob(c, 180) << 4);
2088 val += (vp56_rac_get_prob(c, 157) << 3);
2089 val += (vp56_rac_get_prob(c, 141) << 2);
2090 val += (vp56_rac_get_prob(c, 134) << 1);
2091 val += vp56_rac_get_prob(c, 130);
2093 val = 67 + (vp56_rac_get_prob(c, 254) << 13);
2094 val += (vp56_rac_get_prob(c, 254) << 12);
2095 val += (vp56_rac_get_prob(c, 254) << 11);
2096 val += (vp56_rac_get_prob(c, 252) << 10);
2097 val += (vp56_rac_get_prob(c, 249) << 9);
2098 val += (vp56_rac_get_prob(c, 243) << 8);
2099 val += (vp56_rac_get_prob(c, 230) << 7);
2100 val += (vp56_rac_get_prob(c, 196) << 6);
2101 val += (vp56_rac_get_prob(c, 177) << 5);
2102 val += (vp56_rac_get_prob(c, 153) << 4);
2103 val += (vp56_rac_get_prob(c, 140) << 3);
2104 val += (vp56_rac_get_prob(c, 133) << 2);
2105 val += (vp56_rac_get_prob(c, 130) << 1);
2106 val += vp56_rac_get_prob(c, 129);
2111 band_left = band_counts[++band];
2113 coef[rc] = ((vp8_rac_get(c) ? -val : val) * qmul[!!i]) / 2;
2115 coef[rc] = (vp8_rac_get(c) ? -val : val) * qmul[!!i];
2116 nnz = (1 + cache[nb[i][0]] + cache[nb[i][1]]) >> 1;
2118 } while (++i < n_coeffs);
2123 static int decode_coeffs_b(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2124 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2125 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2126 const int16_t (*nb)[2], const int16_t *band_counts,
2127 const int16_t *qmul)
2129 return decode_coeffs_b_generic(c, coef, n_coeffs, 0, cnt, eob, p,
2130 nnz, scan, nb, band_counts, qmul);
2133 static int decode_coeffs_b32(VP56RangeCoder *c, int16_t *coef, int n_coeffs,
2134 unsigned (*cnt)[6][3], unsigned (*eob)[6][2],
2135 uint8_t (*p)[6][11], int nnz, const int16_t *scan,
2136 const int16_t (*nb)[2], const int16_t *band_counts,
2137 const int16_t *qmul)
2139 return decode_coeffs_b_generic(c, coef, n_coeffs, 1, cnt, eob, p,
2140 nnz, scan, nb, band_counts, qmul);
/**
 * Decode all luma and chroma coefficients for the current block (s->b),
 * looping over transform sub-blocks in scan order and maintaining the
 * per-column/per-row nonzero contexts (a[] / l[]).
 */
static void decode_coeffs(AVCodecContext *ctx)
    VP9Context *s = ctx->priv_data;
    int row = s->row, col = s->col;
    // token probabilities/counters, selected by tx size, plane and intra-ness
    uint8_t (*p)[6][11] = s->prob.coef[b->tx][0 /* y */][!b->intra];
    unsigned (*c)[6][3] = s->counts.coef[b->tx][0 /* y */][!b->intra];
    unsigned (*e)[6][2] = s->counts.eob[b->tx][0 /* y */][!b->intra];
    // block size in 4x4 units, and decode bounds clipped to the frame edge
    int w4 = bwh_tab[1][b->bs][0] << 1, h4 = bwh_tab[1][b->bs][1] << 1;
    int end_x = FFMIN(2 * (s->cols - col), w4);
    int end_y = FFMIN(2 * (s->rows - row), h4);
    int n, pl, x, y, res;
    int16_t (*qmul)[2] = s->segmentation.feat[b->seg_id].qmul;
    // lossless uses the WHT scan tables stored after the 4 DCT tx sizes
    int tx = 4 * s->lossless + b->tx;
    const int16_t * const *yscans = vp9_scans[tx];
    const int16_t (* const *ynbs)[2] = vp9_scans_nb[tx];
    const int16_t *uvscan = vp9_scans[b->uvtx][DCT_DCT];
    const int16_t (*uvnb)[2] = vp9_scans_nb[b->uvtx][DCT_DCT];
    uint8_t *a = &s->above_y_nnz_ctx[col * 2];
    uint8_t *l = &s->left_y_nnz_ctx[(row & 7) << 1];
    // coefficients per band for tx sizes 4x4..32x32
    static const int16_t band_counts[4][8] = {
        { 1, 2, 3, 4, 3, 16 - 13 },
        { 1, 2, 3, 4, 11, 64 - 21 },
        { 1, 2, 3, 4, 11, 256 - 21 },
        { 1, 2, 3, 4, 11, 1024 - 21 },
    const int16_t *y_band_counts = band_counts[b->tx];
    const int16_t *uv_band_counts = band_counts[b->uvtx];

// Collapse several 4x4 nnz context entries into one for larger tx sizes.
#define MERGE(la, end, step, rd) \
    for (n = 0; n < end; n += step) \
        la[n] = !!rd(&la[n])
#define MERGE_CTX(step, rd) \
        MERGE(l, end_y, step, rd); \
        MERGE(a, end_x, step, rd); \

#define DECODE_Y_COEF_LOOP(step, mode_index, v) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            enum TxfmType txtp = vp9_intra_txfm_type[b->mode[mode_index]]; \
            res = decode_coeffs_b##v(&s->c, s->block + 16 * n, 16 * step * step, \
                                     c, e, p, a[x] + l[y], yscans[txtp], \
                                     ynbs[txtp], y_band_counts, qmul[0]); \
            a[x] = l[y] = !!res; \
                AV_WN16A(&s->eob[n], res); \

// Re-expand merged contexts back to per-4x4 granularity for neighbours.
#define SPLAT(la, end, step, cond) \
        for (n = 1; n < end; n += step) \
            la[n] = la[n - 1]; \
    } else if (step == 4) { \
            for (n = 0; n < end; n += step) \
                AV_WN32A(&la[n], la[n] * 0x01010101); \
            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 3)); \
    } else /* step == 8 */ { \
            if (HAVE_FAST_64BIT) { \
                for (n = 0; n < end; n += step) \
                    AV_WN64A(&la[n], la[n] * 0x0101010101010101ULL); \
                for (n = 0; n < end; n += step) { \
                    uint32_t v32 = la[n] * 0x01010101; \
                    AV_WN32A(&la[n], v32); \
                    AV_WN32A(&la[n + 4], v32); \
            for (n = 0; n < end; n += step) \
                memset(&la[n + 1], la[n], FFMIN(end - n - 1, 7)); \
#define SPLAT_CTX(step) \
        SPLAT(a, end_x, step, end_x == w4); \
        SPLAT(l, end_y, step, end_y == h4); \

    // --- luma, dispatched on transform size ---------------------------
        DECODE_Y_COEF_LOOP(1, b->bs > BS_8x8 ? n : 0,);
        MERGE_CTX(2, AV_RN16A);
        DECODE_Y_COEF_LOOP(2, 0,);
        MERGE_CTX(4, AV_RN32A);
        DECODE_Y_COEF_LOOP(4, 0,);
        MERGE_CTX(8, AV_RN64A);
        DECODE_Y_COEF_LOOP(8, 0, 32);

#define DECODE_UV_COEF_LOOP(step) \
    for (n = 0, y = 0; y < end_y; y += step) { \
        for (x = 0; x < end_x; x += step, n += step * step) { \
            res = decode_coeffs_b(&s->c, s->uvblock[pl] + 16 * n, \
                                  16 * step * step, c, e, p, a[x] + l[y], \
                                  uvscan, uvnb, uv_band_counts, qmul[1]); \
            a[x] = l[y] = !!res; \
                AV_WN16A(&s->uveob[pl][n], res); \
                s->uveob[pl][n] = res; \

    // --- chroma: both planes share tables; contexts are per-plane --------
    p = s->prob.coef[b->uvtx][1 /* uv */][!b->intra];
    c = s->counts.coef[b->uvtx][1 /* uv */][!b->intra];
    e = s->counts.eob[b->uvtx][1 /* uv */][!b->intra];
    for (pl = 0; pl < 2; pl++) {
        a = &s->above_uv_nnz_ctx[pl][col];
        l = &s->left_uv_nnz_ctx[pl][row & 7];
            DECODE_UV_COEF_LOOP(1);
            MERGE_CTX(2, AV_RN16A);
            DECODE_UV_COEF_LOOP(2);
            MERGE_CTX(4, AV_RN32A);
            DECODE_UV_COEF_LOOP(4);
            MERGE_CTX(8, AV_RN64A);
            // a 64x64 (max) uv block can ever only contain 1 tx32x32 block
            // so there is no need to loop
            res = decode_coeffs_b32(&s->c, s->uvblock[pl],
                                    1024, c, e, p, a[0] + l[0],
                                    uvscan, uvnb, uv_band_counts, qmul[1]);
            a[0] = l[0] = !!res;
            AV_WN16A(&s->uveob[pl][0], res);
// Fix up an intra prediction mode for missing neighbours and build the
// top (*a) and left (l) reference-pixel arrays used by the prediction
// functions. Returns the (possibly remapped) mode.
// NOTE(review): this dump has lines elided (embedded numbering is
// non-contiguous), so some statements/braces of the function body are
// not visible here; comments below describe only the visible code.
2308 static av_always_inline int check_intra_mode(VP9Context *s, int mode, uint8_t **a,
2309 uint8_t *dst_edge, ptrdiff_t stride_edge,
2310 uint8_t *dst_inner, ptrdiff_t stride_inner,
2311 uint8_t *l, int col, int x, int w,
2312 int row, int y, enum TxfmMode tx,
// neighbour availability: top exists unless we are in the first 4x4 row
// of the frame; left exists unless we sit on the tile's left border
2315 int have_top = row > 0 || y > 0;
2316 int have_left = col > s->tiling.tile_col_start || x > 0;
2317 int have_right = x < w - 1;
// remap each prediction mode depending on which neighbours exist
// (e.g. VERT_PRED without a top row degrades to DC_127_PRED)
2318 static const uint8_t mode_conv[10][2 /* have_left */][2 /* have_top */] = {
2319 [VERT_PRED] = { { DC_127_PRED, VERT_PRED },
2320 { DC_127_PRED, VERT_PRED } },
2321 [HOR_PRED] = { { DC_129_PRED, DC_129_PRED },
2322 { HOR_PRED, HOR_PRED } },
2323 [DC_PRED] = { { DC_128_PRED, TOP_DC_PRED },
2324 { LEFT_DC_PRED, DC_PRED } },
2325 [DIAG_DOWN_LEFT_PRED] = { { DC_127_PRED, DIAG_DOWN_LEFT_PRED },
2326 { DC_127_PRED, DIAG_DOWN_LEFT_PRED } },
2327 [DIAG_DOWN_RIGHT_PRED] = { { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED },
2328 { DIAG_DOWN_RIGHT_PRED, DIAG_DOWN_RIGHT_PRED } },
2329 [VERT_RIGHT_PRED] = { { VERT_RIGHT_PRED, VERT_RIGHT_PRED },
2330 { VERT_RIGHT_PRED, VERT_RIGHT_PRED } },
2331 [HOR_DOWN_PRED] = { { HOR_DOWN_PRED, HOR_DOWN_PRED },
2332 { HOR_DOWN_PRED, HOR_DOWN_PRED } },
2333 [VERT_LEFT_PRED] = { { DC_127_PRED, VERT_LEFT_PRED },
2334 { DC_127_PRED, VERT_LEFT_PRED } },
2335 [HOR_UP_PRED] = { { DC_129_PRED, DC_129_PRED },
2336 { HOR_UP_PRED, HOR_UP_PRED } },
2337 [TM_VP8_PRED] = { { DC_129_PRED, VERT_PRED },
2338 { HOR_PRED, TM_VP8_PRED } },
// which neighbouring pixel groups each (remapped) mode actually reads
2340 static const struct {
2341 uint8_t needs_left:1;
2342 uint8_t needs_top:1;
2343 uint8_t needs_topleft:1;
2344 uint8_t needs_topright:1;
2345 } edges[N_INTRA_PRED_MODES] = {
2346 [VERT_PRED] = { .needs_top = 1 },
2347 [HOR_PRED] = { .needs_left = 1 },
2348 [DC_PRED] = { .needs_top = 1, .needs_left = 1 },
2349 [DIAG_DOWN_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2350 [DIAG_DOWN_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2351 [VERT_RIGHT_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2352 [HOR_DOWN_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2353 [VERT_LEFT_PRED] = { .needs_top = 1, .needs_topright = 1 },
2354 [HOR_UP_PRED] = { .needs_left = 1 },
2355 [TM_VP8_PRED] = { .needs_left = 1, .needs_top = 1, .needs_topleft = 1 },
2356 [LEFT_DC_PRED] = { .needs_left = 1 },
2357 [TOP_DC_PRED] = { .needs_top = 1 },
2358 [DC_128_PRED] = { 0 },
2359 [DC_127_PRED] = { 0 },
2360 [DC_129_PRED] = { 0 }
2363 av_assert2(mode >= 0 && mode < 10);
2364 mode = mode_conv[mode][have_left][have_top];
2365 if (edges[mode].needs_top) {
2366 uint8_t *top, *topleft;
// pixels needed for this tx size vs. pixels actually available to the
// right edge of the visible frame (uv planes are half-width: << !p)
2367 int n_px_need = 4 << tx, n_px_have = (((s->cols - col) << !p) - x) * 4;
2368 int n_px_need_tr = 0;
2370 if (tx == TX_4X4 && edges[mode].needs_topright && have_right)
2373 // if top of sb64-row, use s->intra_pred_data[] instead of
2374 // dst[-stride] for intra prediction (it contains pre- instead of
2375 // post-loopfilter data)
2377 top = !(row & 7) && !y ?
2378 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2379 y == 0 ? &dst_edge[-stride_edge] : &dst_inner[-stride_inner];
2381 topleft = !(row & 7) && !y ?
2382 s->intra_pred_data[p] + col * (8 >> !!p) + x * 4 :
2383 y == 0 || x == 0 ? &dst_edge[-stride_edge] :
2384 &dst_inner[-stride_inner];
2388 (!edges[mode].needs_topleft || (have_left && top == topleft)) &&
2389 (tx != TX_4X4 || !edges[mode].needs_topright || have_right) &&
2390 n_px_need + n_px_need_tr <= n_px_have) {
2394 if (n_px_need <= n_px_have) {
2395 memcpy(*a, top, n_px_need);
// not enough top pixels: copy what exists, extend with the last one
2397 memcpy(*a, top, n_px_have);
2398 memset(&(*a)[n_px_have], (*a)[n_px_have - 1],
2399 n_px_need - n_px_have);
// no top row at all: fill with the spec's 127 constant
2402 memset(*a, 127, n_px_need);
2404 if (edges[mode].needs_topleft) {
2405 if (have_left && have_top) {
2406 (*a)[-1] = topleft[-1];
2408 (*a)[-1] = have_top ? 129 : 127;
2411 if (tx == TX_4X4 && edges[mode].needs_topright) {
2412 if (have_top && have_right &&
2413 n_px_need + n_px_need_tr <= n_px_have) {
2414 memcpy(&(*a)[4], &top[4], 4);
// no usable top-right: replicate the rightmost top pixel
2416 memset(&(*a)[4], (*a)[3], 4);
2421 if (edges[mode].needs_left) {
2423 int n_px_need = 4 << tx, i, n_px_have = (((s->rows - row) << !p) - y) * 4;
2424 uint8_t *dst = x == 0 ? dst_edge : dst_inner;
2425 ptrdiff_t stride = x == 0 ? stride_edge : stride_inner;
// l[] is stored bottom-to-top, hence the reversed index
2427 if (n_px_need <= n_px_have) {
2428 for (i = 0; i < n_px_need; i++)
2429 l[n_px_need - 1 - i] = dst[i * stride - 1];
2431 for (i = 0; i < n_px_have; i++)
2432 l[n_px_need - 1 - i] = dst[i * stride - 1];
2433 memset(l, l[n_px_need - n_px_have], n_px_need - n_px_have);
// no left column: fill with the spec's 129 constant
2436 memset(l, 129, 4 << tx);
// Reconstruct an intra-coded block: for every transform sub-block of the
// luma plane and both chroma planes, build the edge pixels
// (check_intra_mode), run the intra predictor, then add the inverse
// transform of the residual when there are coded coefficients (eob).
// NOTE(review): lines are elided in this dump; closing braces and some
// conditions are not visible.
2443 static void intra_recon(AVCodecContext *ctx, ptrdiff_t y_off, ptrdiff_t uv_off)
2445 VP9Context *s = ctx->priv_data;
2447 int row = s->row, col = s->col;
// block size in 4px units; step1d = tx size in 4px units
2448 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2449 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
// clip the loop bounds to the visible frame area
2450 int end_x = FFMIN(2 * (s->cols - col), w4);
2451 int end_y = FFMIN(2 * (s->rows - row), h4);
2452 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2453 int uvstep1d = 1 << b->uvtx, p;
// dst: possibly the emu-edge temp buffer; dst_r: the real frame buffer
2454 uint8_t *dst = s->dst[0], *dst_r = s->frames[CUR_FRAME].tf.f->data[0] + y_off;
2455 LOCAL_ALIGNED_32(uint8_t, a_buf, [64]);
2456 LOCAL_ALIGNED_32(uint8_t, l, [32]);
2458 for (n = 0, y = 0; y < end_y; y += step1d) {
2459 uint8_t *ptr = dst, *ptr_r = dst_r;
2460 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d,
2461 ptr_r += 4 * step1d, n += step) {
2462 int mode = b->mode[b->bs > BS_8x8 && b->tx == TX_4X4 ?
2464 uint8_t *a = &a_buf[32];
2465 enum TxfmType txtp = vp9_intra_txfm_type[mode];
// eob is 16 bits per tx block for tx > 8x8, 8 bits otherwise
2466 int eob = b->skip ? 0 : b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2468 mode = check_intra_mode(s, mode, &a, ptr_r,
2469 s->frames[CUR_FRAME].tf.f->linesize[0],
2470 ptr, s->y_stride, l,
2471 col, x, w4, row, y, b->tx, 0);
2472 s->dsp.intra_pred[b->tx][mode](ptr, s->y_stride, l, a);
2474 s->dsp.itxfm_add[tx][txtp](ptr, s->y_stride,
2475 s->block + 16 * n, eob);
2477 dst_r += 4 * step1d * s->frames[CUR_FRAME].tf.f->linesize[0];
2478 dst += 4 * step1d * s->y_stride;
// same as above but for the two (subsampled) chroma planes
2486 step = 1 << (b->uvtx * 2);
2487 for (p = 0; p < 2; p++) {
2488 dst = s->dst[1 + p];
2489 dst_r = s->frames[CUR_FRAME].tf.f->data[1 + p] + uv_off;
2490 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2491 uint8_t *ptr = dst, *ptr_r = dst_r;
2492 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d,
2493 ptr_r += 4 * uvstep1d, n += step) {
2494 int mode = b->uvmode;
2495 uint8_t *a = &a_buf[16];
2496 int eob = b->skip ? 0 : b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2498 mode = check_intra_mode(s, mode, &a, ptr_r,
2499 s->frames[CUR_FRAME].tf.f->linesize[1],
2500 ptr, s->uv_stride, l,
2501 col, x, w4, row, y, b->uvtx, p + 1);
2502 s->dsp.intra_pred[b->uvtx][mode](ptr, s->uv_stride, l, a);
// chroma residual always uses DCT_DCT
2504 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2505 s->uvblock[p] + 16 * n, eob);
2507 dst_r += 4 * uvstep1d * s->frames[CUR_FRAME].tf.f->linesize[1];
2508 dst += 4 * uvstep1d * s->uv_stride;
// Motion compensation of one luma block for one reference direction.
// Waits for the reference frame row to be decoded (frame threading),
// falls back to emulated_edge_mc when the filter taps would read
// outside the reference picture.
2513 static av_always_inline void mc_luma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2514 uint8_t *dst, ptrdiff_t dst_stride,
2515 const uint8_t *ref, ptrdiff_t ref_stride,
2516 ThreadFrame *ref_frame,
2517 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2518 int bw, int bh, int w, int h)
2520 int mx = mv->x, my = mv->y, th;
2524 ref += y * ref_stride + x;
2527 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2528 // we use +7 because the last 7 pixels of each sbrow can be changed in
2529 // the longest loopfilter of the next sbrow
2530 th = (y + bh + 4 * !!my + 7) >> 6;
2531 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
// subpel interpolation reads 3 pixels before / 4 after the block; if
// that overruns the reference frame, build a padded copy first
2532 if (x < !!mx * 3 || y < !!my * 3 ||
2533 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
2534 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2535 ref - !!my * 3 * ref_stride - !!mx * 3,
2537 bw + !!mx * 7, bh + !!my * 7,
2538 x - !!mx * 3, y - !!my * 3, w, h);
// emu buffer rows are 80 bytes wide
2539 ref = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
// luma MVs are in 1/8-pel units after the << 1
2542 mc[!!mx][!!my](dst, dst_stride, ref, ref_stride, bh, mx << 1, my << 1);
// Chroma counterpart of mc_luma_dir: motion-compensates the U and V
// planes of one block for one reference direction, with per-plane
// emulated-edge handling when the MV points outside the reference.
2545 static av_always_inline void mc_chroma_dir(VP9Context *s, vp9_mc_func (*mc)[2],
2546 uint8_t *dst_u, uint8_t *dst_v,
2547 ptrdiff_t dst_stride,
2548 const uint8_t *ref_u, ptrdiff_t src_stride_u,
2549 const uint8_t *ref_v, ptrdiff_t src_stride_v,
2550 ThreadFrame *ref_frame,
2551 ptrdiff_t y, ptrdiff_t x, const VP56mv *mv,
2552 int bw, int bh, int w, int h)
2554 int mx = mv->x, my = mv->y, th;
2558 ref_u += y * src_stride_u + x;
2559 ref_v += y * src_stride_v + x;
2562 // FIXME bilinear filter only needs 0/1 pixels, not 3/4
2563 // we use +7 because the last 7 pixels of each sbrow can be changed in
2564 // the longest loopfilter of the next sbrow
// >> 5 (not >> 6 as in luma) because chroma rows are subsampled
2565 th = (y + bh + 4 * !!my + 7) >> 5;
2566 ff_thread_await_progress(ref_frame, FFMAX(th, 0), 0);
2567 if (x < !!mx * 3 || y < !!my * 3 ||
2568 x + !!mx * 4 > w - bw || y + !!my * 4 > h - bh) {
// out-of-frame taps: pad U plane into the emu buffer, then filter
2569 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2570 ref_u - !!my * 3 * src_stride_u - !!mx * 3,
2572 bw + !!mx * 7, bh + !!my * 7,
2573 x - !!mx * 3, y - !!my * 3, w, h);
2574 ref_u = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2575 mc[!!mx][!!my](dst_u, dst_stride, ref_u, 80, bh, mx, my);
// same for the V plane (emu buffer is reused sequentially)
2577 s->vdsp.emulated_edge_mc(s->edge_emu_buffer,
2578 ref_v - !!my * 3 * src_stride_v - !!mx * 3,
2580 bw + !!mx * 7, bh + !!my * 7,
2581 x - !!mx * 3, y - !!my * 3, w, h);
2582 ref_v = s->edge_emu_buffer + !!my * 3 * 80 + !!mx * 3;
2583 mc[!!mx][!!my](dst_v, dst_stride, ref_v, 80, bh, mx, my);
// fast path: both planes can be filtered straight from the reference
2585 mc[!!mx][!!my](dst_u, dst_stride, ref_u, src_stride_u, bh, mx, my);
2586 mc[!!mx][!!my](dst_v, dst_stride, ref_v, src_stride_v, bh, mx, my);
// Reconstruct an inter-coded block: motion compensation for luma (with
// special handling of the sub-8x8 split sizes 8x4 / 4x8 / 4x4, each of
// which issues one MC call per sub-block and per reference), then
// chroma MC using the rounded average of the four sub-block MVs, and
// finally the inverse-transform residual add for all planes.
// NOTE(review): lines are elided in this dump (conditions guarding the
// second-reference calls and some braces are not visible).
2590 static void inter_recon(AVCodecContext *ctx)
// log2 of MC function index per block size, [0]=luma, [1]=chroma
2592 static const uint8_t bwlog_tab[2][N_BS_SIZES] = {
2593 { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 },
2594 { 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4 },
2596 VP9Context *s = ctx->priv_data;
2598 int row = s->row, col = s->col;
2599 ThreadFrame *tref1 = &s->refs[s->refidx[b->ref[0]]], *tref2;
2600 AVFrame *ref1 = tref1->f, *ref2;
2601 int w1 = ref1->width, h1 = ref1->height, w2, h2;
2602 ptrdiff_t ls_y = s->y_stride, ls_uv = s->uv_stride;
2605 tref2 = &s->refs[s->refidx[b->ref[1]]];
// y motion compensation; sub-8x8 sizes use per-sub-block MVs
2612 if (b->bs > BS_8x8) {
2613 if (b->bs == BS_8x4) {
2614 mc_luma_dir(s, s->dsp.mc[3][b->filter][0], s->dst[0], ls_y,
2615 ref1->data[0], ref1->linesize[0], tref1,
2616 row << 3, col << 3, &b->mv[0][0], 8, 4, w1, h1);
2617 mc_luma_dir(s, s->dsp.mc[3][b->filter][0],
2618 s->dst[0] + 4 * ls_y, ls_y,
2619 ref1->data[0], ref1->linesize[0], tref1,
2620 (row << 3) + 4, col << 3, &b->mv[2][0], 8, 4, w1, h1);
// second reference (compound prediction)
2623 mc_luma_dir(s, s->dsp.mc[3][b->filter][1], s->dst[0], ls_y,
2624 ref2->data[0], ref2->linesize[0], tref2,
2625 row << 3, col << 3, &b->mv[0][1], 8, 4, w2, h2);
2626 mc_luma_dir(s, s->dsp.mc[3][b->filter][1],
2627 s->dst[0] + 4 * ls_y, ls_y,
2628 ref2->data[0], ref2->linesize[0], tref2,
2629 (row << 3) + 4, col << 3, &b->mv[2][1], 8, 4, w2, h2);
2631 } else if (b->bs == BS_4x8) {
2632 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2633 ref1->data[0], ref1->linesize[0], tref1,
2634 row << 3, col << 3, &b->mv[0][0], 4, 8, w1, h1);
2635 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2636 ref1->data[0], ref1->linesize[0], tref1,
2637 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 8, w1, h1);
2640 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2641 ref2->data[0], ref2->linesize[0], tref2,
2642 row << 3, col << 3, &b->mv[0][1], 4, 8, w2, h2);
2643 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2644 ref2->data[0], ref2->linesize[0], tref2,
2645 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 8, w2, h2);
2648 av_assert2(b->bs == BS_4x4);
2650 // FIXME if two horizontally adjacent blocks have the same MV,
2651 // do a w8 instead of a w4 call
2652 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0], ls_y,
2653 ref1->data[0], ref1->linesize[0], tref1,
2654 row << 3, col << 3, &b->mv[0][0], 4, 4, w1, h1);
2655 mc_luma_dir(s, s->dsp.mc[4][b->filter][0], s->dst[0] + 4, ls_y,
2656 ref1->data[0], ref1->linesize[0], tref1,
2657 row << 3, (col << 3) + 4, &b->mv[1][0], 4, 4, w1, h1);
2658 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2659 s->dst[0] + 4 * ls_y, ls_y,
2660 ref1->data[0], ref1->linesize[0], tref1,
2661 (row << 3) + 4, col << 3, &b->mv[2][0], 4, 4, w1, h1);
2662 mc_luma_dir(s, s->dsp.mc[4][b->filter][0],
2663 s->dst[0] + 4 * ls_y + 4, ls_y,
2664 ref1->data[0], ref1->linesize[0], tref1,
2665 (row << 3) + 4, (col << 3) + 4, &b->mv[3][0], 4, 4, w1, h1);
2668 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0], ls_y,
2669 ref2->data[0], ref2->linesize[0], tref2,
2670 row << 3, col << 3, &b->mv[0][1], 4, 4, w2, h2);
2671 mc_luma_dir(s, s->dsp.mc[4][b->filter][1], s->dst[0] + 4, ls_y,
2672 ref2->data[0], ref2->linesize[0], tref2,
2673 row << 3, (col << 3) + 4, &b->mv[1][1], 4, 4, w2, h2);
2674 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2675 s->dst[0] + 4 * ls_y, ls_y,
2676 ref2->data[0], ref2->linesize[0], tref2,
2677 (row << 3) + 4, col << 3, &b->mv[2][1], 4, 4, w2, h2);
2678 mc_luma_dir(s, s->dsp.mc[4][b->filter][1],
2679 s->dst[0] + 4 * ls_y + 4, ls_y,
2680 ref2->data[0], ref2->linesize[0], tref2,
2681 (row << 3) + 4, (col << 3) + 4, &b->mv[3][1], 4, 4, w2, h2);
// >= 8x8: a single MC call per reference covers the whole luma block
2685 int bwl = bwlog_tab[0][b->bs];
2686 int bw = bwh_tab[0][b->bs][0] * 4, bh = bwh_tab[0][b->bs][1] * 4;
2688 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][0], s->dst[0], ls_y,
2689 ref1->data[0], ref1->linesize[0], tref1,
2690 row << 3, col << 3, &b->mv[0][0],bw, bh, w1, h1);
2693 mc_luma_dir(s, s->dsp.mc[bwl][b->filter][1], s->dst[0], ls_y,
2694 ref2->data[0], ref2->linesize[0], tref2,
2695 row << 3, col << 3, &b->mv[0][1], bw, bh, w2, h2);
// uv motion compensation
2700 int bwl = bwlog_tab[1][b->bs];
2701 int bw = bwh_tab[1][b->bs][0] * 4, bh = bwh_tab[1][b->bs][1] * 4;
// sub-8x8: chroma uses the rounded average of the 4 sub-block MVs
2710 if (b->bs > BS_8x8) {
2711 mvuv.x = ROUNDED_DIV(b->mv[0][0].x + b->mv[1][0].x + b->mv[2][0].x + b->mv[3][0].x, 4);
2712 mvuv.y = ROUNDED_DIV(b->mv[0][0].y + b->mv[1][0].y + b->mv[2][0].y + b->mv[3][0].y, 4);
2717 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][0],
2718 s->dst[1], s->dst[2], ls_uv,
2719 ref1->data[1], ref1->linesize[1],
2720 ref1->data[2], ref1->linesize[2], tref1,
2721 row << 2, col << 2, &mvuv, bw, bh, w1, h1);
2724 if (b->bs > BS_8x8) {
2725 mvuv.x = ROUNDED_DIV(b->mv[0][1].x + b->mv[1][1].x + b->mv[2][1].x + b->mv[3][1].x, 4);
2726 mvuv.y = ROUNDED_DIV(b->mv[0][1].y + b->mv[1][1].y + b->mv[2][1].y + b->mv[3][1].y, 4);
2730 mc_chroma_dir(s, s->dsp.mc[bwl][b->filter][1],
2731 s->dst[1], s->dst[2], ls_uv,
2732 ref2->data[1], ref2->linesize[1],
2733 ref2->data[2], ref2->linesize[2], tref2,
2734 row << 2, col << 2, &mvuv, bw, bh, w2, h2);
2739 /* mostly copied intra_reconn() */
// residual add: same per-tx-block loops as intra_recon, but inter
// blocks always use DCT_DCT
2741 int w4 = bwh_tab[1][b->bs][0] << 1, step1d = 1 << b->tx, n;
2742 int h4 = bwh_tab[1][b->bs][1] << 1, x, y, step = 1 << (b->tx * 2);
2743 int end_x = FFMIN(2 * (s->cols - col), w4);
2744 int end_y = FFMIN(2 * (s->rows - row), h4);
2745 int tx = 4 * s->lossless + b->tx, uvtx = b->uvtx + 4 * s->lossless;
2746 int uvstep1d = 1 << b->uvtx, p;
2747 uint8_t *dst = s->dst[0];
2750 for (n = 0, y = 0; y < end_y; y += step1d) {
2752 for (x = 0; x < end_x; x += step1d, ptr += 4 * step1d, n += step) {
2753 int eob = b->tx > TX_8X8 ? AV_RN16A(&s->eob[n]) : s->eob[n];
2756 s->dsp.itxfm_add[tx][DCT_DCT](ptr, s->y_stride,
2757 s->block + 16 * n, eob);
2759 dst += 4 * s->y_stride * step1d;
2767 step = 1 << (b->uvtx * 2);
2768 for (p = 0; p < 2; p++) {
2769 dst = s->dst[p + 1];
2770 for (n = 0, y = 0; y < end_y; y += uvstep1d) {
2772 for (x = 0; x < end_x; x += uvstep1d, ptr += 4 * uvstep1d, n += step) {
2773 int eob = b->uvtx > TX_8X8 ? AV_RN16A(&s->uveob[p][n]) : s->uveob[p][n];
2776 s->dsp.itxfm_add[uvtx][DCT_DCT](ptr, s->uv_stride,
2777 s->uvblock[p] + 16 * n, eob);
2779 dst += 4 * uvstep1d * s->uv_stride;
// Set the per-superblock loopfilter edge masks (lflvl->mask) for one
// block: which 8/4-px column and row edges must be filtered, and with
// which filter width, depending on tx size, plane (is_uv) and whether
// an inter block was fully skipped.
// NOTE(review): lines are elided in this dump; several branch headers
// and braces of the original are not visible.
2785 static av_always_inline void mask_edges(struct VP9Filter *lflvl, int is_uv,
2786 int row_and_7, int col_and_7,
2787 int w, int h, int col_end, int row_end,
2788 enum TxfmMode tx, int skip_inter)
2790 // FIXME I'm pretty sure all loops can be replaced by a single LUT if
2791 // we make VP9Filter.mask uint64_t (i.e. row/col all single variable)
2792 // and make the LUT 5-indexed (bl, bp, is_uv, tx and row/col), and then
2793 // use row_and_7/col_and_7 as shifts (1*col_and_7+8*row_and_7)
2795 // the intended behaviour of the vp9 loopfilter is to work on 8-pixel
2796 // edges. This means that for UV, we work on two subsampled blocks at
2797 // a time, and we only use the topleft block's mode information to set
2798 // things like block strength. Thus, for any block size smaller than
2799 // 16x16, ignore the odd portion of the block.
2800 if (tx == TX_4X4 && is_uv) {
2815 if (tx == TX_4X4 && !skip_inter) {
// t: bit for this block's first column; m_col: bits for all w columns
2816 int t = 1 << col_and_7, m_col = (t << w) - t, y;
2817 int m_col_odd = (t << (w - 1)) - t;
2819 // on 32-px edges, use the 8-px wide loopfilter; else, use 4-px wide
2821 int m_row_8 = m_col & 0x01, m_row_4 = m_col - m_row_8;
2823 for (y = row_and_7; y < h + row_and_7; y++) {
2824 int col_mask_id = 2 - !(y & 7);
2826 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2827 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2828 // for odd lines, if the odd col is not being filtered,
2829 // skip odd row also:
2836 // if a/c are even row/col and b/d are odd, and d is skipped,
2837 // e.g. right edge of size-66x66.webm, then skip b also (bug)
2838 if ((col_end & 1) && (y & 1)) {
2839 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col_odd;
2841 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col;
2845 int m_row_8 = m_col & 0x11, m_row_4 = m_col - m_row_8;
2847 for (y = row_and_7; y < h + row_and_7; y++) {
2848 int col_mask_id = 2 - !(y & 3);
2850 lflvl->mask[is_uv][0][y][1] |= m_row_8; // row edge
2851 lflvl->mask[is_uv][0][y][2] |= m_row_4;
2852 lflvl->mask[is_uv][1][y][col_mask_id] |= m_col; // col edge
// [3] is the "inner 4px" edge class
2853 lflvl->mask[is_uv][0][y][3] |= m_col;
2854 lflvl->mask[is_uv][1][y][3] |= m_col;
// non-skipped block with tx >= 8x8
2858 int y, t = 1 << col_and_7, m_col = (t << w) - t;
2861 int mask_id = (tx == TX_8X8);
2862 int l2 = tx + is_uv - 1, step1d = 1 << l2;
// row-edge bit patterns per tx size (every 1st/2nd/4th/8th column)
2863 static const unsigned masks[4] = { 0xff, 0x55, 0x11, 0x01 };
2864 int m_row = m_col & masks[l2];
2866 // at odd UV col/row edges tx16/tx32 loopfilter edges, force
2867 // 8wd loopfilter to prevent going off the visible edge.
2868 if (is_uv && tx > TX_8X8 && (w ^ (w - 1)) == 1) {
2869 int m_row_16 = ((t << (w - 1)) - t) & masks[l2];
2870 int m_row_8 = m_row - m_row_16;
2872 for (y = row_and_7; y < h + row_and_7; y++) {
2873 lflvl->mask[is_uv][0][y][0] |= m_row_16;
2874 lflvl->mask[is_uv][0][y][1] |= m_row_8;
2877 for (y = row_and_7; y < h + row_and_7; y++)
2878 lflvl->mask[is_uv][0][y][mask_id] |= m_row;
2881 if (is_uv && tx > TX_8X8 && (h ^ (h - 1)) == 1) {
2882 for (y = row_and_7; y < h + row_and_7 - 1; y += step1d)
2883 lflvl->mask[is_uv][1][y][0] |= m_col;
2884 if (y - row_and_7 == h - 1)
2885 lflvl->mask[is_uv][1][y][1] |= m_col;
2887 for (y = row_and_7; y < h + row_and_7; y += step1d)
2888 lflvl->mask[is_uv][1][y][mask_id] |= m_col;
2890 } else if (tx != TX_4X4) {
// skipped inter block, tx >= 8x8: only the block borders get edges
2893 mask_id = (tx == TX_8X8) || (is_uv && h == 1);
2894 lflvl->mask[is_uv][1][row_and_7][mask_id] |= m_col;
2895 mask_id = (tx == TX_8X8) || (is_uv && w == 1);
2896 for (y = row_and_7; y < h + row_and_7; y++)
2897 lflvl->mask[is_uv][0][y][mask_id] |= t;
// skipped inter block with tx 4x4, uv plane
2899 int t8 = t & 0x01, t4 = t - t8;
2901 for (y = row_and_7; y < h + row_and_7; y++) {
2902 lflvl->mask[is_uv][0][y][2] |= t4;
2903 lflvl->mask[is_uv][0][y][1] |= t8;
2905 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 7)] |= m_col;
// skipped inter block with tx 4x4, y plane
2907 int t8 = t & 0x11, t4 = t - t8;
2909 for (y = row_and_7; y < h + row_and_7; y++) {
2910 lflvl->mask[is_uv][0][y][2] |= t4;
2911 lflvl->mask[is_uv][0][y][1] |= t8;
2913 lflvl->mask[is_uv][1][row_and_7][2 - !(row_and_7 & 3)] |= m_col;
// Decode and reconstruct one block at (row, col): decode its modes and
// coefficients, reconstruct intra or inter pixels (via a temporary
// buffer when the block overhangs the frame edge), compute the
// loopfilter level/mask information, and advance the per-pass
// coefficient pointers.
// NOTE(review): lines are elided in this dump (pass-selection branches,
// some braces and conditions of the original are not visible).
2918 static void decode_b(AVCodecContext *ctx, int row, int col,
2919 struct VP9Filter *lflvl, ptrdiff_t yoff, ptrdiff_t uvoff,
2920 enum BlockLevel bl, enum BlockPartition bp)
2922 VP9Context *s = ctx->priv_data;
2924 enum BlockSize bs = bl * 3 + bp;
2925 int w4 = bwh_tab[1][bs][0], h4 = bwh_tab[1][bs][1], lvl;
2927 AVFrame *f = s->frames[CUR_FRAME].tf.f;
// clamp MV search range so predictors stay within 128px of the frame
2933 s->min_mv.x = -(128 + col * 64);
2934 s->min_mv.y = -(128 + row * 64);
2935 s->max_mv.x = 128 + (s->cols - col - w4) * 64;
2936 s->max_mv.y = 128 + (s->rows - row - h4) * 64;
// chroma tx is one step smaller when the block is only one tx wide/high
2942 b->uvtx = b->tx - (w4 * 2 == (1 << b->tx) || h4 * 2 == (1 << b->tx));
// zero the above/left non-zero-coefficient contexts for skipped blocks;
// n is a compile-time constant so the switch folds to one store
2949 #define SPLAT_ZERO_CTX(v, n) \
2951 case 1: v = 0; break; \
2952 case 2: AV_ZERO16(&v); break; \
2953 case 4: AV_ZERO32(&v); break; \
2954 case 8: AV_ZERO64(&v); break; \
2955 case 16: AV_ZERO128(&v); break; \
2957 #define SPLAT_ZERO_YUV(dir, var, off, n) \
2959 SPLAT_ZERO_CTX(s->dir##_y_##var[off * 2], n * 2); \
2960 SPLAT_ZERO_CTX(s->dir##_uv_##var[0][off], n); \
2961 SPLAT_ZERO_CTX(s->dir##_uv_##var[1][off], n); \
2965 case 1: SPLAT_ZERO_YUV(above, nnz_ctx, col, 1); break;
2966 case 2: SPLAT_ZERO_YUV(above, nnz_ctx, col, 2); break;
2967 case 4: SPLAT_ZERO_YUV(above, nnz_ctx, col, 4); break;
2968 case 8: SPLAT_ZERO_YUV(above, nnz_ctx, col, 8); break;
2971 case 1: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 1); break;
2972 case 2: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 2); break;
2973 case 4: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 4); break;
2974 case 8: SPLAT_ZERO_YUV(left, nnz_ctx, row7, 8); break;
// advance coefficient/eob pointers past this block's share
2979 s->block += w4 * h4 * 64;
2980 s->uvblock[0] += w4 * h4 * 16;
2981 s->uvblock[1] += w4 * h4 * 16;
2982 s->eob += 4 * w4 * h4;
2983 s->uveob[0] += w4 * h4;
2984 s->uveob[1] += w4 * h4;
2990 // emulated overhangs if the stride of the target buffer can't hold. This
2991 // allows to support emu-edge and so on even if we have large block
2993 emu[0] = (col + w4) * 8 > f->linesize[0] ||
2994 (row + h4) > s->rows;
2995 emu[1] = (col + w4) * 4 > f->linesize[1] ||
2996 (row + h4) > s->rows;
// overhanging block: reconstruct into temp buffers instead of the frame
2998 s->dst[0] = s->tmp_y;
3001 s->dst[0] = f->data[0] + yoff;
3002 s->y_stride = f->linesize[0];
3005 s->dst[1] = s->tmp_uv[0];
3006 s->dst[2] = s->tmp_uv[1];
3009 s->dst[1] = f->data[1] + uvoff;
3010 s->dst[2] = f->data[2] + uvoff;
3011 s->uv_stride = f->linesize[1];
3014 intra_recon(ctx, yoff, uvoff);
// copy the visible part of the temp luma buffer back into the frame,
// widest copy routine first (o advances by the copied width)
3019 int w = FFMIN(s->cols - col, w4) * 8, h = FFMIN(s->rows - row, h4) * 8, n, o = 0;
3021 for (n = 0; o < w; n++) {
3026 s->dsp.mc[n][0][0][0][0](f->data[0] + yoff + o, f->linesize[0],
3027 s->tmp_y + o, 64, h, 0, 0);
// same copy-back for the temp chroma buffers
3033 int w = FFMIN(s->cols - col, w4) * 4, h = FFMIN(s->rows - row, h4) * 4, n, o = 0;
3035 for (n = 1; o < w; n++) {
3040 s->dsp.mc[n][0][0][0][0](f->data[1] + uvoff + o, f->linesize[1],
3041 s->tmp_uv[0] + o, 32, h, 0, 0);
3042 s->dsp.mc[n][0][0][0][0](f->data[2] + uvoff + o, f->linesize[2],
3043 s->tmp_uv[1] + o, 32, h, 0, 0);
3049 // pick filter level and find edges to apply filter to
3050 if (s->filter.level &&
3051 (lvl = s->segmentation.feat[b->seg_id].lflvl[b->intra ? 0 : b->ref[0] + 1]
3052 [b->mode[3] != ZEROMV]) > 0) {
3053 int x_end = FFMIN(s->cols - col, w4), y_end = FFMIN(s->rows - row, h4);
3054 int skip_inter = !b->intra && b->skip, col7 = s->col7, row7 = s->row7;
3056 setctx_2d(&lflvl->level[row7 * 8 + col7], w4, h4, 8, lvl);
3057 mask_edges(lflvl, 0, row7, col7, x_end, y_end, 0, 0, b->tx, skip_inter);
3058 mask_edges(lflvl, 1, row7, col7, x_end, y_end,
3059 s->cols & 1 && col + w4 >= s->cols ? s->cols & 7 : 0,
3060 s->rows & 1 && row + h4 >= s->rows ? s->rows & 7 : 0,
3061 b->uvtx, skip_inter);
// lazily fill the limit LUTs for this level (lim_lut[0] stays 0,
// but level 0 never reaches here because of the lvl > 0 check)
3063 if (!s->filter.lim_lut[lvl]) {
3064 int sharp = s->filter.sharpness;
3068 limit >>= (sharp + 3) >> 2;
3069 limit = FFMIN(limit, 9 - sharp);
3071 limit = FFMAX(limit, 1);
3073 s->filter.lim_lut[lvl] = limit;
3074 s->filter.mblim_lut[lvl] = 2 * (lvl + 2) + limit;
// advance pointers in the non-reconstruction pass as well
3080 s->block += w4 * h4 * 64;
3081 s->uvblock[0] += w4 * h4 * 16;
3082 s->uvblock[1] += w4 * h4 * 16;
3083 s->eob += 4 * w4 * h4;
3084 s->uveob[0] += w4 * h4;
3085 s->uveob[1] += w4 * h4;
// Recursively decode one superblock partition level: read the partition
// symbol from the range coder (context built from above/left partition
// state), then either decode a single block or recurse into the four
// quadrants. Near the right/bottom frame edge, partition choices that
// would fall fully outside are implied rather than coded.
3089 static void decode_sb(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3090 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3092 VP9Context *s = ctx->priv_data;
// partition probability context from the above/left partition bits
3093 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
3094 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
3095 const uint8_t *p = s->keyframe ? vp9_default_kf_partition_probs[bl][c] :
3096 s->prob.p.partition[bl][c];
3097 enum BlockPartition bp;
// half block size in 8px units at this level
3098 ptrdiff_t hbs = 4 >> bl;
3099 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3100 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
3103 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3104 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3105 } else if (col + hbs < s->cols) { // FIXME why not <=?
3106 if (row + hbs < s->rows) { // FIXME why not <=?
// fully inside the frame: all four partition types are possible
3107 bp = vp8_rac_get_tree(&s->c, vp9_partition_tree, p);
3109 case PARTITION_NONE:
3110 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3113 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3114 yoff += hbs * 8 * y_stride;
3115 uvoff += hbs * 4 * uv_stride;
3116 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
3119 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
3122 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
3124 case PARTITION_SPLIT:
3125 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3126 decode_sb(ctx, row, col + hbs, lflvl,
3127 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3128 yoff += hbs * 8 * y_stride;
3129 uvoff += hbs * 4 * uv_stride;
3130 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3131 decode_sb(ctx, row + hbs, col + hbs, lflvl,
3132 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
// bottom half outside: only split / horizontal are possible
3137 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
3138 bp = PARTITION_SPLIT;
3139 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3140 decode_sb(ctx, row, col + hbs, lflvl,
3141 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3144 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// right half outside: only split / vertical are possible
3146 } else if (row + hbs < s->rows) { // FIXME why not <=?
3147 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
3148 bp = PARTITION_SPLIT;
3149 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3150 yoff += hbs * 8 * y_stride;
3151 uvoff += hbs * 4 * uv_stride;
3152 decode_sb(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3155 decode_b(ctx, row, col, lflvl, yoff, uvoff, bl, bp);
// both halves outside: split is implied, no symbol coded
3158 bp = PARTITION_SPLIT;
3159 decode_sb(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
// update adaptation counts for backward probability updates
3161 s->counts.partition[bl][c][bp]++;
// Second-pass variant of decode_sb: instead of reading partition
// symbols from the bitstream, replay the partition tree stored in the
// per-block structures (b->bl / b->bp) during the first pass.
3164 static void decode_sb_mem(AVCodecContext *ctx, int row, int col, struct VP9Filter *lflvl,
3165 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
3167 VP9Context *s = ctx->priv_data;
3169 ptrdiff_t hbs = 4 >> bl;
3170 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3171 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
// smallest level: must be a leaf block
3174 av_assert2(b->bl == BL_8X8);
3175 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3176 } else if (s->b->bl == bl) {
// stored leaf at this level; H/V partitions decode a second half
// when it is inside the frame
3177 decode_b(ctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
3178 if (b->bp == PARTITION_H && row + hbs < s->rows) {
3179 yoff += hbs * 8 * y_stride;
3180 uvoff += hbs * 4 * uv_stride;
3181 decode_b(ctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
3182 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
3185 decode_b(ctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// stored split: recurse into the quadrants that are inside the frame
3188 decode_sb_mem(ctx, row, col, lflvl, yoff, uvoff, bl + 1);
3189 if (col + hbs < s->cols) { // FIXME why not <=?
3190 if (row + hbs < s->rows) {
3191 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff + 8 * hbs,
3192 uvoff + 4 * hbs, bl + 1);
3193 yoff += hbs * 8 * y_stride;
3194 uvoff += hbs * 4 * uv_stride;
3195 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
3196 decode_sb_mem(ctx, row + hbs, col + hbs, lflvl,
3197 yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
3201 decode_sb_mem(ctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
3203 } else if (row + hbs < s->rows) {
3204 yoff += hbs * 8 * y_stride;
3205 uvoff += hbs * 4 * uv_stride;
3206 decode_sb_mem(ctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
// Apply the in-loop deblocking filter to one 64x64 superblock using the
// edge masks computed by mask_edges(): first vertical (column) edges,
// then horizontal (row) edges, for luma and then both chroma planes.
// Each mask bit selects one 8px (luma) / 4px-pair (chroma) edge; the
// mask index [0..3] selects the filter width (16/8/4/inner-4).
// NOTE(review): lines are elided in this dump; some branch headers and
// closing braces of the original are not visible.
3211 static void loopfilter_sb(AVCodecContext *ctx, struct VP9Filter *lflvl,
3212 int row, int col, ptrdiff_t yoff, ptrdiff_t uvoff)
3214 VP9Context *s = ctx->priv_data;
3215 AVFrame *f = s->frames[CUR_FRAME].tf.f;
3216 uint8_t *dst = f->data[0] + yoff, *lvl = lflvl->level;
3217 ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
3220 // FIXME in how far can we interleave the v/h loopfilter calls? E.g.
3221 // if you think of them as acting on a 8x8 block max, we can interleave
3222 // each v/h within the single x loop, but that only works if we work on
3223 // 8 pixel blocks, and we won't always do that (we want at least 16px
3224 // to use SSE2 optimizations, perhaps 32 for AVX2)
3226 // filter edges between columns, Y plane (e.g. block1 | block2)
// two 8px rows are handled per iteration so adjacent edges can be
// merged into one 16-wide or "mix2" dsp call
3227 for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
3228 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
3229 uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
3230 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
3231 unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
3232 unsigned hm = hm1 | hm2 | hm13 | hm23;
3234 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
// L: packed filter level (low nibble) and hev threshold (high)
3236 int L = *l, H = L >> 4;
3237 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3240 if (hmask1[0] & x) {
3241 if (hmask2[0] & x) {
3242 av_assert2(l[8] == L);
3243 s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
3245 s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
3247 } else if (hm2 & x) {
// both rows filtered: pack the second row's E/I into the
// upper byte and use the dual-edge "mix2" filter
3250 E |= s->filter.mblim_lut[L] << 8;
3251 I |= s->filter.lim_lut[L] << 8;
3252 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3254 [0](ptr, ls_y, E, I, H);
3256 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3257 [0](ptr, ls_y, E, I, H);
3260 } else if (hm2 & x) {
3261 int L = l[8], H = L >> 4;
3262 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3265 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3266 [0](ptr + 8 * ls_y, ls_y, E, I, H);
// mask index [3]: inner 4px edges, offset by 4 pixels
3270 int L = *l, H = L >> 4;
3271 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3276 E |= s->filter.mblim_lut[L] << 8;
3277 I |= s->filter.lim_lut[L] << 8;
3278 s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
3280 s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
3282 } else if (hm23 & x) {
3283 int L = l[8], H = L >> 4;
3284 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3286 s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
3292 // filter edges between rows, Y plane (e.g. ------)
3294 dst = f->data[0] + yoff;
3296 for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
3297 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
3298 unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];
// two horizontally adjacent edges per iteration (bits x and x<<1)
3300 for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
3303 int L = *l, H = L >> 4;
3304 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3307 if (vmask[0] & (x << 1)) {
3308 av_assert2(l[1] == L);
3309 s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
3311 s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
3313 } else if (vm & (x << 1)) {
3316 E |= s->filter.mblim_lut[L] << 8;
3317 I |= s->filter.lim_lut[L] << 8;
3318 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3319 [!!(vmask[1] & (x << 1))]
3320 [1](ptr, ls_y, E, I, H);
3322 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3323 [1](ptr, ls_y, E, I, H);
3325 } else if (vm & (x << 1)) {
3326 int L = l[1], H = L >> 4;
3327 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3329 s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
3330 [1](ptr + 8, ls_y, E, I, H);
3334 int L = *l, H = L >> 4;
3335 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3337 if (vm3 & (x << 1)) {
3340 E |= s->filter.mblim_lut[L] << 8;
3341 I |= s->filter.lim_lut[L] << 8;
3342 s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
3344 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
3346 } else if (vm3 & (x << 1)) {
3347 int L = l[1], H = L >> 4;
3348 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3350 s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
3355 // same principle but for U/V planes
3356 for (p = 0; p < 2; p++) {
3358 dst = f->data[1 + p] + uvoff;
// chroma is subsampled: 4 mask rows per iteration, level stride 16
3359 for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
3360 uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
3361 uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
3362 unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
3363 unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;
3365 for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
3368 int L = *l, H = L >> 4;
3369 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3371 if (hmask1[0] & x) {
3372 if (hmask2[0] & x) {
3373 av_assert2(l[16] == L);
3374 s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
3376 s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
3378 } else if (hm2 & x) {
3381 E |= s->filter.mblim_lut[L] << 8;
3382 I |= s->filter.lim_lut[L] << 8;
3383 s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
3385 [0](ptr, ls_uv, E, I, H);
3387 s->dsp.loop_filter_8[!!(hmask1[1] & x)]
3388 [0](ptr, ls_uv, E, I, H);
3390 } else if (hm2 & x) {
3391 int L = l[16], H = L >> 4;
3392 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3394 s->dsp.loop_filter_8[!!(hmask2[1] & x)]
3395 [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
// chroma row edges
3403 dst = f->data[1 + p] + uvoff;
3404 for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
3405 uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
3406 unsigned vm = vmask[0] | vmask[1] | vmask[2];
3408 for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
3411 int L = *l, H = L >> 4;
3412 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3415 if (vmask[0] & (x << 2)) {
3416 av_assert2(l[2] == L);
3417 s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
3419 s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
3421 } else if (vm & (x << 2)) {
3424 E |= s->filter.mblim_lut[L] << 8;
3425 I |= s->filter.lim_lut[L] << 8;
3426 s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
3427 [!!(vmask[1] & (x << 2))]
3428 [1](ptr, ls_uv, E, I, H);
3430 s->dsp.loop_filter_8[!!(vmask[1] & x)]
3431 [1](ptr, ls_uv, E, I, H);
3433 } else if (vm & (x << 2)) {
3434 int L = l[2], H = L >> 4;
3435 int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];
3437 s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
3438 [1](ptr + 8, ls_uv, E, I, H);
/**
 * Compute the bounds of one tile along one axis.
 *
 * @param start   receives the tile's first position, in 8x8-block units
 * @param end     receives one past the tile's last position, same units
 * @param idx     tile index along this axis
 * @param log2_n  log2 of the number of tiles along this axis
 * @param n       total axis size in 64x64-superblock units
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = ( idx      * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;

    /* clamp to the picture edge, then convert superblock units to
     * 8x8-block units (one SB64 spans eight 8x8 blocks) */
    if (sb_start > n)
        sb_start = n;
    if (sb_end > n)
        sb_end = n;
    *start = sb_start << 3;
    *end   = sb_end   << 3;
}
3456 static av_always_inline void adapt_prob(uint8_t *p, unsigned ct0, unsigned ct1,
3457 int max_count, int update_factor)
3459 unsigned ct = ct0 + ct1, p2, p1;
3465 p2 = ((ct0 << 8) + (ct >> 1)) / ct;
3466 p2 = av_clip(p2, 1, 255);
3467 ct = FFMIN(ct, max_count);
3468 update_factor = FASTDIV(update_factor * ct, max_count);
3470 // (p1 * (256 - update_factor) + p2 * update_factor + 128) >> 8
3471 *p = p1 + (((p2 - p1) * update_factor + 128) >> 8);
// Backward probability adaptation: after decoding a frame, blend the frame
// context's probabilities towards the symbol counts gathered in s->counts.
// Inter-only probabilities are skipped on key/intra-only frames.
3474 static void adapt_probs(VP9Context *s)
3477 prob_context *p = &s->prob_ctx[s->framectxid].p;
// coefficient probs adapt faster (112) right after a key frame / reset
3478 int uf = (s->keyframe || s->intraonly || !s->last_keyframe) ? 112 : 128;
// coefficient probabilities: [tx size][plane type][ref type][band][coef ctx]
3481 for (i = 0; i < 4; i++)
3482 for (j = 0; j < 2; j++)
3483 for (k = 0; k < 2; k++)
3484 for (l = 0; l < 6; l++)
3485 for (m = 0; m < 6; m++) {
3486 uint8_t *pp = s->prob_ctx[s->framectxid].coef[i][j][k][l][m];
3487 unsigned *e = s->counts.eob[i][j][k][l][m];
3488 unsigned *c = s->counts.coef[i][j][k][l][m];
3490 if (l == 0 && m >= 3) // dc only has 3 pt
// node 0 from EOB counts, nodes 1-2 from the token counts
3493 adapt_prob(&pp[0], e[0], e[1], 24, uf);
3494 adapt_prob(&pp[1], c[0], c[1] + c[2], 24, uf);
3495 adapt_prob(&pp[2], c[1], c[2], 24, uf);
// intra frames carry no inter statistics: keep skip/tx probs as signalled
3498 if (s->keyframe || s->intraonly) {
3499 memcpy(p->skip, s->prob.p.skip, sizeof(p->skip));
3500 memcpy(p->tx32p, s->prob.p.tx32p, sizeof(p->tx32p));
3501 memcpy(p->tx16p, s->prob.p.tx16p, sizeof(p->tx16p));
3502 memcpy(p->tx8p, s->prob.p.tx8p, sizeof(p->tx8p));
// skip flag
3507 for (i = 0; i < 3; i++)
3508 adapt_prob(&p->skip[i], s->counts.skip[i][0], s->counts.skip[i][1], 20, 128);
// intra/inter flag
3511 for (i = 0; i < 4; i++)
3512 adapt_prob(&p->intra[i], s->counts.intra[i][0], s->counts.intra[i][1], 20, 128);
// single/compound prediction choice (only meaningful when switchable)
3515 if (s->comppredmode == PRED_SWITCHABLE) {
3516 for (i = 0; i < 5; i++)
3517 adapt_prob(&p->comp[i], s->counts.comp[i][0], s->counts.comp[i][1], 20, 128);
// compound reference selection
3521 if (s->comppredmode != PRED_SINGLEREF) {
3522 for (i = 0; i < 5; i++)
3523 adapt_prob(&p->comp_ref[i], s->counts.comp_ref[i][0],
3524 s->counts.comp_ref[i][1], 20, 128);
// single reference selection (two-node tree)
3527 if (s->comppredmode != PRED_COMPREF) {
3528 for (i = 0; i < 5; i++) {
3529 uint8_t *pp = p->single_ref[i];
3530 unsigned (*c)[2] = s->counts.single_ref[i];
3532 adapt_prob(&pp[0], c[0][0], c[0][1], 20, 128);
3533 adapt_prob(&pp[1], c[1][0], c[1][1], 20, 128);
3537 // block partitioning
3538 for (i = 0; i < 4; i++)
3539 for (j = 0; j < 4; j++) {
3540 uint8_t *pp = p->partition[i][j];
3541 unsigned *c = s->counts.partition[i][j];
3543 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3544 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3545 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// transform size trees (8/16/32), per size-context
3549 if (s->txfmmode == TX_SWITCHABLE) {
3550 for (i = 0; i < 2; i++) {
3551 unsigned *c16 = s->counts.tx16p[i], *c32 = s->counts.tx32p[i];
3553 adapt_prob(&p->tx8p[i], s->counts.tx8p[i][0], s->counts.tx8p[i][1], 20, 128);
3554 adapt_prob(&p->tx16p[i][0], c16[0], c16[1] + c16[2], 20, 128);
3555 adapt_prob(&p->tx16p[i][1], c16[1], c16[2], 20, 128);
3556 adapt_prob(&p->tx32p[i][0], c32[0], c32[1] + c32[2] + c32[3], 20, 128);
3557 adapt_prob(&p->tx32p[i][1], c32[1], c32[2] + c32[3], 20, 128);
3558 adapt_prob(&p->tx32p[i][2], c32[2], c32[3], 20, 128);
3562 // interpolation filter
3563 if (s->filtermode == FILTER_SWITCHABLE) {
3564 for (i = 0; i < 4; i++) {
3565 uint8_t *pp = p->filter[i];
3566 unsigned *c = s->counts.filter[i];
3568 adapt_prob(&pp[0], c[0], c[1] + c[2], 20, 128);
3569 adapt_prob(&pp[1], c[1], c[2], 20, 128);
// inter mode tree (zeromv/nearestmv/nearmv/newmv), per mode context
3574 for (i = 0; i < 7; i++) {
3575 uint8_t *pp = p->mv_mode[i];
3576 unsigned *c = s->counts.mv_mode[i];
3578 adapt_prob(&pp[0], c[2], c[1] + c[0] + c[3], 20, 128);
3579 adapt_prob(&pp[1], c[0], c[1] + c[3], 20, 128);
3580 adapt_prob(&pp[2], c[1], c[3], 20, 128);
// mv joint tree (which of the two mv components are non-zero)
3585 uint8_t *pp = p->mv_joint;
3586 unsigned *c = s->counts.mv_joint;
3588 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3589 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3590 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// per-component (row/col) mv probabilities
3594 for (i = 0; i < 2; i++) {
3596 unsigned *c, (*c2)[2], sum;
3598 adapt_prob(&p->mv_comp[i].sign, s->counts.mv_comp[i].sign[0],
3599 s->counts.mv_comp[i].sign[1], 20, 128);
// magnitude class tree: "sum" is the remaining mass below each node
3601 pp = p->mv_comp[i].classes;
3602 c = s->counts.mv_comp[i].classes;
3603 sum = c[1] + c[2] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9] + c[10];
3604 adapt_prob(&pp[0], c[0], sum, 20, 128);
3606 adapt_prob(&pp[1], c[1], sum, 20, 128);
3608 adapt_prob(&pp[2], c[2] + c[3], sum, 20, 128);
3609 adapt_prob(&pp[3], c[2], c[3], 20, 128);
3611 adapt_prob(&pp[4], c[4] + c[5], sum, 20, 128);
3612 adapt_prob(&pp[5], c[4], c[5], 20, 128);
3614 adapt_prob(&pp[6], c[6], sum, 20, 128);
3615 adapt_prob(&pp[7], c[7] + c[8], c[9] + c[10], 20, 128);
3616 adapt_prob(&pp[8], c[7], c[8], 20, 128);
3617 adapt_prob(&pp[9], c[9], c[10], 20, 128);
3619 adapt_prob(&p->mv_comp[i].class0, s->counts.mv_comp[i].class0[0],
3620 s->counts.mv_comp[i].class0[1], 20, 128);
// integer offset bits
3621 pp = p->mv_comp[i].bits;
3622 c2 = s->counts.mv_comp[i].bits;
3623 for (j = 0; j < 10; j++)
3624 adapt_prob(&pp[j], c2[j][0], c2[j][1], 20, 128);
// fractional-pel trees (class0 and general)
3626 for (j = 0; j < 2; j++) {
3627 pp = p->mv_comp[i].class0_fp[j];
3628 c = s->counts.mv_comp[i].class0_fp[j];
3629 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3630 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3631 adapt_prob(&pp[2], c[2], c[3], 20, 128);
3633 pp = p->mv_comp[i].fp;
3634 c = s->counts.mv_comp[i].fp;
3635 adapt_prob(&pp[0], c[0], c[1] + c[2] + c[3], 20, 128);
3636 adapt_prob(&pp[1], c[1], c[2] + c[3], 20, 128);
3637 adapt_prob(&pp[2], c[2], c[3], 20, 128);
// 1/8-pel bits only adapt when high-precision mvs were allowed
3639 if (s->highprecisionmvs) {
3640 adapt_prob(&p->mv_comp[i].class0_hp, s->counts.mv_comp[i].class0_hp[0],
3641 s->counts.mv_comp[i].class0_hp[1], 20, 128);
3642 adapt_prob(&p->mv_comp[i].hp, s->counts.mv_comp[i].hp[0],
3643 s->counts.mv_comp[i].hp[1], 20, 128);
// luma intra mode tree, per block-size group; "sum" shrinks as the
// tree descends, mirroring the decode-side tree order
3648 for (i = 0; i < 4; i++) {
3649 uint8_t *pp = p->y_mode[i];
3650 unsigned *c = s->counts.y_mode[i], sum, s2;
3652 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3653 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3654 sum -= c[TM_VP8_PRED];
3655 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3656 sum -= c[VERT_PRED];
3657 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3658 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3660 adapt_prob(&pp[3], s2, sum, 20, 128);
3662 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3663 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3664 sum -= c[DIAG_DOWN_LEFT_PRED];
3665 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3666 sum -= c[VERT_LEFT_PRED];
3667 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3668 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
// chroma intra mode tree, indexed by the co-located luma mode;
// identical tree shape to the luma case above
3672 for (i = 0; i < 10; i++) {
3673 uint8_t *pp = p->uv_mode[i];
3674 unsigned *c = s->counts.uv_mode[i], sum, s2;
3676 sum = c[0] + c[1] + c[3] + c[4] + c[5] + c[6] + c[7] + c[8] + c[9];
3677 adapt_prob(&pp[0], c[DC_PRED], sum, 20, 128);
3678 sum -= c[TM_VP8_PRED];
3679 adapt_prob(&pp[1], c[TM_VP8_PRED], sum, 20, 128);
3680 sum -= c[VERT_PRED];
3681 adapt_prob(&pp[2], c[VERT_PRED], sum, 20, 128);
3682 s2 = c[HOR_PRED] + c[DIAG_DOWN_RIGHT_PRED] + c[VERT_RIGHT_PRED];
3684 adapt_prob(&pp[3], s2, sum, 20, 128);
3686 adapt_prob(&pp[4], c[HOR_PRED], s2, 20, 128);
3687 adapt_prob(&pp[5], c[DIAG_DOWN_RIGHT_PRED], c[VERT_RIGHT_PRED], 20, 128);
3688 sum -= c[DIAG_DOWN_LEFT_PRED];
3689 adapt_prob(&pp[6], c[DIAG_DOWN_LEFT_PRED], sum, 20, 128);
3690 sum -= c[VERT_LEFT_PRED];
3691 adapt_prob(&pp[7], c[VERT_LEFT_PRED], sum, 20, 128);
3692 adapt_prob(&pp[8], c[HOR_DOWN_PRED], c[HOR_UP_PRED], 20, 128);
3696 static void free_buffers(VP9Context *s)
3698 av_freep(&s->intra_pred_data[0]);
3699 av_freep(&s->b_base);
3700 av_freep(&s->block_base);
// Decoder teardown: drop all frame buffers and reference slots this
// context owns. Paired with init_frames()/vp9_decode_init().
3703 static av_cold int vp9_decode_free(AVCodecContext *ctx)
3705 VP9Context *s = ctx->priv_data;
// the two internal frames (CUR_FRAME / LAST_FRAME)
3708 for (i = 0; i < 2; i++) {
3709 if (s->frames[i].tf.f->data[0])
3710 vp9_unref_frame(ctx, &s->frames[i]);
3711 av_frame_free(&s->frames[i].tf.f);
// the 8 reference slots plus their in-flight "next" counterparts;
// release the thread buffer first, then free the AVFrame shell itself
3713 for (i = 0; i < 8; i++) {
3714 if (s->refs[i].f->data[0])
3715 ff_thread_release_buffer(ctx, &s->refs[i]);
3716 av_frame_free(&s->refs[i].f);
3717 if (s->next_refs[i].f->data[0])
3718 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3719 av_frame_free(&s->next_refs[i].f);
// Decode one VP9 frame (or handle a show-existing-frame packet).
// Overall flow: parse header -> rotate internal frames -> decode tiles
// per superblock row (optionally in two passes for frame threading) ->
// loopfilter each row -> commit the updated reference slots.
3729 static int vp9_decode_frame(AVCodecContext *ctx, void *frame,
3730 int *got_frame, AVPacket *pkt)
3732 const uint8_t *data = pkt->data;
3733 int size = pkt->size;
3734 VP9Context *s = ctx->priv_data;
3735 int res, tile_row, tile_col, i, ref, row, col;
3736 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
// res == 0 from the header parser means "show existing frame `ref`":
// no pixel data follows, just re-output the stored reference
3739 if ((res = decode_frame_header(ctx, data, size, &ref)) < 0) {
3741 } else if (res == 0) {
3742 if (!s->refs[ref].f->data[0]) {
3743 av_log(ctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
3744 return AVERROR_INVALIDDATA;
3746 if ((res = av_frame_ref(frame, s->refs[ref].f)) < 0)
// rotate frames: previous CUR_FRAME becomes LAST_FRAME (inter frames
// only), then allocate a fresh CUR_FRAME for this frame's output
3754 if (s->frames[LAST_FRAME].tf.f->data[0])
3755 vp9_unref_frame(ctx, &s->frames[LAST_FRAME]);
3756 if (!s->keyframe && s->frames[CUR_FRAME].tf.f->data[0] &&
3757 (res = vp9_ref_frame(ctx, &s->frames[LAST_FRAME], &s->frames[CUR_FRAME])) < 0)
3759 if (s->frames[CUR_FRAME].tf.f->data[0])
3760 vp9_unref_frame(ctx, &s->frames[CUR_FRAME]);
3761 if ((res = vp9_alloc_frame(ctx, &s->frames[CUR_FRAME])) < 0)
3763 f = s->frames[CUR_FRAME].tf.f;
3764 f->key_frame = s->keyframe;
3765 f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3766 ls_y = f->linesize[0];
3767 ls_uv =f->linesize[1];
// set up next_refs now (before decoding) so frame threads see a
// consistent reference set: refreshed slots point at CUR_FRAME,
// the others keep their existing reference
3770 for (i = 0; i < 8; i++) {
3771 if (s->next_refs[i].f->data[0])
3772 ff_thread_release_buffer(ctx, &s->next_refs[i]);
3773 if (s->refreshrefmask & (1 << i)) {
3774 res = ff_thread_ref_frame(&s->next_refs[i], &s->frames[CUR_FRAME].tf);
3776 res = ff_thread_ref_frame(&s->next_refs[i], &s->refs[i]);
3782 // main tile decode loop
// reset the "above" context rows for the whole frame width
3783 memset(s->above_partition_ctx, 0, s->cols);
3784 memset(s->above_skip_ctx, 0, s->cols);
3785 if (s->keyframe || s->intraonly) {
3786 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
3788 memset(s->above_mode_ctx, NEARESTMV, s->cols);
3790 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
3791 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
3792 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
3793 memset(s->above_segpred_ctx, 0, s->cols);
// two-pass mode is used with frame threading when this frame updates
// the probability context non-parallel: pass 1 parses, pass 2 renders
3794 s->pass = s->uses_2pass =
3795 ctx->active_thread_type == FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
3796 if ((res = update_block_buffers(ctx)) < 0) {
3797 av_log(ctx, AV_LOG_ERROR,
3798 "Failed to allocate block buffers\n");
// parallel-mode context refresh: commit the (unadapted) probabilities
// up front so other frame threads can proceed immediately
3801 if (s->refreshctx && s->parallelmode) {
3804 for (i = 0; i < 4; i++) {
3805 for (j = 0; j < 2; j++)
3806 for (k = 0; k < 2; k++)
3807 for (l = 0; l < 6; l++)
3808 for (m = 0; m < 6; m++)
3809 memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
3810 s->prob.coef[i][j][k][l][m], 3);
3811 if (s->txfmmode == i)
3814 s->prob_ctx[s->framectxid].p = s->prob.p;
3815 ff_thread_finish_setup(ctx);
// rewind the per-pass block/coefficient cursors to their base buffers
3821 s->block = s->block_base;
3822 s->uvblock[0] = s->uvblock_base[0];
3823 s->uvblock[1] = s->uvblock_base[1];
3824 s->eob = s->eob_base;
3825 s->uveob[0] = s->uveob_base[0];
3826 s->uveob[1] = s->uveob_base[1];
3828 for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
3829 set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
3830 tile_row, s->tiling.log2_tile_rows, s->sb_rows);
// set up one range decoder per tile column; every tile except the
// very last is prefixed with a 32-bit size field
3832 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3835 if (tile_col == s->tiling.tile_cols - 1 &&
3836 tile_row == s->tiling.tile_rows - 1) {
3839 tile_size = AV_RB32(data);
3843 if (tile_size > size) {
3844 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3845 return AVERROR_INVALIDDATA;
3847 ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
3848 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
3849 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
3850 return AVERROR_INVALIDDATA;
// decode this tile row one superblock row (64 luma lines) at a time
3857 for (row = s->tiling.tile_row_start; row < s->tiling.tile_row_end;
3858 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 32) {
3859 struct VP9Filter *lflvl_ptr = s->lflvl;
3860 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
3862 for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
3863 set_tile_offset(&s->tiling.tile_col_start, &s->tiling.tile_col_end,
3864 tile_col, s->tiling.log2_tile_cols, s->sb_cols);
// reset the "left" edge contexts at each tile-column boundary
3867 memset(s->left_partition_ctx, 0, 8);
3868 memset(s->left_skip_ctx, 0, 8);
3869 if (s->keyframe || s->intraonly) {
3870 memset(s->left_mode_ctx, DC_PRED, 16);
3872 memset(s->left_mode_ctx, NEARESTMV, 8);
3874 memset(s->left_y_nnz_ctx, 0, 16);
3875 memset(s->left_uv_nnz_ctx, 0, 16);
3876 memset(s->left_segpred_ctx, 0, 8);
// swap in this tile column's range decoder state
3878 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
3881 for (col = s->tiling.tile_col_start;
3882 col < s->tiling.tile_col_end;
3883 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3884 // FIXME integrate with lf code (i.e. zero after each
3885 // use, similar to invtxfm coefficients, or similar)
3887 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
// pass 2 replays the pass-1 parse from memory; single-pass
// (or pass 1) decodes from the bitstream
3891 decode_sb_mem(ctx, row, col, lflvl_ptr,
3892 yoff2, uvoff2, BL_64X64);
3894 decode_sb(ctx, row, col, lflvl_ptr,
3895 yoff2, uvoff2, BL_64X64);
// save the decoder state back for the next sb row of this tile
3899 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
3907 // backup pre-loopfilter reconstruction data for intra
3908 // prediction of next row of sb64s
3909 if (row + 8 < s->rows) {
3910 memcpy(s->intra_pred_data[0],
3911 f->data[0] + yoff + 63 * ls_y,
3913 memcpy(s->intra_pred_data[1],
3914 f->data[1] + uvoff + 31 * ls_uv,
3916 memcpy(s->intra_pred_data[2],
3917 f->data[2] + uvoff + 31 * ls_uv,
3921 // loopfilter one row
3922 if (s->filter.level) {
3925 lflvl_ptr = s->lflvl;
3926 for (col = 0; col < s->cols;
3927 col += 8, yoff2 += 64, uvoff2 += 32, lflvl_ptr++) {
3928 loopfilter_sb(ctx, lflvl_ptr, row, col, yoff2, uvoff2);
3932 // FIXME maybe we can make this more finegrained by running the
3933 // loopfilter per-block instead of after each sbrow
3934 // In fact that would also make intra pred left preparation easier?
3935 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
// after pass 1 (or a plain single pass): backward-adapt the frame
// context, then signal consuming threads that setup is complete
3939 if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
3941 ff_thread_finish_setup(ctx);
3943 } while (s->pass++ == 1);
3944 ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
// commit the prepared next_refs into the visible reference slots
3947 for (i = 0; i < 8; i++) {
3948 if (s->refs[i].f->data[0])
3949 ff_thread_release_buffer(ctx, &s->refs[i]);
3950 ff_thread_ref_frame(&s->refs[i], &s->next_refs[i]);
// only output a frame when the bitstream marks it visible
3953 if (!s->invisible) {
3954 if ((res = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f)) < 0)
3962 static void vp9_decode_flush(AVCodecContext *ctx)
3964 VP9Context *s = ctx->priv_data;
3967 for (i = 0; i < 2; i++)
3968 vp9_unref_frame(ctx, &s->frames[i]);
3969 for (i = 0; i < 8; i++)
3970 ff_thread_release_buffer(ctx, &s->refs[i]);
3973 static int init_frames(AVCodecContext *ctx)
3975 VP9Context *s = ctx->priv_data;
3978 for (i = 0; i < 2; i++) {
3979 s->frames[i].tf.f = av_frame_alloc();
3980 if (!s->frames[i].tf.f) {
3981 vp9_decode_free(ctx);
3982 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
3983 return AVERROR(ENOMEM);
3986 for (i = 0; i < 8; i++) {
3987 s->refs[i].f = av_frame_alloc();
3988 s->next_refs[i].f = av_frame_alloc();
3989 if (!s->refs[i].f || !s->next_refs[i].f) {
3990 vp9_decode_free(ctx);
3991 av_log(ctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
3992 return AVERROR(ENOMEM);
3999 static av_cold int vp9_decode_init(AVCodecContext *ctx)
4001 VP9Context *s = ctx->priv_data;
4003 ctx->internal->allocate_progress = 1;
4004 ctx->pix_fmt = AV_PIX_FMT_YUV420P;
4005 ff_vp9dsp_init(&s->dsp);
4006 ff_videodsp_init(&s->vdsp, 8);
4007 s->filter.sharpness = -1;
4009 return init_frames(ctx);
// Frame-threading worker init: each worker context only needs its own
// AVFrame shells; all decoding state is copied later through
// vp9_decode_update_thread_context().
4012 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
4014 return init_frames(avctx);
// Frame-threading sync: copy the decoding state a worker thread needs
// from the source context (the thread that just parsed a frame header)
// into this destination context.
4017 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
4020 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
4022 // detect size changes in other threads
4023 if (s->intra_pred_data[0] &&
4024 (!ssrc->intra_pred_data[0] || s->cols != ssrc->cols || s->rows != ssrc->rows)) {
// re-reference the source's internal frames into this context
4028 for (i = 0; i < 2; i++) {
4029 if (s->frames[i].tf.f->data[0])
4030 vp9_unref_frame(dst, &s->frames[i]);
4031 if (ssrc->frames[i].tf.f->data[0]) {
4032 if ((res = vp9_ref_frame(dst, &s->frames[i], &ssrc->frames[i])) < 0)
// take the source's *next_refs* as our refs: they already reflect the
// reference rotation performed by the frame being handed over
4036 for (i = 0; i < 8; i++) {
4037 if (s->refs[i].f->data[0])
4038 ff_thread_release_buffer(dst, &s->refs[i]);
4039 if (ssrc->next_refs[i].f->data[0]) {
4040 if ((res = ff_thread_ref_frame(&s->refs[i], &ssrc->next_refs[i])) < 0)
// copy the scalar state and probability/loopfilter contexts the next
// frame's header parsing depends on
4045 s->invisible = ssrc->invisible;
4046 s->keyframe = ssrc->keyframe;
4047 s->uses_2pass = ssrc->uses_2pass;
4048 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
4049 memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
// segmentation features persist across frames only while enabled
4050 if (ssrc->segmentation.enabled) {
4051 memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
4052 sizeof(s->segmentation.feat));
4058 AVCodec ff_vp9_decoder = {
4060 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
4061 .type = AVMEDIA_TYPE_VIDEO,
4062 .id = AV_CODEC_ID_VP9,
4063 .priv_data_size = sizeof(VP9Context),
4064 .init = vp9_decode_init,
4065 .close = vp9_decode_free,
4066 .decode = vp9_decode_frame,
4067 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS,
4068 .flush = vp9_decode_flush,
4069 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
4070 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),