/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
34 #include "libavutil/avassert.h"
35 #include "libavutil/pixdesc.h"
37 #define VP9_SYNCCODE 0x498342
39 static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
41 ff_thread_release_buffer(avctx, &f->tf);
42 av_buffer_unref(&f->extradata);
43 av_buffer_unref(&f->hwaccel_priv_buf);
44 f->segmentation_map = NULL;
45 f->hwaccel_picture_private = NULL;
48 static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
50 VP9Context *s = avctx->priv_data;
53 ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
57 sz = 64 * s->sb_cols * s->sb_rows;
58 f->extradata = av_buffer_allocz(sz * (1 + sizeof(VP9mvrefPair)));
63 f->segmentation_map = f->extradata->data;
64 f->mv = (VP9mvrefPair *) (f->extradata->data + sz);
67 const AVHWAccel *hwaccel = avctx->hwaccel;
68 av_assert0(!f->hwaccel_picture_private);
69 if (hwaccel->frame_priv_data_size) {
70 f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
71 if (!f->hwaccel_priv_buf)
73 f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
80 vp9_frame_unref(avctx, f);
81 return AVERROR(ENOMEM);
84 static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
88 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
92 dst->extradata = av_buffer_ref(src->extradata);
96 dst->segmentation_map = src->segmentation_map;
98 dst->uses_2pass = src->uses_2pass;
100 if (src->hwaccel_picture_private) {
101 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
102 if (!dst->hwaccel_priv_buf)
104 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
110 vp9_frame_unref(avctx, dst);
111 return AVERROR(ENOMEM);
/* Reconfigure the decoder for a new frame size / pixel format.
 * Negotiates the output format (incl. hwaccel formats), recomputes the
 * block/superblock grid and reallocates the per-column "above" context
 * buffers. Returns 0 or a negative AVERROR.
 * NOTE(review): this extraction has dropped interior lines (gaps in the
 * fused numbering) -- braces, error paths and some statements are missing
 * from view; do not assume the visible lines are contiguous. */
114 static int update_size(AVCodecContext *avctx, int w, int h)
116 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + CONFIG_VP9_D3D11VA_HWACCEL + CONFIG_VP9_VAAPI_HWACCEL)
117 enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
118 VP9Context *s = avctx->priv_data;
120 int bytesperpixel = s->bytesperpixel, ret, cols, rows;
122 av_assert0(w > 0 && h > 0);
/* Renegotiate the format only when size or sw pixel format changed. */
124 if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
125 if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
/* Build the candidate list: hwaccel formats first, sw format last. */
128 switch (s->pix_fmt) {
129 case AV_PIX_FMT_YUV420P:
130 #if CONFIG_VP9_DXVA2_HWACCEL
131 *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
133 #if CONFIG_VP9_D3D11VA_HWACCEL
134 *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
136 #if CONFIG_VP9_VAAPI_HWACCEL
137 *fmtp++ = AV_PIX_FMT_VAAPI;
/* 10/12 bit: only VAAPI can take these of the compiled-in hwaccels. */
140 case AV_PIX_FMT_YUV420P10:
141 case AV_PIX_FMT_YUV420P12:
142 #if CONFIG_VP9_VAAPI_HWACCEL
143 *fmtp++ = AV_PIX_FMT_VAAPI;
148 *fmtp++ = s->pix_fmt;
149 *fmtp = AV_PIX_FMT_NONE;
151 ret = ff_thread_get_format(avctx, pix_fmts);
155 avctx->pix_fmt = ret;
156 s->gf_fmt = s->pix_fmt;
/* Context buffers are still valid if the grid and format are unchanged. */
164 if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
167 s->last_fmt = s->pix_fmt;
/* Grid dimensions: superblocks are 64x64, blocks are 8x8. */
168 s->sb_cols = (w + 63) >> 6;
169 s->sb_rows = (h + 63) >> 6;
170 s->cols = (w + 7) >> 3;
171 s->rows = (h + 7) >> 3;
/* Carve all per-sb-column "above" context arrays out of one allocation;
 * assign() advances p by sb_cols * n elements each time. */
173 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
174 av_freep(&s->intra_pred_data[0]);
175 // FIXME we slightly over-allocate here for subsampled chroma, but a little
176 // bit of padding shouldn't affect performance...
177 p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
178 sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
180 return AVERROR(ENOMEM);
181 assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
182 assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
183 assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
184 assign(s->above_y_nnz_ctx, uint8_t *, 16);
185 assign(s->above_mode_ctx, uint8_t *, 16);
186 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
187 assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
188 assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
189 assign(s->above_partition_ctx, uint8_t *, 8);
190 assign(s->above_skip_ctx, uint8_t *, 8);
191 assign(s->above_txfm_ctx, uint8_t *, 8);
192 assign(s->above_segpred_ctx, uint8_t *, 8);
193 assign(s->above_intra_ctx, uint8_t *, 8);
194 assign(s->above_comp_ctx, uint8_t *, 8);
195 assign(s->above_ref_ctx, uint8_t *, 8);
196 assign(s->above_filter_ctx, uint8_t *, 8);
197 assign(s->lflvl, VP9Filter *, 1);
200 // these will be re-allocated a little later
201 av_freep(&s->b_base);
202 av_freep(&s->block_base);
/* Bit depth changed: reinit the dsp function tables for the new bpp. */
204 if (s->s.h.bpp != s->last_bpp) {
205 ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
206 ff_videodsp_init(&s->vdsp, s->s.h.bpp);
207 s->last_bpp = s->s.h.bpp;
/* (Re)allocate the per-block coefficient and EOB buffers.
 * In 2-pass (frame-threaded invisible-frame) mode the buffers must cover
 * every superblock of the frame; otherwise a single superblock's worth is
 * enough because blocks are decoded and reconstructed immediately.
 * The uvblock/eob pointers are carved sequentially out of block_base, so
 * statement order below is significant. Returns 0 or AVERROR(ENOMEM). */
213 static int update_block_buffers(AVCodecContext *avctx)
215 VP9Context *s = avctx->priv_data;
216 int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
/* Nothing to do if already allocated for the current 2-pass mode. */
218 if (s->b_base && s->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
222 av_free(s->block_base);
/* Chroma sizes scale with the subsampling factors. */
223 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
224 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
225 if (s->s.frames[CUR_FRAME].uses_2pass) {
226 int sbs = s->sb_cols * s->sb_rows;
/* 2-pass: buffers for the whole frame (sbs superblocks). */
228 s->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
229 s->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
230 16 * 16 + 2 * chroma_eobs) * sbs);
231 if (!s->b_base || !s->block_base)
232 return AVERROR(ENOMEM);
233 s->uvblock_base[0] = s->block_base + sbs * 64 * 64 * bytesperpixel;
234 s->uvblock_base[1] = s->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
235 s->eob_base = (uint8_t *) (s->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
236 s->uveob_base[0] = s->eob_base + 16 * 16 * sbs;
237 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs * sbs;
/* Single-pass: one superblock's worth of scratch is sufficient. */
239 s->b_base = av_malloc(sizeof(VP9Block));
240 s->block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
241 16 * 16 + 2 * chroma_eobs);
242 if (!s->b_base || !s->block_base)
243 return AVERROR(ENOMEM);
244 s->uvblock_base[0] = s->block_base + 64 * 64 * bytesperpixel;
245 s->uvblock_base[1] = s->uvblock_base[0] + chroma_blocks * bytesperpixel;
246 s->eob_base = (uint8_t *) (s->uvblock_base[1] + chroma_blocks * bytesperpixel);
247 s->uveob_base[0] = s->eob_base + 16 * 16;
248 s->uveob_base[1] = s->uveob_base[0] + chroma_eobs;
/* Remember which layout was used so we can detect mode switches above. */
250 s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;
255 // The sign bit is at the end, not the start, of a bit sequence
256 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
258 int v = get_bits(gb, n);
259 return get_bits1(gb) ? -v : v;
262 static av_always_inline int inv_recenter_nonneg(int v, int m)
267 return m - ((v + 1) >> 1);
271 // differential forward probability updates
272 static int update_prob(VP56RangeCoder *c, int p)
274 static const int inv_map_table[255] = {
275 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
276 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
277 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
278 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
279 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
280 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
281 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
282 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
283 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
284 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
285 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
286 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
287 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
288 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
289 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
290 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
291 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
292 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
297 /* This code is trying to do a differential probability update. For a
298 * current probability A in the range [1, 255], the difference to a new
299 * probability of any value can be expressed differentially as 1-A, 255-A
300 * where some part of this (absolute range) exists both in positive as
301 * well as the negative part, whereas another part only exists in one
302 * half. We're trying to code this shared part differentially, i.e.
303 * times two where the value of the lowest bit specifies the sign, and
304 * the single part is then coded on top of this. This absolute difference
305 * then again has a value of [0, 254], but a bigger value in this range
306 * indicates that we're further away from the original value A, so we
307 * can code this as a VLC code, since higher values are increasingly
308 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
309 * updates vs. the 'fine, exact' updates further down the range, which
310 * adds one extra dimension to this differential update model. */
312 if (!vp8_rac_get(c)) {
313 d = vp8_rac_get_uint(c, 4) + 0;
314 } else if (!vp8_rac_get(c)) {
315 d = vp8_rac_get_uint(c, 4) + 16;
316 } else if (!vp8_rac_get(c)) {
317 d = vp8_rac_get_uint(c, 5) + 32;
319 d = vp8_rac_get_uint(c, 7);
321 d = (d << 1) - 65 + vp8_rac_get(c);
323 av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
326 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
327 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
/* Parse the colorspace / bit depth / subsampling fields of the frame
 * header and derive s->pix_fmt, s->ss_h/ss_v and avctx color properties.
 * Returns 0 or AVERROR_INVALIDDATA on reserved/unsupported combinations.
 * NOTE(review): interior lines (closing braces, some else-arms) were
 * dropped by the extraction; the visible lines are not contiguous. */
330 static int read_colorspace_details(AVCodecContext *avctx)
332 static const enum AVColorSpace colorspaces[8] = {
333 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
334 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
336 VP9Context *s = avctx->priv_data;
337 int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
340 s->s.h.bpp = 8 + bits * 2;
341 s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
342 avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
/* RGB: only valid in the odd-numbered profiles; always full range, 4:4:4. */
343 if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
344 static const enum AVPixelFormat pix_fmt_rgb[3] = {
345 AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
347 s->ss_h = s->ss_v = 0;
348 avctx->color_range = AVCOL_RANGE_JPEG;
349 s->pix_fmt = pix_fmt_rgb[bits];
350 if (avctx->profile & 1) {
351 if (get_bits1(&s->gb)) {
352 av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
353 return AVERROR_INVALIDDATA;
356 av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
358 return AVERROR_INVALIDDATA;
/* YUV: pick the pixel format from bit depth and h/v subsampling. */
361 static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
362 { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
363 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
364 { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
365 { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
366 { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
367 { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
369 avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
/* Odd profiles carry explicit subsampling bits; even profiles imply 4:2:0. */
370 if (avctx->profile & 1) {
371 s->ss_h = get_bits1(&s->gb);
372 s->ss_v = get_bits1(&s->gb);
373 s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
374 if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
375 av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
377 return AVERROR_INVALIDDATA;
378 } else if (get_bits1(&s->gb)) {
379 av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
381 return AVERROR_INVALIDDATA;
384 s->ss_h = s->ss_v = 1;
385 s->pix_fmt = pix_fmt_for_ss[bits][1][1];
/* Parse the uncompressed and compressed (range-coded) parts of a VP9
 * frame header: frame type, reference setup, loopfilter, quantization,
 * segmentation, tiling, and all forward probability updates.
 * On success returns the total header size in bytes (uncompressed +
 * compressed); *ref is set and a positive value returned early for
 * "show existing frame" packets. Negative AVERROR on invalid data.
 * NOTE(review): interior lines (braces, error checks, else-arms) were
 * dropped by the extraction; visible lines are not contiguous. */
392 static int decode_frame_header(AVCodecContext *avctx,
393 const uint8_t *data, int size, int *ref)
395 VP9Context *s = avctx->priv_data;
396 int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
398 const uint8_t *data2;
/* --- uncompressed header: frame marker, profile, frame type --- */
401 if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
402 av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
405 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
406 av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
407 return AVERROR_INVALIDDATA;
409 avctx->profile = get_bits1(&s->gb);
410 avctx->profile |= get_bits1(&s->gb) << 1;
411 if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
412 if (avctx->profile > 3) {
413 av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
414 return AVERROR_INVALIDDATA;
416 s->s.h.profile = avctx->profile;
/* show_existing_frame: just output an already-decoded reference. */
417 if (get_bits1(&s->gb)) {
418 *ref = get_bits(&s->gb, 3);
422 s->last_keyframe = s->s.h.keyframe;
423 s->s.h.keyframe = !get_bits1(&s->gb);
425 last_invisible = s->s.h.invisible;
426 s->s.h.invisible = !get_bits1(&s->gb);
427 s->s.h.errorres = get_bits1(&s->gb);
428 s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
/* --- keyframe path: sync code, colorspace, frame size --- */
430 if (s->s.h.keyframe) {
431 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
432 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
433 return AVERROR_INVALIDDATA;
435 if ((ret = read_colorspace_details(avctx)) < 0)
437 // for profile 1, here follows the subsampling bits
438 s->s.h.refreshrefmask = 0xff;
439 w = get_bits(&s->gb, 16) + 1;
440 h = get_bits(&s->gb, 16) + 1;
441 if (get_bits1(&s->gb)) // display size
442 skip_bits(&s->gb, 32);
/* --- inter/intraonly path --- */
444 s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
445 s->s.h.resetctx = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
446 if (s->s.h.intraonly) {
447 if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
448 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
449 return AVERROR_INVALIDDATA;
451 if (avctx->profile >= 1) {
452 if ((ret = read_colorspace_details(avctx)) < 0)
/* profile 0 intraonly frames are hardcoded 8-bit 4:2:0 BT.470bg/JPEG. */
455 s->ss_h = s->ss_v = 1;
458 s->bytesperpixel = 1;
459 s->pix_fmt = AV_PIX_FMT_YUV420P;
460 avctx->colorspace = AVCOL_SPC_BT470BG;
461 avctx->color_range = AVCOL_RANGE_JPEG;
463 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
464 w = get_bits(&s->gb, 16) + 1;
465 h = get_bits(&s->gb, 16) + 1;
466 if (get_bits1(&s->gb)) // display size
467 skip_bits(&s->gb, 32);
/* inter frames: three reference slots with per-ref sign bias. */
469 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
470 s->s.h.refidx[0] = get_bits(&s->gb, 3);
471 s->s.h.signbias[0] = get_bits1(&s->gb) && !s->s.h.errorres;
472 s->s.h.refidx[1] = get_bits(&s->gb, 3);
473 s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres;
474 s->s.h.refidx[2] = get_bits(&s->gb, 3);
475 s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres;
476 if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
477 !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
478 !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
479 av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
480 return AVERROR_INVALIDDATA;
/* Frame size may be inherited from one of the references. */
482 if (get_bits1(&s->gb)) {
483 w = s->s.refs[s->s.h.refidx[0]].f->width;
484 h = s->s.refs[s->s.h.refidx[0]].f->height;
485 } else if (get_bits1(&s->gb)) {
486 w = s->s.refs[s->s.h.refidx[1]].f->width;
487 h = s->s.refs[s->s.h.refidx[1]].f->height;
488 } else if (get_bits1(&s->gb)) {
489 w = s->s.refs[s->s.h.refidx[2]].f->width;
490 h = s->s.refs[s->s.h.refidx[2]].f->height;
492 w = get_bits(&s->gb, 16) + 1;
493 h = get_bits(&s->gb, 16) + 1;
495 // Note that in this code, "CUR_FRAME" is actually before we
496 // have formally allocated a frame, and thus actually represents
498 s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
499 s->s.frames[CUR_FRAME].tf.f->height == h;
500 if (get_bits1(&s->gb)) // display size
501 skip_bits(&s->gb, 32);
502 s->s.h.highprecisionmvs = get_bits1(&s->gb);
503 s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
/* Compound prediction needs references with differing sign bias. */
505 s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
506 s->s.h.signbias[0] != s->s.h.signbias[2];
507 if (s->s.h.allowcompinter) {
508 if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
509 s->s.h.fixcompref = 2;
510 s->s.h.varcompref[0] = 0;
511 s->s.h.varcompref[1] = 1;
512 } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
513 s->s.h.fixcompref = 1;
514 s->s.h.varcompref[0] = 0;
515 s->s.h.varcompref[1] = 2;
517 s->s.h.fixcompref = 0;
518 s->s.h.varcompref[0] = 1;
519 s->s.h.varcompref[1] = 2;
524 s->s.h.refreshctx = s->s.h.errorres ? 0 : get_bits1(&s->gb);
525 s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
526 s->s.h.framectxid = c = get_bits(&s->gb, 2);
527 if (s->s.h.keyframe || s->s.h.intraonly)
528 s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
530 /* loopfilter header data */
531 if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
532 // reset loopfilter defaults
533 s->s.h.lf_delta.ref[0] = 1;
534 s->s.h.lf_delta.ref[1] = 0;
535 s->s.h.lf_delta.ref[2] = -1;
536 s->s.h.lf_delta.ref[3] = -1;
537 s->s.h.lf_delta.mode[0] = 0;
538 s->s.h.lf_delta.mode[1] = 0;
539 memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
541 s->s.h.filter.level = get_bits(&s->gb, 6);
542 sharp = get_bits(&s->gb, 3);
543 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
544 // the old cache values since they are still valid
545 if (s->s.h.filter.sharpness != sharp)
546 memset(s->filter_lut.lim_lut, 0, sizeof(s->filter_lut.lim_lut));
547 s->s.h.filter.sharpness = sharp;
548 if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
549 if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
550 for (i = 0; i < 4; i++)
551 if (get_bits1(&s->gb))
552 s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
553 for (i = 0; i < 2; i++)
554 if (get_bits1(&s->gb))
555 s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
559 /* quantization header data */
560 s->s.h.yac_qi = get_bits(&s->gb, 8);
561 s->s.h.ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
562 s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
563 s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
564 s->s.h.lossless = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
565 s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
567 avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
569 /* segmentation header info */
570 if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
571 if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
572 for (i = 0; i < 7; i++)
573 s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
574 get_bits(&s->gb, 8) : 255;
575 if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
576 for (i = 0; i < 3; i++)
577 s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
578 get_bits(&s->gb, 8) : 255;
/* per-segment feature data (quant/lf deltas, ref, skip). */
581 if (get_bits1(&s->gb)) {
582 s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
583 for (i = 0; i < 8; i++) {
584 if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
585 s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
586 if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
587 s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
588 if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
589 s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
590 s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
595 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
596 for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
597 int qyac, qydc, quvac, quvdc, lflvl, sh;
599 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
600 if (s->s.h.segmentation.absolute_vals)
601 qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
603 qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
605 qyac = s->s.h.yac_qi;
607 qydc = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
608 quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
609 quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
610 qyac = av_clip_uintp2(qyac, 8);
612 s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
613 s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
614 s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
615 s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];
/* per-segment loopfilter levels, with optional ref/mode deltas. */
617 sh = s->s.h.filter.level >= 32;
618 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
619 if (s->s.h.segmentation.absolute_vals)
620 lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
622 lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
624 lflvl = s->s.h.filter.level;
626 if (s->s.h.lf_delta.enabled) {
627 s->s.h.segmentation.feat[i].lflvl[0][0] =
628 s->s.h.segmentation.feat[i].lflvl[0][1] =
629 av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
630 for (j = 1; j < 4; j++) {
631 s->s.h.segmentation.feat[i].lflvl[j][0] =
632 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
633 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
634 s->s.h.segmentation.feat[i].lflvl[j][1] =
635 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
636 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
639 memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
640 sizeof(s->s.h.segmentation.feat[i].lflvl));
/* --- frame size dependent state + tiling --- */
645 if ((ret = update_size(avctx, w, h)) < 0) {
646 av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
650 for (s->s.h.tiling.log2_tile_cols = 0;
651 s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
652 s->s.h.tiling.log2_tile_cols++) ;
653 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
654 max = FFMAX(0, max - 1);
655 while (max > s->s.h.tiling.log2_tile_cols) {
656 if (get_bits1(&s->gb))
657 s->s.h.tiling.log2_tile_cols++;
661 s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
662 s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
663 if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
664 s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
665 s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
666 sizeof(VP56RangeCoder) * s->s.h.tiling.tile_cols);
668 av_log(avctx, AV_LOG_ERROR, "Ran out of memory during range coder init\n");
669 return AVERROR(ENOMEM);
673 /* check reference frames */
674 if (!s->s.h.keyframe && !s->s.h.intraonly) {
675 for (i = 0; i < 3; i++) {
676 AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
677 int refw = ref->width, refh = ref->height;
679 if (ref->format != avctx->pix_fmt) {
680 av_log(avctx, AV_LOG_ERROR,
681 "Ref pixfmt (%s) did not match current frame (%s)",
682 av_get_pix_fmt_name(ref->format),
683 av_get_pix_fmt_name(avctx->pix_fmt));
684 return AVERROR_INVALIDDATA;
685 } else if (refw == w && refh == h) {
686 s->mvscale[i][0] = s->mvscale[i][1] = 0;
/* scaled reference: 14-bit fixed-point mv scaling factors. */
688 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
689 av_log(avctx, AV_LOG_ERROR,
690 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
692 return AVERROR_INVALIDDATA;
694 s->mvscale[i][0] = (refw << 14) / w;
695 s->mvscale[i][1] = (refh << 14) / h;
696 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
697 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
/* --- probability context reset per resetctx rules --- */
702 if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
703 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
704 s->prob_ctx[3].p = ff_vp9_default_probs;
705 memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
706 sizeof(ff_vp9_default_coef_probs));
707 memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
708 sizeof(ff_vp9_default_coef_probs));
709 memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
710 sizeof(ff_vp9_default_coef_probs));
711 memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
712 sizeof(ff_vp9_default_coef_probs));
713 } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
714 s->prob_ctx[c].p = ff_vp9_default_probs;
715 memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
716 sizeof(ff_vp9_default_coef_probs));
719 // next 16 bits is size of the rest of the header (arith-coded)
720 s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
721 s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
723 data2 = align_get_bits(&s->gb);
724 if (size2 > size - (data2 - data)) {
725 av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
726 return AVERROR_INVALIDDATA;
728 ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
732 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
733 av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
734 return AVERROR_INVALIDDATA;
/* reset stats counters; intra frames only need coef/eob counts. */
737 if (s->s.h.keyframe || s->s.h.intraonly) {
738 memset(s->counts.coef, 0, sizeof(s->counts.coef));
739 memset(s->counts.eob, 0, sizeof(s->counts.eob));
741 memset(&s->counts, 0, sizeof(s->counts));
743 /* FIXME is it faster to not copy here, but do it down in the fw updates
744 * as explicit copies if the fw update is missing (and skip the copy upon
746 s->prob.p = s->prob_ctx[c].p;
/* --- compressed header: txfm mode --- */
749 if (s->s.h.lossless) {
750 s->s.h.txfmmode = TX_4X4;
752 s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
753 if (s->s.h.txfmmode == 3)
754 s->s.h.txfmmode += vp8_rac_get(&s->c);
756 if (s->s.h.txfmmode == TX_SWITCHABLE) {
757 for (i = 0; i < 2; i++)
758 if (vp56_rac_get_prob_branchy(&s->c, 252))
759 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
760 for (i = 0; i < 2; i++)
761 for (j = 0; j < 2; j++)
762 if (vp56_rac_get_prob_branchy(&s->c, 252))
763 s->prob.p.tx16p[i][j] =
764 update_prob(&s->c, s->prob.p.tx16p[i][j]);
765 for (i = 0; i < 2; i++)
766 for (j = 0; j < 3; j++)
767 if (vp56_rac_get_prob_branchy(&s->c, 252))
768 s->prob.p.tx32p[i][j] =
769 update_prob(&s->c, s->prob.p.tx32p[i][j]);
/* --- coefficient probability updates, per tx size --- */
774 for (i = 0; i < 4; i++) {
775 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
776 if (vp8_rac_get(&s->c)) {
777 for (j = 0; j < 2; j++)
778 for (k = 0; k < 2; k++)
779 for (l = 0; l < 6; l++)
780 for (m = 0; m < 6; m++) {
781 uint8_t *p = s->prob.coef[i][j][k][l][m];
782 uint8_t *r = ref[j][k][l][m];
783 if (m >= 3 && l == 0) // dc only has 3 pt
785 for (n = 0; n < 3; n++) {
786 if (vp56_rac_get_prob_branchy(&s->c, 252))
787 p[n] = update_prob(&s->c, r[n]);
794 for (j = 0; j < 2; j++)
795 for (k = 0; k < 2; k++)
796 for (l = 0; l < 6; l++)
797 for (m = 0; m < 6; m++) {
798 uint8_t *p = s->prob.coef[i][j][k][l][m];
799 uint8_t *r = ref[j][k][l][m];
800 if (m > 3 && l == 0) // dc only has 3 pt
806 if (s->s.h.txfmmode == i)
/* --- mode/ref/filter probability updates --- */
811 for (i = 0; i < 3; i++)
812 if (vp56_rac_get_prob_branchy(&s->c, 252))
813 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
814 if (!s->s.h.keyframe && !s->s.h.intraonly) {
815 for (i = 0; i < 7; i++)
816 for (j = 0; j < 3; j++)
817 if (vp56_rac_get_prob_branchy(&s->c, 252))
818 s->prob.p.mv_mode[i][j] =
819 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
821 if (s->s.h.filtermode == FILTER_SWITCHABLE)
822 for (i = 0; i < 4; i++)
823 for (j = 0; j < 2; j++)
824 if (vp56_rac_get_prob_branchy(&s->c, 252))
825 s->prob.p.filter[i][j] =
826 update_prob(&s->c, s->prob.p.filter[i][j]);
828 for (i = 0; i < 4; i++)
829 if (vp56_rac_get_prob_branchy(&s->c, 252))
830 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
832 if (s->s.h.allowcompinter) {
833 s->s.h.comppredmode = vp8_rac_get(&s->c);
834 if (s->s.h.comppredmode)
835 s->s.h.comppredmode += vp8_rac_get(&s->c);
836 if (s->s.h.comppredmode == PRED_SWITCHABLE)
837 for (i = 0; i < 5; i++)
838 if (vp56_rac_get_prob_branchy(&s->c, 252))
840 update_prob(&s->c, s->prob.p.comp[i]);
842 s->s.h.comppredmode = PRED_SINGLEREF;
845 if (s->s.h.comppredmode != PRED_COMPREF) {
846 for (i = 0; i < 5; i++) {
847 if (vp56_rac_get_prob_branchy(&s->c, 252))
848 s->prob.p.single_ref[i][0] =
849 update_prob(&s->c, s->prob.p.single_ref[i][0]);
850 if (vp56_rac_get_prob_branchy(&s->c, 252))
851 s->prob.p.single_ref[i][1] =
852 update_prob(&s->c, s->prob.p.single_ref[i][1]);
856 if (s->s.h.comppredmode != PRED_SINGLEREF) {
857 for (i = 0; i < 5; i++)
858 if (vp56_rac_get_prob_branchy(&s->c, 252))
859 s->prob.p.comp_ref[i] =
860 update_prob(&s->c, s->prob.p.comp_ref[i]);
863 for (i = 0; i < 4; i++)
864 for (j = 0; j < 9; j++)
865 if (vp56_rac_get_prob_branchy(&s->c, 252))
866 s->prob.p.y_mode[i][j] =
867 update_prob(&s->c, s->prob.p.y_mode[i][j]);
869 for (i = 0; i < 4; i++)
870 for (j = 0; j < 4; j++)
871 for (k = 0; k < 3; k++)
872 if (vp56_rac_get_prob_branchy(&s->c, 252))
873 s->prob.p.partition[3 - i][j][k] =
875 s->prob.p.partition[3 - i][j][k]);
877 // mv fields don't use the update_prob subexp model for some reason
878 for (i = 0; i < 3; i++)
879 if (vp56_rac_get_prob_branchy(&s->c, 252))
880 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
882 for (i = 0; i < 2; i++) {
883 if (vp56_rac_get_prob_branchy(&s->c, 252))
884 s->prob.p.mv_comp[i].sign =
885 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
887 for (j = 0; j < 10; j++)
888 if (vp56_rac_get_prob_branchy(&s->c, 252))
889 s->prob.p.mv_comp[i].classes[j] =
890 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
892 if (vp56_rac_get_prob_branchy(&s->c, 252))
893 s->prob.p.mv_comp[i].class0 =
894 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
896 for (j = 0; j < 10; j++)
897 if (vp56_rac_get_prob_branchy(&s->c, 252))
898 s->prob.p.mv_comp[i].bits[j] =
899 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
902 for (i = 0; i < 2; i++) {
903 for (j = 0; j < 2; j++)
904 for (k = 0; k < 3; k++)
905 if (vp56_rac_get_prob_branchy(&s->c, 252))
906 s->prob.p.mv_comp[i].class0_fp[j][k] =
907 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
909 for (j = 0; j < 3; j++)
910 if (vp56_rac_get_prob_branchy(&s->c, 252))
911 s->prob.p.mv_comp[i].fp[j] =
912 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
915 if (s->s.h.highprecisionmvs) {
916 for (i = 0; i < 2; i++) {
917 if (vp56_rac_get_prob_branchy(&s->c, 252))
918 s->prob.p.mv_comp[i].class0_hp =
919 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
921 if (vp56_rac_get_prob_branchy(&s->c, 252))
922 s->prob.p.mv_comp[i].hp =
923 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
/* total header size: uncompressed part + compressed part. */
928 return (data2 - data) + size2;
/* Recursively decode one superblock subtree: read the partition symbol
 * for the current block level from the range coder (using above/left
 * partition context), then either decode a block directly or recurse into
 * the four quadrants. Edge blocks that extend past the frame clamp the
 * choice of partitions (the p[1]/p[2] branchy reads below).
 * NOTE(review): interior lines (switch labels, braces) were dropped by
 * the extraction; visible lines are not contiguous. */
931 static void decode_sb(AVCodecContext *avctx, int row, int col, VP9Filter *lflvl,
932 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
934 VP9Context *s = avctx->priv_data;
/* 2-bit partition context from the above/left neighbors at this level. */
935 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
936 (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
937 const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
938 s->prob.p.partition[bl][c];
939 enum BlockPartition bp;
940 ptrdiff_t hbs = 4 >> bl;
941 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
942 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
943 int bytesperpixel = s->bytesperpixel;
/* Fully inside the frame: read the full partition tree. */
946 bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
947 ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
948 } else if (col + hbs < s->cols) { // FIXME why not <=?
949 if (row + hbs < s->rows) { // FIXME why not <=?
950 bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
953 ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
/* PARTITION_H: top and bottom halves. */
956 ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
957 yoff += hbs * 8 * y_stride;
958 uvoff += hbs * 8 * uv_stride >> s->ss_v;
959 ff_vp9_decode_block(avctx, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
/* PARTITION_V: left and right halves. */
962 ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
963 yoff += hbs * 8 * bytesperpixel;
964 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
965 ff_vp9_decode_block(avctx, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
967 case PARTITION_SPLIT:
968 decode_sb(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
969 decode_sb(avctx, row, col + hbs, lflvl,
970 yoff + 8 * hbs * bytesperpixel,
971 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
972 yoff += hbs * 8 * y_stride;
973 uvoff += hbs * 8 * uv_stride >> s->ss_v;
974 decode_sb(avctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
975 decode_sb(avctx, row + hbs, col + hbs, lflvl,
976 yoff + 8 * hbs * bytesperpixel,
977 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
/* Right edge: only split or horizontal partitions are possible. */
982 } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
983 bp = PARTITION_SPLIT;
984 decode_sb(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
985 decode_sb(avctx, row, col + hbs, lflvl,
986 yoff + 8 * hbs * bytesperpixel,
987 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
990 ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
/* Bottom edge: only split or vertical partitions are possible. */
992 } else if (row + hbs < s->rows) { // FIXME why not <=?
993 if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
994 bp = PARTITION_SPLIT;
995 decode_sb(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
996 yoff += hbs * 8 * y_stride;
997 uvoff += hbs * 8 * uv_stride >> s->ss_v;
998 decode_sb(avctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1001 ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
/* Bottom-right corner: split is the only legal choice. */
1004 bp = PARTITION_SPLIT;
1005 decode_sb(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
1007 s->counts.partition[bl][c][bp]++;
/* Second-pass twin of decode_sb(): instead of parsing partition symbols from
 * the bitstream it replays the block structure recorded during the first pass
 * (s->b, with per-block level b->bl and partition b->bp) and re-decodes the
 * blocks from that in-memory representation. */
1010 static void decode_sb_mem(AVCodecContext *avctx, int row, int col, VP9Filter *lflvl,
1011                        ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1013     VP9Context *s = avctx->priv_data;
/* Half the block size at this level, in units of 8 pixels. */
1015 ptrdiff_t hbs = 4 >> bl;
1016 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1017 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1018 int bytesperpixel = s->bytesperpixel;
/* 8x8 leaf: the stored block must itself be at the leaf level. */
1021 av_assert2(b->bl == BL_8X8);
1022 ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
/* Stored block was coded at exactly this level: decode it, plus the second
 * half for H/V partitions when it lies inside the frame. */
1023 } else if (s->b->bl == bl) {
1024 ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1025 if (b->bp == PARTITION_H && row + hbs < s->rows) {
1026 yoff += hbs * 8 * y_stride;
1027 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1028 ff_vp9_decode_block(avctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
1029 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
1030 yoff += hbs * 8 * bytesperpixel;
1031 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1032 ff_vp9_decode_block(avctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
/* Stored block is at a deeper level: recurse into the quadrants that lie
 * inside the frame (split was implied at this level). */
1035 decode_sb_mem(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
1036 if (col + hbs < s->cols) { // FIXME why not <=?
1037 if (row + hbs < s->rows) {
1038 decode_sb_mem(avctx, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1039 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1040 yoff += hbs * 8 * y_stride;
1041 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1042 decode_sb_mem(avctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1043 decode_sb_mem(avctx, row + hbs, col + hbs, lflvl,
1044 yoff + 8 * hbs * bytesperpixel,
1045 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
/* Only the right half exists (bottom is outside the frame). */
1047 yoff += hbs * 8 * bytesperpixel;
1048 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1049 decode_sb_mem(avctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
/* Only the bottom half exists (right is outside the frame). */
1051 } else if (row + hbs < s->rows) {
1052 yoff += hbs * 8 * y_stride;
1053 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1054 decode_sb_mem(avctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
/* Compute the pixel range [*start, *end) covered by tile index idx when the
 * frame is divided into 2^log2_n tiles over n superblocks.  The superblock
 * interval is derived by even division, clamped to n, and converted to
 * pixels (1 superblock column/row == 8 blocks of 8 pixels, hence << 3 in
 * block units). */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = (idx * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;

    /* Clamp to the total superblock count (equivalent to FFMIN(x, n)). */
    if (sb_start > n)
        sb_start = n;
    if (sb_end > n)
        sb_end = n;

    *start = sb_start << 3;
    *end   = sb_end << 3;
}
1067 static void free_buffers(VP9Context *s)
1069 av_freep(&s->intra_pred_data[0]);
1070 av_freep(&s->b_base);
1071 av_freep(&s->block_base);
/* Codec close callback: tear down every frame and reference the decoder
 * holds.  Each ThreadFrame is first unreferenced (buffer released) and then
 * its AVFrame shell is freed.  Safe to call on a partially-initialized
 * context (init_frames() calls it on allocation failure). */
1074 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1076 VP9Context *s = avctx->priv_data;
/* The three internal frames: current, segmentation-map ref, mv-pair ref. */
1079 for (i = 0; i < 3; i++) {
1080 if (s->s.frames[i].tf.f->buf[0])
1081 vp9_frame_unref(avctx, &s->s.frames[i]);
1082 av_frame_free(&s->s.frames[i].tf.f);
/* The 8 bitstream reference slots plus their staged "next" counterparts. */
1084 for (i = 0; i < 8; i++) {
1085 if (s->s.refs[i].f->buf[0])
1086 ff_thread_release_buffer(avctx, &s->s.refs[i]);
1087 av_frame_free(&s->s.refs[i].f);
1088 if (s->next_refs[i].f->buf[0])
1089 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1090 av_frame_free(&s->next_refs[i].f);
/* Main decode entry point: parse one packet into one output frame.
 * Handles superframe "show-existing-frame" packets, frame-buffer rotation,
 * hwaccel delegation, the (possibly two-pass) tile decode loop, loop
 * filtering, and reference-slot updates.  Returns bytes consumed or a
 * negative AVERROR. */
1100 static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
1101                         int *got_frame, AVPacket *pkt)
1103     const uint8_t *data = pkt->data;
1104     int size = pkt->size;
1105     VP9Context *s = avctx->priv_data;
1106     int ret, tile_row, tile_col, i, ref, row, col;
/* Keep the previous segmentation map when segmentation is disabled or the
 * map is not being updated this frame. */
1107     int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
1108 (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
1109 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1113 if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
/* ret == 0 from the header parser means "show existing frame": output a
 * reference to ref slot `ref` directly, no decoding. */
1115 } else if (ret == 0) {
1116 if (!s->s.refs[ref].f->buf[0]) {
1117 av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
1118 return AVERROR_INVALIDDATA;
1120 if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
1122 ((AVFrame *)frame)->pts = pkt->pts;
/* pkt_pts is deprecated API; still set for compatibility. */
1124 FF_DISABLE_DEPRECATION_WARNINGS
1125 ((AVFrame *)frame)->pkt_pts = pkt->pts;
1126 FF_ENABLE_DEPRECATION_WARNINGS
1128 ((AVFrame *)frame)->pkt_dts = pkt->dts;
/* Refresh next_refs from the current reference set before returning. */
1129 for (i = 0; i < 8; i++) {
1130 if (s->next_refs[i].f->buf[0])
1131 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1132 if (s->s.refs[i].f->buf[0] &&
1133 (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
/* Rotate internal frames: previous CUR_FRAME becomes the segmentation-map
 * and mv-pair references (unless the old segmap must be retained). */
1142 if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
1143 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
1144 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
1145 if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1146 (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
1149 if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
1150 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
1151 if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1152 (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
/* Allocate a fresh buffer for the frame being decoded now. */
1154 if (s->s.frames[CUR_FRAME].tf.f->buf[0])
1155 vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME]);
1156 if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
1158 f = s->s.frames[CUR_FRAME].tf.f;
1159 f->key_frame = s->s.h.keyframe;
1160 f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1161 ls_y = f->linesize[0];
1162 ls_uv =f->linesize[1];
/* A retained segmentation map is unusable if the frame size changed. */
1164 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
1165 (s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width ||
1166 s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
1167 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
/* Stage the post-decode reference set: slots selected by refreshrefmask
 * point at the new frame, the rest keep their current contents. */
1171 for (i = 0; i < 8; i++) {
1172 if (s->next_refs[i].f->buf[0])
1173 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1174 if (s->s.h.refreshrefmask & (1 << i)) {
1175 ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
1176 } else if (s->s.refs[i].f->buf[0]) {
1177 ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
/* Hardware acceleration path: hand the whole packet to the hwaccel and
 * skip the software tile loop entirely. */
1183 if (avctx->hwaccel) {
1184 ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
1187 ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
1190 ret = avctx->hwaccel->end_frame(avctx);
1196 // main tile decode loop
1197 bytesperpixel = s->bytesperpixel;
/* Reset the "above" prediction context rows for the whole frame width. */
1198 memset(s->above_partition_ctx, 0, s->cols);
1199 memset(s->above_skip_ctx, 0, s->cols);
1200 if (s->s.h.keyframe || s->s.h.intraonly) {
1201 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
1203 memset(s->above_mode_ctx, NEARESTMV, s->cols);
1205 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
1206 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
1207 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
1208 memset(s->above_segpred_ctx, 0, s->cols);
/* Two-pass decode is used with frame threading when this frame both
 * refreshes the probability context and is not in parallel mode. */
1209 s->pass = s->s.frames[CUR_FRAME].uses_2pass =
1210 avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
1211 if ((ret = update_block_buffers(avctx)) < 0) {
1212 av_log(avctx, AV_LOG_ERROR,
1213 "Failed to allocate block buffers\n");
/* In parallel mode the context refresh happens up front (no backward
 * adaptation), so dependent frame threads can be released early. */
1216 if (s->s.h.refreshctx && s->s.h.parallelmode) {
1219 for (i = 0; i < 4; i++) {
1220 for (j = 0; j < 2; j++)
1221 for (k = 0; k < 2; k++)
1222 for (l = 0; l < 6; l++)
1223 for (m = 0; m < 6; m++)
1224 memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
1225 s->prob.coef[i][j][k][l][m], 3);
1226 if (s->s.h.txfmmode == i)
1229 s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
1230 ff_thread_finish_setup(avctx);
1231 } else if (!s->s.h.refreshctx) {
1232 ff_thread_finish_setup(avctx);
/* Rewind the coefficient/eob scratch pointers for this pass. */
1238 s->block = s->block_base;
1239 s->uvblock[0] = s->uvblock_base[0];
1240 s->uvblock[1] = s->uvblock_base[1];
1241 s->eob = s->eob_base;
1242 s->uveob[0] = s->uveob_base[0];
1243 s->uveob[1] = s->uveob_base[1];
/* First sweep: set up one range decoder per tile column. */
1245 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1246 set_tile_offset(&s->tile_row_start, &s->tile_row_end,
1247 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1249 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
/* The last tile has no explicit size; it uses the remaining bytes. */
1252 if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1253 tile_row == s->s.h.tiling.tile_rows - 1) {
1256 tile_size = AV_RB32(data);
1260 if (tile_size > size) {
/* Signal completion before erroring out so frame-threaded
 * consumers are not left waiting on this frame. */
1261 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1262 return AVERROR_INVALIDDATA;
1264 ret = ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
1267 if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
1268 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1269 return AVERROR_INVALIDDATA;
/* Decode superblock rows (8 block units == 64 pixels per row). */
1276 for (row = s->tile_row_start; row < s->tile_row_end;
1277 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1278 VP9Filter *lflvl_ptr = s->lflvl;
1279 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1281 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1282 set_tile_offset(&s->tile_col_start, &s->tile_col_end,
1283 tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
/* Reset the "left" prediction context at each tile edge. */
1286 memset(s->left_partition_ctx, 0, 8);
1287 memset(s->left_skip_ctx, 0, 8);
1288 if (s->s.h.keyframe || s->s.h.intraonly) {
1289 memset(s->left_mode_ctx, DC_PRED, 16);
1291 memset(s->left_mode_ctx, NEARESTMV, 8);
1293 memset(s->left_y_nnz_ctx, 0, 16);
1294 memset(s->left_uv_nnz_ctx, 0, 32);
1295 memset(s->left_segpred_ctx, 0, 8);
/* Swap in this tile column's range-coder state. */
1297 memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
1300 for (col = s->tile_col_start;
1301 col < s->tile_col_end;
1302 col += 8, yoff2 += 64 * bytesperpixel,
1303 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1304 // FIXME integrate with lf code (i.e. zero after each
1305 // use, similar to invtxfm coefficients, or similar)
1307 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
/* Pass 2 replays stored block structure; pass 0/1 parses it. */
1311 decode_sb_mem(avctx, row, col, lflvl_ptr,
1312 yoff2, uvoff2, BL_64X64);
1314 decode_sb(avctx, row, col, lflvl_ptr,
1315 yoff2, uvoff2, BL_64X64);
/* Save the range-coder state back for the next superblock row. */
1319 memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
1325 // backup pre-loopfilter reconstruction data for intra
1326 // prediction of next row of sb64s
1327 if (row + 8 < s->rows) {
1328 memcpy(s->intra_pred_data[0],
1329 f->data[0] + yoff + 63 * ls_y,
1330 8 * s->cols * bytesperpixel);
1331 memcpy(s->intra_pred_data[1],
1332 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1333 8 * s->cols * bytesperpixel >> s->ss_h);
1334 memcpy(s->intra_pred_data[2],
1335 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1336 8 * s->cols * bytesperpixel >> s->ss_h);
1339 // loopfilter one row
1340 if (s->s.h.filter.level) {
1343 lflvl_ptr = s->lflvl;
1344 for (col = 0; col < s->cols;
1345 col += 8, yoff2 += 64 * bytesperpixel,
1346 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1347 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
1352 // FIXME maybe we can make this more finegrained by running the
1353 // loopfilter per-block instead of after each sbrow
1354 // In fact that would also make intra pred left preparation easier?
/* Publish per-sb-row progress for frame-threaded consumers. */
1355 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
/* Backward probability adaptation after the first (parsing) pass. */
1359 if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
1360 ff_vp9_adapt_probs(s);
1361 ff_thread_finish_setup(avctx);
1363 } while (s->pass++ == 1);
1364 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
/* Commit the staged reference set: next_refs -> refs. */
1368 for (i = 0; i < 8; i++) {
1369 if (s->s.refs[i].f->buf[0])
1370 ff_thread_release_buffer(avctx, &s->s.refs[i]);
1371 if (s->next_refs[i].f->buf[0] &&
1372 (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
/* Only emit the frame if the bitstream marks it visible. */
1376 if (!s->s.h.invisible) {
1377 if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
/* Flush callback (e.g. on seek): drop all internal frames and all 8
 * bitstream reference slots so decoding restarts from a clean state.
 * The AVFrame shells themselves stay allocated (freed only in
 * vp9_decode_free()). */
1385 static void vp9_decode_flush(AVCodecContext *avctx)
1387 VP9Context *s = avctx->priv_data;
1390 for (i = 0; i < 3; i++)
1391 vp9_frame_unref(avctx, &s->s.frames[i]);
1392 for (i = 0; i < 8; i++)
1393 ff_thread_release_buffer(avctx, &s->s.refs[i]);
/* Allocate the AVFrame shells for the 3 internal frames and the 8+8
 * reference slots.  On any allocation failure the whole context is torn
 * down via vp9_decode_free() (safe on partial init) and AVERROR(ENOMEM)
 * is returned. */
1396 static int init_frames(AVCodecContext *avctx)
1398 VP9Context *s = avctx->priv_data;
1401 for (i = 0; i < 3; i++) {
1402 s->s.frames[i].tf.f = av_frame_alloc();
1403 if (!s->s.frames[i].tf.f) {
1404 vp9_decode_free(avctx);
1405 av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1406 return AVERROR(ENOMEM);
1409 for (i = 0; i < 8; i++) {
1410 s->s.refs[i].f = av_frame_alloc();
1411 s->next_refs[i].f = av_frame_alloc();
1412 if (!s->s.refs[i].f || !s->next_refs[i].f) {
1413 vp9_decode_free(avctx);
1414 av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1415 return AVERROR(ENOMEM);
/* Codec init callback: enable frame-threading progress reporting, mark the
 * loop-filter sharpness as "unset" (-1 forces recalculation of filter limits
 * on the first frame header), and allocate the frame/reference shells. */
1422 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1424 VP9Context *s = avctx->priv_data;
1426 avctx->internal->allocate_progress = 1;
1428 s->s.h.filter.sharpness = -1;
1430 return init_frames(avctx);
/* Frame-thread worker init: each thread copy only needs its own set of
 * AVFrame shells; all other state is synced per-frame in
 * vp9_decode_update_thread_context(). */
1434 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
1436 return init_frames(avctx);
/* Frame-threading sync: copy the decode state a dependent thread needs from
 * the source context into dst — frame/reference buffers (by refcount, not
 * deep copy), key header fields, and the probability contexts. */
1439 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1442 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
/* Re-reference the three internal frames from the source thread. */
1444 for (i = 0; i < 3; i++) {
1445 if (s->s.frames[i].tf.f->buf[0])
1446 vp9_frame_unref(dst, &s->s.frames[i]);
1447 if (ssrc->s.frames[i].tf.f->buf[0]) {
1448 if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
/* The destination's refs become the source's post-frame (next_refs) set. */
1452 for (i = 0; i < 8; i++) {
1453 if (s->s.refs[i].f->buf[0])
1454 ff_thread_release_buffer(dst, &s->s.refs[i]);
1455 if (ssrc->next_refs[i].f->buf[0]) {
1456 if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
/* Scalar header/context fields consulted before the next frame header
 * is fully parsed. */
1461 s->s.h.invisible = ssrc->s.h.invisible;
1462 s->s.h.keyframe = ssrc->s.h.keyframe;
1463 s->s.h.intraonly = ssrc->s.h.intraonly;
1464 s->ss_v = ssrc->ss_v;
1465 s->ss_h = ssrc->ss_h;
1466 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1467 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1468 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1469 s->bytesperpixel = ssrc->bytesperpixel;
1470 s->gf_fmt = ssrc->gf_fmt;
1473 s->s.h.bpp = ssrc->s.h.bpp;
1474 s->bpp_index = ssrc->bpp_index;
1475 s->pix_fmt = ssrc->pix_fmt;
/* Bulk-copy the adapted probability contexts and per-frame deltas. */
1476 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
1477 memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
1478 memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1479 sizeof(s->s.h.segmentation.feat));
1485 AVCodec ff_vp9_decoder = {
1487 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
1488 .type = AVMEDIA_TYPE_VIDEO,
1489 .id = AV_CODEC_ID_VP9,
1490 .priv_data_size = sizeof(VP9Context),
1491 .init = vp9_decode_init,
1492 .close = vp9_decode_free,
1493 .decode = vp9_decode_frame,
1494 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
1495 .flush = vp9_decode_flush,
1496 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
1497 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
1498 .profiles = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),