/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
35 #include "libavutil/avassert.h"
36 #include "libavutil/pixdesc.h"
37 #include "libavutil/video_enc_params.h"
39 #define VP9_SYNCCODE 0x498342
42 static void vp9_free_entries(AVCodecContext *avctx) {
43 VP9Context *s = avctx->priv_data;
45 if (avctx->active_thread_type & FF_THREAD_SLICE) {
46 pthread_mutex_destroy(&s->progress_mutex);
47 pthread_cond_destroy(&s->progress_cond);
48 av_freep(&s->entries);
52 static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
53 VP9Context *s = avctx->priv_data;
56 if (avctx->active_thread_type & FF_THREAD_SLICE) {
58 av_freep(&s->entries);
60 s->entries = av_malloc_array(n, sizeof(atomic_int));
63 av_freep(&s->entries);
64 return AVERROR(ENOMEM);
67 for (i = 0; i < n; i++)
68 atomic_init(&s->entries[i], 0);
70 pthread_mutex_init(&s->progress_mutex, NULL);
71 pthread_cond_init(&s->progress_cond, NULL);
76 static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
77 pthread_mutex_lock(&s->progress_mutex);
78 atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
79 pthread_cond_signal(&s->progress_cond);
80 pthread_mutex_unlock(&s->progress_mutex);
83 static void vp9_await_tile_progress(VP9Context *s, int field, int n) {
84 if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
87 pthread_mutex_lock(&s->progress_mutex);
88 while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) != n)
89 pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
90 pthread_mutex_unlock(&s->progress_mutex);
93 static void vp9_free_entries(AVCodecContext *avctx) {}
94 static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
97 static void vp9_tile_data_free(VP9TileData *td)
99 av_freep(&td->b_base);
100 av_freep(&td->block_base);
101 av_freep(&td->block_structure);
104 static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
106 ff_thread_release_buffer(avctx, &f->tf);
107 av_buffer_unref(&f->extradata);
108 av_buffer_unref(&f->hwaccel_priv_buf);
109 f->segmentation_map = NULL;
110 f->hwaccel_picture_private = NULL;
113 static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
115 VP9Context *s = avctx->priv_data;
118 ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
122 sz = 64 * s->sb_cols * s->sb_rows;
123 if (sz != s->frame_extradata_pool_size) {
124 av_buffer_pool_uninit(&s->frame_extradata_pool);
125 s->frame_extradata_pool = av_buffer_pool_init(sz * (1 + sizeof(VP9mvrefPair)), NULL);
126 if (!s->frame_extradata_pool) {
127 s->frame_extradata_pool_size = 0;
130 s->frame_extradata_pool_size = sz;
132 f->extradata = av_buffer_pool_get(s->frame_extradata_pool);
136 memset(f->extradata->data, 0, f->extradata->size);
138 f->segmentation_map = f->extradata->data;
139 f->mv = (VP9mvrefPair *) (f->extradata->data + sz);
141 if (avctx->hwaccel) {
142 const AVHWAccel *hwaccel = avctx->hwaccel;
143 av_assert0(!f->hwaccel_picture_private);
144 if (hwaccel->frame_priv_data_size) {
145 f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
146 if (!f->hwaccel_priv_buf)
148 f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
155 vp9_frame_unref(avctx, f);
156 return AVERROR(ENOMEM);
159 static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
163 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
167 dst->extradata = av_buffer_ref(src->extradata);
171 dst->segmentation_map = src->segmentation_map;
173 dst->uses_2pass = src->uses_2pass;
175 if (src->hwaccel_picture_private) {
176 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
177 if (!dst->hwaccel_priv_buf)
179 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
185 vp9_frame_unref(avctx, dst);
186 return AVERROR(ENOMEM);
189 static int update_size(AVCodecContext *avctx, int w, int h)
191 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
192 CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
193 CONFIG_VP9_NVDEC_HWACCEL + \
194 CONFIG_VP9_VAAPI_HWACCEL + \
195 CONFIG_VP9_VDPAU_HWACCEL)
196 enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
197 VP9Context *s = avctx->priv_data;
199 int bytesperpixel = s->bytesperpixel, ret, cols, rows;
202 av_assert0(w > 0 && h > 0);
204 if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
205 if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
208 switch (s->pix_fmt) {
209 case AV_PIX_FMT_YUV420P:
210 #if CONFIG_VP9_VDPAU_HWACCEL
211 *fmtp++ = AV_PIX_FMT_VDPAU;
213 case AV_PIX_FMT_YUV420P10:
214 #if CONFIG_VP9_DXVA2_HWACCEL
215 *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
217 #if CONFIG_VP9_D3D11VA_HWACCEL
218 *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
219 *fmtp++ = AV_PIX_FMT_D3D11;
221 #if CONFIG_VP9_NVDEC_HWACCEL
222 *fmtp++ = AV_PIX_FMT_CUDA;
224 #if CONFIG_VP9_VAAPI_HWACCEL
225 *fmtp++ = AV_PIX_FMT_VAAPI;
227 #if CONFIG_VP9_VDPAU_HWACCEL
228 *fmtp++ = AV_PIX_FMT_VDPAU;
231 case AV_PIX_FMT_YUV420P12:
232 #if CONFIG_VP9_NVDEC_HWACCEL
233 *fmtp++ = AV_PIX_FMT_CUDA;
235 #if CONFIG_VP9_VAAPI_HWACCEL
236 *fmtp++ = AV_PIX_FMT_VAAPI;
238 #if CONFIG_VP9_VDPAU_HWACCEL
239 *fmtp++ = AV_PIX_FMT_VDPAU;
244 *fmtp++ = s->pix_fmt;
245 *fmtp = AV_PIX_FMT_NONE;
247 ret = ff_thread_get_format(avctx, pix_fmts);
251 avctx->pix_fmt = ret;
252 s->gf_fmt = s->pix_fmt;
260 if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
263 s->last_fmt = s->pix_fmt;
264 s->sb_cols = (w + 63) >> 6;
265 s->sb_rows = (h + 63) >> 6;
266 s->cols = (w + 7) >> 3;
267 s->rows = (h + 7) >> 3;
268 lflvl_len = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;
270 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
271 av_freep(&s->intra_pred_data[0]);
272 // FIXME we slightly over-allocate here for subsampled chroma, but a little
273 // bit of padding shouldn't affect performance...
274 p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
275 lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
277 return AVERROR(ENOMEM);
278 assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
279 assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
280 assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
281 assign(s->above_y_nnz_ctx, uint8_t *, 16);
282 assign(s->above_mode_ctx, uint8_t *, 16);
283 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
284 assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
285 assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
286 assign(s->above_partition_ctx, uint8_t *, 8);
287 assign(s->above_skip_ctx, uint8_t *, 8);
288 assign(s->above_txfm_ctx, uint8_t *, 8);
289 assign(s->above_segpred_ctx, uint8_t *, 8);
290 assign(s->above_intra_ctx, uint8_t *, 8);
291 assign(s->above_comp_ctx, uint8_t *, 8);
292 assign(s->above_ref_ctx, uint8_t *, 8);
293 assign(s->above_filter_ctx, uint8_t *, 8);
294 assign(s->lflvl, VP9Filter *, lflvl_len);
298 for (i = 0; i < s->active_tile_cols; i++)
299 vp9_tile_data_free(&s->td[i]);
302 if (s->s.h.bpp != s->last_bpp) {
303 ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
304 ff_videodsp_init(&s->vdsp, s->s.h.bpp);
305 s->last_bpp = s->s.h.bpp;
311 static int update_block_buffers(AVCodecContext *avctx)
314 VP9Context *s = avctx->priv_data;
315 int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
316 VP9TileData *td = &s->td[0];
318 if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
321 vp9_tile_data_free(td);
322 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
323 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
324 if (s->s.frames[CUR_FRAME].uses_2pass) {
325 int sbs = s->sb_cols * s->sb_rows;
327 td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
328 td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
329 16 * 16 + 2 * chroma_eobs) * sbs);
330 if (!td->b_base || !td->block_base)
331 return AVERROR(ENOMEM);
332 td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
333 td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
334 td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
335 td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
336 td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;
338 if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
339 td->block_structure = av_malloc_array(s->cols * s->rows, sizeof(*td->block_structure));
340 if (!td->block_structure)
341 return AVERROR(ENOMEM);
344 for (i = 1; i < s->active_tile_cols; i++)
345 vp9_tile_data_free(&s->td[i]);
347 for (i = 0; i < s->active_tile_cols; i++) {
348 s->td[i].b_base = av_malloc(sizeof(VP9Block));
349 s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
350 16 * 16 + 2 * chroma_eobs);
351 if (!s->td[i].b_base || !s->td[i].block_base)
352 return AVERROR(ENOMEM);
353 s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
354 s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
355 s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
356 s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
357 s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;
359 if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
360 s->td[i].block_structure = av_malloc_array(s->cols * s->rows, sizeof(*td->block_structure));
361 if (!s->td[i].block_structure)
362 return AVERROR(ENOMEM);
366 s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;
371 // The sign bit is at the end, not the start, of a bit sequence
372 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
374 int v = get_bits(gb, n);
375 return get_bits1(gb) ? -v : v;
/* Inverse of the "recenter" mapping used by the VP9 differential
 * probability update: decode a non-negative delta v back into an
 * absolute value around center m. Values beyond 2*m are passed
 * through unchanged; otherwise odd deltas land below m and even
 * deltas above it. */
static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    if (v > 2 * m)
        return v;
    if (v & 1)
        return m - ((v + 1) >> 1);
    return m + (v >> 1);
}
387 // differential forward probability updates
388 static int update_prob(VP56RangeCoder *c, int p)
390 static const uint8_t inv_map_table[255] = {
391 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
392 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
393 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
394 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
395 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
396 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
397 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
398 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
399 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
400 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
401 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
402 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
403 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
404 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
405 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
406 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
407 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
408 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
413 /* This code is trying to do a differential probability update. For a
414 * current probability A in the range [1, 255], the difference to a new
415 * probability of any value can be expressed differentially as 1-A, 255-A
416 * where some part of this (absolute range) exists both in positive as
417 * well as the negative part, whereas another part only exists in one
418 * half. We're trying to code this shared part differentially, i.e.
419 * times two where the value of the lowest bit specifies the sign, and
420 * the single part is then coded on top of this. This absolute difference
421 * then again has a value of [0, 254], but a bigger value in this range
422 * indicates that we're further away from the original value A, so we
423 * can code this as a VLC code, since higher values are increasingly
424 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
425 * updates vs. the 'fine, exact' updates further down the range, which
426 * adds one extra dimension to this differential update model. */
428 if (!vp8_rac_get(c)) {
429 d = vp8_rac_get_uint(c, 4) + 0;
430 } else if (!vp8_rac_get(c)) {
431 d = vp8_rac_get_uint(c, 4) + 16;
432 } else if (!vp8_rac_get(c)) {
433 d = vp8_rac_get_uint(c, 5) + 32;
435 d = vp8_rac_get_uint(c, 7);
437 d = (d << 1) - 65 + vp8_rac_get(c);
439 av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
442 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
443 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
446 static int read_colorspace_details(AVCodecContext *avctx)
448 static const enum AVColorSpace colorspaces[8] = {
449 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
450 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
452 VP9Context *s = avctx->priv_data;
453 int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
456 s->s.h.bpp = 8 + bits * 2;
457 s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
458 avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
459 if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
460 static const enum AVPixelFormat pix_fmt_rgb[3] = {
461 AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
463 s->ss_h = s->ss_v = 0;
464 avctx->color_range = AVCOL_RANGE_JPEG;
465 s->pix_fmt = pix_fmt_rgb[bits];
466 if (avctx->profile & 1) {
467 if (get_bits1(&s->gb)) {
468 av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
469 return AVERROR_INVALIDDATA;
472 av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
474 return AVERROR_INVALIDDATA;
477 static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
478 { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
479 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
480 { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
481 { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
482 { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
483 { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
485 avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
486 if (avctx->profile & 1) {
487 s->ss_h = get_bits1(&s->gb);
488 s->ss_v = get_bits1(&s->gb);
489 s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
490 if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
491 av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
493 return AVERROR_INVALIDDATA;
494 } else if (get_bits1(&s->gb)) {
495 av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
497 return AVERROR_INVALIDDATA;
500 s->ss_h = s->ss_v = 1;
501 s->pix_fmt = pix_fmt_for_ss[bits][1][1];
508 static int decode_frame_header(AVCodecContext *avctx,
509 const uint8_t *data, int size, int *ref)
511 VP9Context *s = avctx->priv_data;
512 int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
514 const uint8_t *data2;
517 if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
518 av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
521 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
522 av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
523 return AVERROR_INVALIDDATA;
525 avctx->profile = get_bits1(&s->gb);
526 avctx->profile |= get_bits1(&s->gb) << 1;
527 if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
528 if (avctx->profile > 3) {
529 av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
530 return AVERROR_INVALIDDATA;
532 s->s.h.profile = avctx->profile;
533 if (get_bits1(&s->gb)) {
534 *ref = get_bits(&s->gb, 3);
538 s->last_keyframe = s->s.h.keyframe;
539 s->s.h.keyframe = !get_bits1(&s->gb);
541 last_invisible = s->s.h.invisible;
542 s->s.h.invisible = !get_bits1(&s->gb);
543 s->s.h.errorres = get_bits1(&s->gb);
544 s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
546 if (s->s.h.keyframe) {
547 if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
548 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
549 return AVERROR_INVALIDDATA;
551 if ((ret = read_colorspace_details(avctx)) < 0)
553 // for profile 1, here follows the subsampling bits
554 s->s.h.refreshrefmask = 0xff;
555 w = get_bits(&s->gb, 16) + 1;
556 h = get_bits(&s->gb, 16) + 1;
557 if (get_bits1(&s->gb)) // display size
558 skip_bits(&s->gb, 32);
560 s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
561 s->s.h.resetctx = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
562 if (s->s.h.intraonly) {
563 if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
564 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
565 return AVERROR_INVALIDDATA;
567 if (avctx->profile >= 1) {
568 if ((ret = read_colorspace_details(avctx)) < 0)
571 s->ss_h = s->ss_v = 1;
574 s->bytesperpixel = 1;
575 s->pix_fmt = AV_PIX_FMT_YUV420P;
576 avctx->colorspace = AVCOL_SPC_BT470BG;
577 avctx->color_range = AVCOL_RANGE_MPEG;
579 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
580 w = get_bits(&s->gb, 16) + 1;
581 h = get_bits(&s->gb, 16) + 1;
582 if (get_bits1(&s->gb)) // display size
583 skip_bits(&s->gb, 32);
585 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
586 s->s.h.refidx[0] = get_bits(&s->gb, 3);
587 s->s.h.signbias[0] = get_bits1(&s->gb) && !s->s.h.errorres;
588 s->s.h.refidx[1] = get_bits(&s->gb, 3);
589 s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres;
590 s->s.h.refidx[2] = get_bits(&s->gb, 3);
591 s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres;
592 if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
593 !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
594 !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
595 av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
596 return AVERROR_INVALIDDATA;
598 if (get_bits1(&s->gb)) {
599 w = s->s.refs[s->s.h.refidx[0]].f->width;
600 h = s->s.refs[s->s.h.refidx[0]].f->height;
601 } else if (get_bits1(&s->gb)) {
602 w = s->s.refs[s->s.h.refidx[1]].f->width;
603 h = s->s.refs[s->s.h.refidx[1]].f->height;
604 } else if (get_bits1(&s->gb)) {
605 w = s->s.refs[s->s.h.refidx[2]].f->width;
606 h = s->s.refs[s->s.h.refidx[2]].f->height;
608 w = get_bits(&s->gb, 16) + 1;
609 h = get_bits(&s->gb, 16) + 1;
611 // Note that in this code, "CUR_FRAME" is actually before we
612 // have formally allocated a frame, and thus actually represents
614 s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
615 s->s.frames[CUR_FRAME].tf.f->height == h;
616 if (get_bits1(&s->gb)) // display size
617 skip_bits(&s->gb, 32);
618 s->s.h.highprecisionmvs = get_bits1(&s->gb);
619 s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
621 s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
622 s->s.h.signbias[0] != s->s.h.signbias[2];
623 if (s->s.h.allowcompinter) {
624 if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
625 s->s.h.fixcompref = 2;
626 s->s.h.varcompref[0] = 0;
627 s->s.h.varcompref[1] = 1;
628 } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
629 s->s.h.fixcompref = 1;
630 s->s.h.varcompref[0] = 0;
631 s->s.h.varcompref[1] = 2;
633 s->s.h.fixcompref = 0;
634 s->s.h.varcompref[0] = 1;
635 s->s.h.varcompref[1] = 2;
640 s->s.h.refreshctx = s->s.h.errorres ? 0 : get_bits1(&s->gb);
641 s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
642 s->s.h.framectxid = c = get_bits(&s->gb, 2);
643 if (s->s.h.keyframe || s->s.h.intraonly)
644 s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
646 /* loopfilter header data */
647 if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
648 // reset loopfilter defaults
649 s->s.h.lf_delta.ref[0] = 1;
650 s->s.h.lf_delta.ref[1] = 0;
651 s->s.h.lf_delta.ref[2] = -1;
652 s->s.h.lf_delta.ref[3] = -1;
653 s->s.h.lf_delta.mode[0] = 0;
654 s->s.h.lf_delta.mode[1] = 0;
655 memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
657 s->s.h.filter.level = get_bits(&s->gb, 6);
658 sharp = get_bits(&s->gb, 3);
659 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
660 // the old cache values since they are still valid
661 if (s->s.h.filter.sharpness != sharp) {
662 for (i = 1; i <= 63; i++) {
666 limit >>= (sharp + 3) >> 2;
667 limit = FFMIN(limit, 9 - sharp);
669 limit = FFMAX(limit, 1);
671 s->filter_lut.lim_lut[i] = limit;
672 s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
675 s->s.h.filter.sharpness = sharp;
676 if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
677 if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
678 for (i = 0; i < 4; i++)
679 if (get_bits1(&s->gb))
680 s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
681 for (i = 0; i < 2; i++)
682 if (get_bits1(&s->gb))
683 s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
687 /* quantization header data */
688 s->s.h.yac_qi = get_bits(&s->gb, 8);
689 s->s.h.ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
690 s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
691 s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
692 s->s.h.lossless = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
693 s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
695 avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
697 /* segmentation header info */
698 if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
699 if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
700 for (i = 0; i < 7; i++)
701 s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
702 get_bits(&s->gb, 8) : 255;
703 if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
704 for (i = 0; i < 3; i++)
705 s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
706 get_bits(&s->gb, 8) : 255;
709 if (get_bits1(&s->gb)) {
710 s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
711 for (i = 0; i < 8; i++) {
712 if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
713 s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
714 if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
715 s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
716 if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
717 s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
718 s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
723 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
724 for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
725 int qyac, qydc, quvac, quvdc, lflvl, sh;
727 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
728 if (s->s.h.segmentation.absolute_vals)
729 qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
731 qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
733 qyac = s->s.h.yac_qi;
735 qydc = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
736 quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
737 quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
738 qyac = av_clip_uintp2(qyac, 8);
740 s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
741 s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
742 s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
743 s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];
745 sh = s->s.h.filter.level >= 32;
746 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
747 if (s->s.h.segmentation.absolute_vals)
748 lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
750 lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
752 lflvl = s->s.h.filter.level;
754 if (s->s.h.lf_delta.enabled) {
755 s->s.h.segmentation.feat[i].lflvl[0][0] =
756 s->s.h.segmentation.feat[i].lflvl[0][1] =
757 av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
758 for (j = 1; j < 4; j++) {
759 s->s.h.segmentation.feat[i].lflvl[j][0] =
760 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
761 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
762 s->s.h.segmentation.feat[i].lflvl[j][1] =
763 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
764 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
767 memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
768 sizeof(s->s.h.segmentation.feat[i].lflvl));
773 if ((ret = update_size(avctx, w, h)) < 0) {
774 av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
778 for (s->s.h.tiling.log2_tile_cols = 0;
779 s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
780 s->s.h.tiling.log2_tile_cols++) ;
781 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
782 max = FFMAX(0, max - 1);
783 while (max > s->s.h.tiling.log2_tile_cols) {
784 if (get_bits1(&s->gb))
785 s->s.h.tiling.log2_tile_cols++;
789 s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
790 s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
791 if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
796 for (i = 0; i < s->active_tile_cols; i++)
797 vp9_tile_data_free(&s->td[i]);
801 s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
802 vp9_free_entries(avctx);
803 s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
804 s->s.h.tiling.tile_cols : 1;
805 vp9_alloc_entries(avctx, s->sb_rows);
806 if (avctx->active_thread_type == FF_THREAD_SLICE) {
807 n_range_coders = 4; // max_tile_rows
809 n_range_coders = s->s.h.tiling.tile_cols;
811 s->td = av_mallocz_array(s->active_tile_cols, sizeof(VP9TileData) +
812 n_range_coders * sizeof(VP56RangeCoder));
814 return AVERROR(ENOMEM);
815 rc = (VP56RangeCoder *) &s->td[s->active_tile_cols];
816 for (i = 0; i < s->active_tile_cols; i++) {
819 rc += n_range_coders;
823 /* check reference frames */
824 if (!s->s.h.keyframe && !s->s.h.intraonly) {
825 int valid_ref_frame = 0;
826 for (i = 0; i < 3; i++) {
827 AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
828 int refw = ref->width, refh = ref->height;
830 if (ref->format != avctx->pix_fmt) {
831 av_log(avctx, AV_LOG_ERROR,
832 "Ref pixfmt (%s) did not match current frame (%s)",
833 av_get_pix_fmt_name(ref->format),
834 av_get_pix_fmt_name(avctx->pix_fmt));
835 return AVERROR_INVALIDDATA;
836 } else if (refw == w && refh == h) {
837 s->mvscale[i][0] = s->mvscale[i][1] = 0;
839 /* Check to make sure at least one of frames that */
840 /* this frame references has valid dimensions */
841 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
842 av_log(avctx, AV_LOG_WARNING,
843 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
845 s->mvscale[i][0] = s->mvscale[i][1] = REF_INVALID_SCALE;
848 s->mvscale[i][0] = (refw << 14) / w;
849 s->mvscale[i][1] = (refh << 14) / h;
850 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
851 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
855 if (!valid_ref_frame) {
856 av_log(avctx, AV_LOG_ERROR, "No valid reference frame is found, bitstream not supported\n");
857 return AVERROR_INVALIDDATA;
861 if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
862 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
863 s->prob_ctx[3].p = ff_vp9_default_probs;
864 memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
865 sizeof(ff_vp9_default_coef_probs));
866 memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
867 sizeof(ff_vp9_default_coef_probs));
868 memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
869 sizeof(ff_vp9_default_coef_probs));
870 memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
871 sizeof(ff_vp9_default_coef_probs));
872 } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
873 s->prob_ctx[c].p = ff_vp9_default_probs;
874 memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
875 sizeof(ff_vp9_default_coef_probs));
878 // next 16 bits is size of the rest of the header (arith-coded)
879 s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
880 s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
882 data2 = align_get_bits(&s->gb);
883 if (size2 > size - (data2 - data)) {
884 av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
885 return AVERROR_INVALIDDATA;
887 ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
891 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
892 av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
893 return AVERROR_INVALIDDATA;
896 for (i = 0; i < s->active_tile_cols; i++) {
897 if (s->s.h.keyframe || s->s.h.intraonly) {
898 memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
899 memset(s->td[i].counts.eob, 0, sizeof(s->td[0].counts.eob));
901 memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));
903 s->td[i].nb_block_structure = 0;
906 /* FIXME is it faster to not copy here, but do it down in the fw updates
907 * as explicit copies if the fw update is missing (and skip the copy upon
909 s->prob.p = s->prob_ctx[c].p;
912 if (s->s.h.lossless) {
913 s->s.h.txfmmode = TX_4X4;
915 s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
916 if (s->s.h.txfmmode == 3)
917 s->s.h.txfmmode += vp8_rac_get(&s->c);
919 if (s->s.h.txfmmode == TX_SWITCHABLE) {
920 for (i = 0; i < 2; i++)
921 if (vp56_rac_get_prob_branchy(&s->c, 252))
922 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
923 for (i = 0; i < 2; i++)
924 for (j = 0; j < 2; j++)
925 if (vp56_rac_get_prob_branchy(&s->c, 252))
926 s->prob.p.tx16p[i][j] =
927 update_prob(&s->c, s->prob.p.tx16p[i][j]);
928 for (i = 0; i < 2; i++)
929 for (j = 0; j < 3; j++)
930 if (vp56_rac_get_prob_branchy(&s->c, 252))
931 s->prob.p.tx32p[i][j] =
932 update_prob(&s->c, s->prob.p.tx32p[i][j]);
937 for (i = 0; i < 4; i++) {
938 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
939 if (vp8_rac_get(&s->c)) {
940 for (j = 0; j < 2; j++)
941 for (k = 0; k < 2; k++)
942 for (l = 0; l < 6; l++)
943 for (m = 0; m < 6; m++) {
944 uint8_t *p = s->prob.coef[i][j][k][l][m];
945 uint8_t *r = ref[j][k][l][m];
946 if (m >= 3 && l == 0) // dc only has 3 pt
948 for (n = 0; n < 3; n++) {
949 if (vp56_rac_get_prob_branchy(&s->c, 252))
950 p[n] = update_prob(&s->c, r[n]);
954 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
957 for (j = 0; j < 2; j++)
958 for (k = 0; k < 2; k++)
959 for (l = 0; l < 6; l++)
960 for (m = 0; m < 6; m++) {
961 uint8_t *p = s->prob.coef[i][j][k][l][m];
962 uint8_t *r = ref[j][k][l][m];
963 if (m > 3 && l == 0) // dc only has 3 pt
966 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
969 if (s->s.h.txfmmode == i)
974 for (i = 0; i < 3; i++)
975 if (vp56_rac_get_prob_branchy(&s->c, 252))
976 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
977 if (!s->s.h.keyframe && !s->s.h.intraonly) {
978 for (i = 0; i < 7; i++)
979 for (j = 0; j < 3; j++)
980 if (vp56_rac_get_prob_branchy(&s->c, 252))
981 s->prob.p.mv_mode[i][j] =
982 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
984 if (s->s.h.filtermode == FILTER_SWITCHABLE)
985 for (i = 0; i < 4; i++)
986 for (j = 0; j < 2; j++)
987 if (vp56_rac_get_prob_branchy(&s->c, 252))
988 s->prob.p.filter[i][j] =
989 update_prob(&s->c, s->prob.p.filter[i][j]);
991 for (i = 0; i < 4; i++)
992 if (vp56_rac_get_prob_branchy(&s->c, 252))
993 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
995 if (s->s.h.allowcompinter) {
996 s->s.h.comppredmode = vp8_rac_get(&s->c);
997 if (s->s.h.comppredmode)
998 s->s.h.comppredmode += vp8_rac_get(&s->c);
999 if (s->s.h.comppredmode == PRED_SWITCHABLE)
1000 for (i = 0; i < 5; i++)
1001 if (vp56_rac_get_prob_branchy(&s->c, 252))
1003 update_prob(&s->c, s->prob.p.comp[i]);
1005 s->s.h.comppredmode = PRED_SINGLEREF;
1008 if (s->s.h.comppredmode != PRED_COMPREF) {
1009 for (i = 0; i < 5; i++) {
1010 if (vp56_rac_get_prob_branchy(&s->c, 252))
1011 s->prob.p.single_ref[i][0] =
1012 update_prob(&s->c, s->prob.p.single_ref[i][0]);
1013 if (vp56_rac_get_prob_branchy(&s->c, 252))
1014 s->prob.p.single_ref[i][1] =
1015 update_prob(&s->c, s->prob.p.single_ref[i][1]);
1019 if (s->s.h.comppredmode != PRED_SINGLEREF) {
1020 for (i = 0; i < 5; i++)
1021 if (vp56_rac_get_prob_branchy(&s->c, 252))
1022 s->prob.p.comp_ref[i] =
1023 update_prob(&s->c, s->prob.p.comp_ref[i]);
1026 for (i = 0; i < 4; i++)
1027 for (j = 0; j < 9; j++)
1028 if (vp56_rac_get_prob_branchy(&s->c, 252))
1029 s->prob.p.y_mode[i][j] =
1030 update_prob(&s->c, s->prob.p.y_mode[i][j]);
1032 for (i = 0; i < 4; i++)
1033 for (j = 0; j < 4; j++)
1034 for (k = 0; k < 3; k++)
1035 if (vp56_rac_get_prob_branchy(&s->c, 252))
1036 s->prob.p.partition[3 - i][j][k] =
1038 s->prob.p.partition[3 - i][j][k]);
1040 // mv fields don't use the update_prob subexp model for some reason
1041 for (i = 0; i < 3; i++)
1042 if (vp56_rac_get_prob_branchy(&s->c, 252))
1043 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1045 for (i = 0; i < 2; i++) {
1046 if (vp56_rac_get_prob_branchy(&s->c, 252))
1047 s->prob.p.mv_comp[i].sign =
1048 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1050 for (j = 0; j < 10; j++)
1051 if (vp56_rac_get_prob_branchy(&s->c, 252))
1052 s->prob.p.mv_comp[i].classes[j] =
1053 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1055 if (vp56_rac_get_prob_branchy(&s->c, 252))
1056 s->prob.p.mv_comp[i].class0 =
1057 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1059 for (j = 0; j < 10; j++)
1060 if (vp56_rac_get_prob_branchy(&s->c, 252))
1061 s->prob.p.mv_comp[i].bits[j] =
1062 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1065 for (i = 0; i < 2; i++) {
1066 for (j = 0; j < 2; j++)
1067 for (k = 0; k < 3; k++)
1068 if (vp56_rac_get_prob_branchy(&s->c, 252))
1069 s->prob.p.mv_comp[i].class0_fp[j][k] =
1070 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1072 for (j = 0; j < 3; j++)
1073 if (vp56_rac_get_prob_branchy(&s->c, 252))
1074 s->prob.p.mv_comp[i].fp[j] =
1075 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1078 if (s->s.h.highprecisionmvs) {
1079 for (i = 0; i < 2; i++) {
1080 if (vp56_rac_get_prob_branchy(&s->c, 252))
1081 s->prob.p.mv_comp[i].class0_hp =
1082 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1084 if (vp56_rac_get_prob_branchy(&s->c, 252))
1085 s->prob.p.mv_comp[i].hp =
1086 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1091 return (data2 - data) + size2;
/*
 * Recursively decode one partition of a 64x64 superblock at level bl.
 * The 2-bit partition context is built from the above/left partition
 * context arrays; keyframe/intra-only frames use the static keyframe
 * partition probabilities instead of the adapted ones. Splits that
 * would cross the right/bottom picture edge are clipped. yoff/uvoff
 * are byte offsets into the current frame's luma/chroma planes.
 */
static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
const VP9Context *s = td->s;
/* bit 0 = above partition context, bit 1 = left partition context */
int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
(((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
s->prob.p.partition[bl][c];
enum BlockPartition bp;
ptrdiff_t hbs = 4 >> bl; /* half block size, in units of 8-pixel blocks */
AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
int bytesperpixel = s->bytesperpixel;
bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
} else if (col + hbs < s->cols) { // FIXME why not <=?
if (row + hbs < s->rows) { // FIXME why not <=?
/* block fully inside the picture: read the full partition tree */
bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
case PARTITION_NONE:
ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
/* horizontal split: second half lies hbs block rows below */
ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
yoff += hbs * 8 * y_stride;
uvoff += hbs * 8 * uv_stride >> s->ss_v;
ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
/* vertical split: second half lies hbs block columns to the right */
ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
yoff += hbs * 8 * bytesperpixel;
uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
case PARTITION_SPLIT:
/* recurse into the four quadrants at the next (smaller) level */
decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
decode_sb(td, row, col + hbs, lflvl,
yoff + 8 * hbs * bytesperpixel,
uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
yoff += hbs * 8 * y_stride;
uvoff += hbs * 8 * uv_stride >> s->ss_v;
decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
decode_sb(td, row + hbs, col + hbs, lflvl,
yoff + 8 * hbs * bytesperpixel,
uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
/* right edge clipped: only NONE vs SPLIT is coded (single prob p[1]) */
} else if (vp56_rac_get_prob_branchy(td->c, p[1])) {
bp = PARTITION_SPLIT;
decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
decode_sb(td, row, col + hbs, lflvl,
yoff + 8 * hbs * bytesperpixel,
uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
/* bottom edge clipped: NONE vs SPLIT decided by p[2] */
} else if (row + hbs < s->rows) { // FIXME why not <=?
if (vp56_rac_get_prob_branchy(td->c, p[2])) {
bp = PARTITION_SPLIT;
decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
yoff += hbs * 8 * y_stride;
uvoff += hbs * 8 * uv_stride >> s->ss_v;
decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
/* both edges clipped: split is forced, nothing is coded */
bp = PARTITION_SPLIT;
decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
/* update adaptation counters with the chosen partition */
td->counts.partition[bl][c][bp]++;
/*
 * Second-pass variant of decode_sb() used when block structure was
 * recorded during the first pass (td->b holds the stored block level
 * and partition): replays the recursion from memory instead of
 * re-reading partition bits from the bitstream.
 */
static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
const VP9Context *s = td->s;
VP9Block *b = td->b;
ptrdiff_t hbs = 4 >> bl; /* half block size, in 8-pixel block units */
AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
int bytesperpixel = s->bytesperpixel;
/* leaf level: the stored block must be an 8x8 decision */
av_assert2(b->bl == BL_8X8);
ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
} else if (td->b->bl == bl) {
/* stored block was coded at exactly this level */
ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
if (b->bp == PARTITION_H && row + hbs < s->rows) {
yoff += hbs * 8 * y_stride;
uvoff += hbs * 8 * uv_stride >> s->ss_v;
ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
} else if (b->bp == PARTITION_V && col + hbs < s->cols) {
yoff += hbs * 8 * bytesperpixel;
uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
/* stored block is smaller: recurse into quadrants, clipping at edges */
decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
if (col + hbs < s->cols) { // FIXME why not <=?
if (row + hbs < s->rows) {
decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
yoff += hbs * 8 * y_stride;
uvoff += hbs * 8 * uv_stride >> s->ss_v;
decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
decode_sb_mem(td, row + hbs, col + hbs, lflvl,
yoff + 8 * hbs * bytesperpixel,
uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
yoff += hbs * 8 * bytesperpixel;
uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
} else if (row + hbs < s->rows) {
yoff += hbs * 8 * y_stride;
uvoff += hbs * 8 * uv_stride >> s->ss_v;
decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
/*
 * Compute the half-open range [*start, *end) covered by tile number idx
 * when n superblocks are divided into 2^log2_n tiles. The superblock
 * boundaries are clamped to n and then converted from superblock units
 * to the decoder's 8-pixel block grid (x8 via << 3).
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first_sb = (idx * n) >> log2_n;
    int last_sb  = ((idx + 1) * n) >> log2_n;

    if (first_sb > n)
        first_sb = n;
    if (last_sb > n)
        last_sb = n;

    *start = first_sb << 3;
    *end   = last_sb << 3;
}
1230 static void free_buffers(VP9Context *s)
1234 av_freep(&s->intra_pred_data[0]);
1235 for (i = 0; i < s->active_tile_cols; i++)
1236 vp9_tile_data_free(&s->td[i]);
/*
 * Codec close callback: tear down all frame, reference and threading
 * state. Frees the three internal frames, the extradata buffer pool,
 * all eight current and pending reference slots, and the slice-thread
 * progress entries.
 */
static av_cold int vp9_decode_free(AVCodecContext *avctx)
VP9Context *s = avctx->priv_data;
/* internal frames: CUR_FRAME plus segmentation-map / mv-pair refs */
for (i = 0; i < 3; i++) {
vp9_frame_unref(avctx, &s->s.frames[i]);
av_frame_free(&s->s.frames[i].tf.f);
av_buffer_pool_uninit(&s->frame_extradata_pool);
/* the 8 reference slots, both the active set and the next set */
for (i = 0; i < 8; i++) {
ff_thread_release_buffer(avctx, &s->s.refs[i]);
av_frame_free(&s->s.refs[i].f);
ff_thread_release_buffer(avctx, &s->next_refs[i]);
av_frame_free(&s->next_refs[i].f);
vp9_free_entries(avctx);
/*
 * Single-threaded tile decode loop. Walks the frame one superblock row
 * at a time: for each row of each tile-row, decodes every tile column
 * left to right, then backs up the pre-loopfilter pixels needed for
 * intra prediction of the next sb64 row, runs the loopfilter on the
 * finished row and reports progress for frame-threading consumers.
 * data/size describe the remaining tile payload after the headers.
 * Returns 0 on success or a negative AVERROR code.
 */
static int decode_tiles(AVCodecContext *avctx,
const uint8_t *data, int size)
VP9Context *s = avctx->priv_data;
VP9TileData *td = &s->td[0];
int row, col, tile_row, tile_col, ret;
int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
ptrdiff_t yoff, uvoff, ls_y, ls_uv;
f = s->s.frames[CUR_FRAME].tf.f;
ls_y = f->linesize[0];
ls_uv =f->linesize[1];
bytesperpixel = s->bytesperpixel;
/* set up one range decoder per tile column of the current tile row */
for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
set_tile_offset(&tile_row_start, &tile_row_end,
tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
/* last tile has no explicit size; others carry a 32-bit prefix */
if (tile_col == s->s.h.tiling.tile_cols - 1 &&
tile_row == s->s.h.tiling.tile_rows - 1) {
tile_size = AV_RB32(data);
if (tile_size > size) {
/* unblock any frame-thread waiters before erroring out */
ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
return AVERROR_INVALIDDATA;
ret = ff_vp56_init_range_decoder(&td->c_b[tile_col], data, tile_size);
if (vp56_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker bit
ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
return AVERROR_INVALIDDATA;
/* one superblock row (8 block units == 64 pixels) per iteration */
for (row = tile_row_start; row < tile_row_end;
row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
VP9Filter *lflvl_ptr = s->lflvl;
ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
set_tile_offset(&tile_col_start, &tile_col_end,
tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
td->tile_col_start = tile_col_start;
/* reset left-edge contexts at the start of each tile column */
memset(td->left_partition_ctx, 0, 8);
memset(td->left_skip_ctx, 0, 8);
if (s->s.h.keyframe || s->s.h.intraonly) {
memset(td->left_mode_ctx, DC_PRED, 16);
memset(td->left_mode_ctx, NEARESTMV, 8);
memset(td->left_y_nnz_ctx, 0, 16);
memset(td->left_uv_nnz_ctx, 0, 32);
memset(td->left_segpred_ctx, 0, 8);
td->c = &td->c_b[tile_col];
for (col = tile_col_start;
col += 8, yoff2 += 64 * bytesperpixel,
uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
// FIXME integrate with lf code (i.e. zero after each
// use, similar to invtxfm coefficients, or similar)
memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
/* second pass replays recorded structure from memory */
decode_sb_mem(td, row, col, lflvl_ptr,
yoff2, uvoff2, BL_64X64);
/* bail out if the range coder ran past the tile payload */
if (vpX_rac_is_end(td->c)) {
return AVERROR_INVALIDDATA;
decode_sb(td, row, col, lflvl_ptr,
yoff2, uvoff2, BL_64X64);
// backup pre-loopfilter reconstruction data for intra
// prediction of next row of sb64s
if (row + 8 < s->rows) {
memcpy(s->intra_pred_data[0],
f->data[0] + yoff + 63 * ls_y,
8 * s->cols * bytesperpixel);
memcpy(s->intra_pred_data[1],
f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
8 * s->cols * bytesperpixel >> s->ss_h);
memcpy(s->intra_pred_data[2],
f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
8 * s->cols * bytesperpixel >> s->ss_h);
// loopfilter one row
if (s->s.h.filter.level) {
lflvl_ptr = s->lflvl;
for (col = 0; col < s->cols;
col += 8, yoff2 += 64 * bytesperpixel,
uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
// FIXME maybe we can make this more finegrained by running the
// loopfilter per-block instead of after each sbrow
// In fact that would also make intra pred left preparation easier?
ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
/*
 * Slice-thread worker: each job (jobnr) decodes one tile column of the
 * frame across all tile rows. The loopfilter is NOT run here — this
 * worker only decodes, backs up the intra-prediction row for its own
 * column span, and signals per-sb-row progress so loopfilter_proc()
 * (the main-thread function) can filter completed rows.
 */
static av_always_inline
int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
VP9Context *s = avctx->priv_data;
VP9TileData *td = &s->td[jobnr];
ptrdiff_t uvoff, yoff, ls_y, ls_uv;
int bytesperpixel = s->bytesperpixel, row, col, tile_row;
unsigned tile_cols_len;
int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
VP9Filter *lflvl_ptr_base;
f = s->s.frames[CUR_FRAME].tf.f;
ls_y = f->linesize[0];
ls_uv =f->linesize[1];
/* this job's horizontal span: tile column jobnr */
set_tile_offset(&tile_col_start, &tile_col_end,
jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
td->tile_col_start = tile_col_start;
uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3);
yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
lflvl_ptr_base = s->lflvl+(tile_col_start >> 3);
for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
set_tile_offset(&tile_row_start, &tile_row_end,
tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
/* range decoders were initialized per tile row by the caller */
td->c = &td->c_b[tile_row];
for (row = tile_row_start; row < tile_row_end;
row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3);
/* reset left-edge contexts at the start of each sb row */
memset(td->left_partition_ctx, 0, 8);
memset(td->left_skip_ctx, 0, 8);
if (s->s.h.keyframe || s->s.h.intraonly) {
memset(td->left_mode_ctx, DC_PRED, 16);
memset(td->left_mode_ctx, NEARESTMV, 8);
memset(td->left_y_nnz_ctx, 0, 16);
memset(td->left_uv_nnz_ctx, 0, 32);
memset(td->left_segpred_ctx, 0, 8);
for (col = tile_col_start;
col += 8, yoff2 += 64 * bytesperpixel,
uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
// FIXME integrate with lf code (i.e. zero after each
// use, similar to invtxfm coefficients, or similar)
memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
decode_sb(td, row, col, lflvl_ptr,
yoff2, uvoff2, BL_64X64);
// backup pre-loopfilter reconstruction data for intra
// prediction of next row of sb64s
tile_cols_len = tile_col_end - tile_col_start;
if (row + 8 < s->rows) {
memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
f->data[0] + yoff + 63 * ls_y,
8 * tile_cols_len * bytesperpixel);
memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
8 * tile_cols_len * bytesperpixel >> s->ss_h);
memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
8 * tile_cols_len * bytesperpixel >> s->ss_h);
/* tell the loopfilter thread this sb row of this tile is done */
vp9_report_tile_progress(s, row >> 3, 1);
/*
 * Main-thread companion of decode_tiles_mt(): for each superblock row,
 * waits until every tile column has reported that row complete, then
 * (if filtering is enabled) runs the loopfilter across the whole row.
 */
static av_always_inline
int loopfilter_proc(AVCodecContext *avctx)
VP9Context *s = avctx->priv_data;
ptrdiff_t uvoff, yoff, ls_y, ls_uv;
VP9Filter *lflvl_ptr;
int bytesperpixel = s->bytesperpixel, col, i;
f = s->s.frames[CUR_FRAME].tf.f;
ls_y = f->linesize[0];
ls_uv =f->linesize[1];
for (i = 0; i < s->sb_rows; i++) {
/* block until all tile_cols workers have finished sb row i */
vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);
if (s->s.h.filter.level) {
yoff = (ls_y * 64)*i;
uvoff = (ls_uv * 64 >> s->ss_v)*i;
lflvl_ptr = s->lflvl+s->sb_cols*i;
for (col = 0; col < s->cols;
col += 8, yoff += 64 * bytesperpixel,
uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col,
/*
 * Attach AV_VIDEO_ENC_PARAMS_VP9 side data to the output frame:
 * frame-level QP and DC/AC chroma/luma QP deltas, plus (when
 * segmentation is enabled) one per-block entry carrying each block's
 * position, size and segment QP delta. Returns 0 or AVERROR(ENOMEM).
 */
static int vp9_export_enc_params(VP9Context *s, VP9Frame *frame)
AVVideoEncParams *par;
unsigned int tile, nb_blocks = 0;
/* per-block entries are only exported when segmentation is on */
if (s->s.h.segmentation.enabled) {
for (tile = 0; tile < s->active_tile_cols; tile++)
nb_blocks += s->td[tile].nb_block_structure;
par = av_video_enc_params_create_side_data(frame->tf.f,
AV_VIDEO_ENC_PARAMS_VP9, nb_blocks);
return AVERROR(ENOMEM);
/* frame-level quantizer and the four chroma/luma delta QPs */
par->qp = s->s.h.yac_qi;
par->delta_qp[0][0] = s->s.h.ydc_qdelta;
par->delta_qp[1][0] = s->s.h.uvdc_qdelta;
par->delta_qp[2][0] = s->s.h.uvdc_qdelta;
par->delta_qp[1][1] = s->s.h.uvac_qdelta;
par->delta_qp[2][1] = s->s.h.uvac_qdelta;
unsigned int block = 0;
unsigned int tile, block_tile;
for (tile = 0; tile < s->active_tile_cols; tile++) {
VP9TileData *td = &s->td[tile];
for (block_tile = 0; block_tile < td->nb_block_structure; block_tile++) {
AVVideoBlockParams *b = av_video_enc_params_block(par, block++);
unsigned int row = td->block_structure[block_tile].row;
unsigned int col = td->block_structure[block_tile].col;
/* segmentation map is stored at 8x8 granularity, 8 per sb col */
uint8_t seg_id = frame->segmentation_map[row * 8 * s->sb_cols + col];
b->w = 1 << (3 + td->block_structure[block_tile].block_size_idx_x);
b->h = 1 << (3 + td->block_structure[block_tile].block_size_idx_y);
if (s->s.h.segmentation.feat[seg_id].q_enabled) {
b->delta_qp = s->s.h.segmentation.feat[seg_id].q_val;
/* absolute segment QP is exported relative to the frame QP */
if (s->s.h.segmentation.absolute_vals)
b->delta_qp -= par->qp;
/*
 * Top-level decode callback for one packet. Parses the frame header;
 * a "show existing frame" packet (header returns 0 with a ref index)
 * is output directly from the reference slot. Otherwise the frame
 * buffers are rotated (segmentation-map and mv-pair references kept
 * from the previous frame where allowed), the current frame is
 * allocated, and tile data is decoded either via a hwaccel, the
 * slice-threaded path (decode_tiles_mt + loopfilter_proc) or the
 * single-threaded decode_tiles(). Afterwards probabilities are
 * adapted, reference slots are refreshed from next_refs, and the
 * frame is output unless marked invisible.
 */
static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
int *got_frame, AVPacket *pkt)
const uint8_t *data = pkt->data;
int size = pkt->size;
VP9Context *s = avctx->priv_data;
/* keep the previous segmentation map unless the header updates it */
int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
(!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
} else if (ret == 0) {
/* show-existing-frame: output reference 'ref' without decoding */
if (!s->s.refs[ref].f->buf[0]) {
av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
return AVERROR_INVALIDDATA;
if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
((AVFrame *)frame)->pts = pkt->pts;
FF_DISABLE_DEPRECATION_WARNINGS
((AVFrame *)frame)->pkt_pts = pkt->pts;
FF_ENABLE_DEPRECATION_WARNINGS
((AVFrame *)frame)->pkt_dts = pkt->dts;
for (i = 0; i < 8; i++) {
if (s->next_refs[i].f->buf[0])
ff_thread_release_buffer(avctx, &s->next_refs[i]);
if (s->s.refs[i].f->buf[0] &&
(ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
/* rotate frame buffers: prior frame becomes segmap/mvpair reference */
if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
(ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
(ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
if (s->s.frames[CUR_FRAME].tf.f->buf[0])
vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME]);
if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
f = s->s.frames[CUR_FRAME].tf.f;
f->key_frame = s->s.h.keyframe;
f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
/* a stale segmap of mismatching dimensions cannot be reused */
if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
(s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width ||
s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
/* build the next reference set per the header's refresh mask */
for (i = 0; i < 8; i++) {
if (s->next_refs[i].f->buf[0])
ff_thread_release_buffer(avctx, &s->next_refs[i]);
if (s->s.h.refreshrefmask & (1 << i)) {
ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
} else if (s->s.refs[i].f->buf[0]) {
ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
/* hardware acceleration path: hand the whole packet to the hwaccel */
if (avctx->hwaccel) {
ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
ret = avctx->hwaccel->end_frame(avctx);
// main tile decode loop
/* reset the above-edge contexts for the whole frame */
memset(s->above_partition_ctx, 0, s->cols);
memset(s->above_skip_ctx, 0, s->cols);
if (s->s.h.keyframe || s->s.h.intraonly) {
memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
memset(s->above_mode_ctx, NEARESTMV, s->cols);
memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
memset(s->above_segpred_ctx, 0, s->cols);
/* frame threading with context refresh needs a two-pass decode */
s->pass = s->s.frames[CUR_FRAME].uses_2pass =
avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
if ((ret = update_block_buffers(avctx)) < 0) {
av_log(avctx, AV_LOG_ERROR,
"Failed to allocate block buffers\n");
/* parallelmode: context is refreshed from pre-adaptation probs */
if (s->s.h.refreshctx && s->s.h.parallelmode) {
for (i = 0; i < 4; i++) {
for (j = 0; j < 2; j++)
for (k = 0; k < 2; k++)
for (l = 0; l < 6; l++)
for (m = 0; m < 6; m++)
memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
s->prob.coef[i][j][k][l][m], 3);
if (s->s.h.txfmmode == i)
s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
ff_thread_finish_setup(avctx);
} else if (!s->s.h.refreshctx) {
ff_thread_finish_setup(avctx);
if (avctx->active_thread_type & FF_THREAD_SLICE) {
for (i = 0; i < s->sb_rows; i++)
atomic_store(&s->entries[i], 0);
/* rewind per-tile scratch pointers for this pass */
for (i = 0; i < s->active_tile_cols; i++) {
s->td[i].b = s->td[i].b_base;
s->td[i].block = s->td[i].block_base;
s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
s->td[i].eob = s->td[i].eob_base;
s->td[i].uveob[0] = s->td[i].uveob_base[0];
s->td[i].uveob[1] = s->td[i].uveob_base[1];
s->td[i].error_info = 0;
if (avctx->active_thread_type == FF_THREAD_SLICE) {
int tile_row, tile_col;
av_assert1(!s->pass);
/* init one range decoder per (tile_col, tile_row) for workers */
for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
if (tile_col == s->s.h.tiling.tile_cols - 1 &&
tile_row == s->s.h.tiling.tile_rows - 1) {
tile_size = AV_RB32(data);
if (tile_size > size)
return AVERROR_INVALIDDATA;
ret = ff_vp56_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
if (vp56_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
return AVERROR_INVALIDDATA;
ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
ret = decode_tiles(avctx, data, size);
ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
// Sum all counts fields into td[0].counts for tile threading
if (avctx->active_thread_type == FF_THREAD_SLICE)
for (i = 1; i < s->s.h.tiling.tile_cols; i++)
for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];
if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
ff_vp9_adapt_probs(s);
ff_thread_finish_setup(avctx);
} while (s->pass++ == 1);
ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
if (s->td->error_info < 0) {
av_log(avctx, AV_LOG_ERROR, "Failed to decode tile data\n");
s->td->error_info = 0;
return AVERROR_INVALIDDATA;
if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
ret = vp9_export_enc_params(s, &s->s.frames[CUR_FRAME]);
/* promote next_refs into the active reference set */
for (i = 0; i < 8; i++) {
if (s->s.refs[i].f->buf[0])
ff_thread_release_buffer(avctx, &s->s.refs[i]);
if (s->next_refs[i].f->buf[0] &&
(ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
if (!s->s.h.invisible) {
if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
1790 static void vp9_decode_flush(AVCodecContext *avctx)
1792 VP9Context *s = avctx->priv_data;
1795 for (i = 0; i < 3; i++)
1796 vp9_frame_unref(avctx, &s->s.frames[i]);
1797 for (i = 0; i < 8; i++)
1798 ff_thread_release_buffer(avctx, &s->s.refs[i]);
/*
 * Allocate the AVFrame shells for the three internal frames and the
 * eight current/next reference slots. On any allocation failure the
 * whole decoder state is torn down via vp9_decode_free() and
 * AVERROR(ENOMEM) is returned.
 */
static int init_frames(AVCodecContext *avctx)
VP9Context *s = avctx->priv_data;
for (i = 0; i < 3; i++) {
s->s.frames[i].tf.f = av_frame_alloc();
if (!s->s.frames[i].tf.f) {
/* free everything allocated so far before reporting OOM */
vp9_decode_free(avctx);
av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
return AVERROR(ENOMEM);
for (i = 0; i < 8; i++) {
s->s.refs[i].f = av_frame_alloc();
s->next_refs[i].f = av_frame_alloc();
if (!s->s.refs[i].f || !s->next_refs[i].f) {
vp9_decode_free(avctx);
av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
return AVERROR(ENOMEM);
/*
 * Codec init callback: seed the header defaults (sharpness -1 marks
 * the loopfilter as not-yet-configured) and allocate the frame and
 * reference AVFrame shells via init_frames().
 */
static av_cold int vp9_decode_init(AVCodecContext *avctx)
VP9Context *s = avctx->priv_data;
s->s.h.filter.sharpness = -1;
return init_frames(avctx);
/*
 * Frame-threading context update: copy decoder state from the source
 * thread (src) into dst. References the three internal frames and the
 * source's next_refs as this thread's active refs, then copies the
 * scalar header state, probability contexts, loopfilter deltas and
 * segmentation features needed to decode the next frame.
 */
static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
for (i = 0; i < 3; i++) {
if (s->s.frames[i].tf.f->buf[0])
vp9_frame_unref(dst, &s->s.frames[i]);
if (ssrc->s.frames[i].tf.f->buf[0]) {
if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
for (i = 0; i < 8; i++) {
if (s->s.refs[i].f->buf[0])
ff_thread_release_buffer(dst, &s->s.refs[i]);
/* the source's *next* refs are this thread's starting refs */
if (ssrc->next_refs[i].f->buf[0]) {
if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
s->s.h.invisible = ssrc->s.h.invisible;
s->s.h.keyframe = ssrc->s.h.keyframe;
s->s.h.intraonly = ssrc->s.h.intraonly;
s->ss_v = ssrc->ss_v;
s->ss_h = ssrc->ss_h;
s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
s->bytesperpixel = ssrc->bytesperpixel;
s->gf_fmt = ssrc->gf_fmt;
s->s.h.bpp = ssrc->s.h.bpp;
s->bpp_index = ssrc->bpp_index;
s->pix_fmt = ssrc->pix_fmt;
memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
sizeof(s->s.h.segmentation.feat));
1884 AVCodec ff_vp9_decoder = {
1886 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
1887 .type = AVMEDIA_TYPE_VIDEO,
1888 .id = AV_CODEC_ID_VP9,
1889 .priv_data_size = sizeof(VP9Context),
1890 .init = vp9_decode_init,
1891 .close = vp9_decode_free,
1892 .decode = vp9_decode_frame,
1893 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
1894 .caps_internal = FF_CODEC_CAP_SLICE_THREAD_HAS_MF |
1895 FF_CODEC_CAP_ALLOCATE_PROGRESS,
1896 .flush = vp9_decode_flush,
1897 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
1898 .profiles = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
1899 .bsfs = "vp9_superframe_split",
1900 .hw_configs = (const AVCodecHWConfigInternal*[]) {
1901 #if CONFIG_VP9_DXVA2_HWACCEL
1904 #if CONFIG_VP9_D3D11VA_HWACCEL
1905 HWACCEL_D3D11VA(vp9),
1907 #if CONFIG_VP9_D3D11VA2_HWACCEL
1908 HWACCEL_D3D11VA2(vp9),
1910 #if CONFIG_VP9_NVDEC_HWACCEL
1913 #if CONFIG_VP9_VAAPI_HWACCEL
1916 #if CONFIG_VP9_VDPAU_HWACCEL