/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
35 #include "libavutil/avassert.h"
36 #include "libavutil/pixdesc.h"
37 #include "libavutil/video_enc_params.h"
39 #define VP9_SYNCCODE 0x498342
/* Tear down the slice-threading tile-progress state: destroy the
 * progress mutex/cond pair and free the atomic per-row counters.
 * Does nothing unless slice threading is active. */
42 static void vp9_free_entries(AVCodecContext *avctx) {
43 VP9Context *s = avctx->priv_data;
45 if (avctx->active_thread_type & FF_THREAD_SLICE) {
46 pthread_mutex_destroy(&s->progress_mutex);
47 pthread_cond_destroy(&s->progress_cond);
48 av_freep(&s->entries);
/* (Re)allocate one atomic progress counter per superblock row (n) for
 * slice threading, zero-initialize them, and (re)initialize the progress
 * mutex/cond pair. Returns 0 on success or AVERROR(ENOMEM).
 * NOTE(review): the failure check between av_malloc_array() and the
 * ENOMEM return is elided in this view — presumably `if (!s->entries)`;
 * confirm against the full file. */
52 static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
53 VP9Context *s = avctx->priv_data;
56 if (avctx->active_thread_type & FF_THREAD_SLICE) {
58 av_freep(&s->entries);
60 s->entries = av_malloc_array(n, sizeof(atomic_int));
63 av_freep(&s->entries);
64 return AVERROR(ENOMEM);
67 for (i = 0; i < n; i++)
68 atomic_init(&s->entries[i], 0);
70 pthread_mutex_init(&s->progress_mutex, NULL);
71 pthread_cond_init(&s->progress_cond, NULL);
/* Publish that `n` more superblock rows of tile `field` have been decoded.
 * The add uses release ordering so a consumer's acquire load also sees the
 * decoded pixel data; the signal wakes any waiter blocked in
 * vp9_await_tile_progress(). Signal is issued under the mutex so a waiter
 * cannot miss it between its re-check and pthread_cond_wait(). */
76 static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
77 pthread_mutex_lock(&s->progress_mutex);
78 atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
79 pthread_cond_signal(&s->progress_cond);
80 pthread_mutex_unlock(&s->progress_mutex);
/* Block until at least `n` superblock rows of tile `field` are reported.
 * Fast path: a lock-free acquire load when the condition already holds
 * (the early `return;` after it is elided in this view).
 * Slow path: relaxed loads suffice under the mutex, since the producer's
 * release add plus the cond signal/wait provide the synchronization. */
83 static void vp9_await_tile_progress(VP9Context *s, int field, int n) {
84 if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
87 pthread_mutex_lock(&s->progress_mutex);
88 while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) != n)
89 pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
90 pthread_mutex_unlock(&s->progress_mutex);
93 static void vp9_free_entries(AVCodecContext *avctx) {}
94 static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
/* Release a tile's scratch buffers: block metadata, coefficient/EOB
 * storage, and the optional block-structure side-data array.
 * av_freep() NULLs each pointer, so calling this repeatedly is safe. */
97 static void vp9_tile_data_free(VP9TileData *td)
99 av_freep(&td->b_base);
100 av_freep(&td->block_base);
101 av_freep(&td->block_structure);
/* Return the frame's picture buffer to the thread framework and drop the
 * per-frame extradata (segmentation map + mv pairs) and hwaccel refs.
 * The raw pointers are cleared because they alias the freed buffers. */
104 static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
106 ff_thread_release_buffer(avctx, &f->tf);
107 av_buffer_unref(&f->extradata);
108 av_buffer_unref(&f->hwaccel_priv_buf);
109 f->segmentation_map = NULL;
110 f->hwaccel_picture_private = NULL;
/* Allocate a picture buffer plus per-frame extradata for f.
 * The extradata pool holds, per frame: `sz` segmentation-map bytes
 * (one per 8x8 block) followed by `sz` VP9mvrefPair entries; the pool
 * is re-created whenever the frame geometry (64 * sb_cols * sb_rows)
 * changes. On failure the frame is fully unreferenced and
 * AVERROR(ENOMEM) is returned.
 * NOTE(review): several failure checks (ff_thread_get_buffer result,
 * av_buffer_pool_get result, goto-fail jumps) are elided in this view. */
113 static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
115 VP9Context *s = avctx->priv_data;
118 ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
/* one segmentation-map byte per 8x8 block of the coded frame */
122 sz = 64 * s->sb_cols * s->sb_rows;
123 if (sz != s->frame_extradata_pool_size) {
124 av_buffer_pool_uninit(&s->frame_extradata_pool);
125 s->frame_extradata_pool = av_buffer_pool_init(sz * (1 + sizeof(VP9mvrefPair)), NULL);
126 if (!s->frame_extradata_pool) {
127 s->frame_extradata_pool_size = 0;
130 s->frame_extradata_pool_size = sz;
132 f->extradata = av_buffer_pool_get(s->frame_extradata_pool);
/* pool buffers are recycled, so clear out the previous frame's data */
136 memset(f->extradata->data, 0, f->extradata->size);
/* segmentation map first, mv array immediately after it */
138 f->segmentation_map = f->extradata->data;
139 f->mv = (VP9mvrefPair *) (f->extradata->data + sz);
141 if (avctx->hwaccel) {
142 const AVHWAccel *hwaccel = avctx->hwaccel;
143 av_assert0(!f->hwaccel_picture_private);
144 if (hwaccel->frame_priv_data_size) {
145 f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
146 if (!f->hwaccel_priv_buf)
148 f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
/* error path: release everything acquired above */
155 vp9_frame_unref(avctx, f);
156 return AVERROR(ENOMEM);
/* Make dst a new reference to src: ref the picture and extradata buffers,
 * then copy the aliasing pointers/flags. On failure dst is unreferenced
 * and AVERROR(ENOMEM) is returned.
 * NOTE(review): the result checks after ff_thread_ref_frame() /
 * av_buffer_ref() and the success `return 0` are elided in this view. */
159 static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
163 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
167 dst->extradata = av_buffer_ref(src->extradata);
/* raw pointer aliases the shared extradata buffer; no copy */
171 dst->segmentation_map = src->segmentation_map;
173 dst->uses_2pass = src->uses_2pass;
175 if (src->hwaccel_picture_private) {
176 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
177 if (!dst->hwaccel_priv_buf)
179 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
185 vp9_frame_unref(avctx, dst);
186 return AVERROR(ENOMEM);
/* (Re)configure the decoder for a coded size of w x h:
 *  - negotiate the output pixel format (hwaccel candidates first),
 *  - recompute the superblock (64x64) and block (8x8) grid dimensions,
 *  - reallocate all per-column "above" context arrays plus the
 *    loop-filter level array in one slab allocation,
 *  - reinitialize the DSP contexts when the bit depth changed.
 * Returns 0 on success or a negative AVERROR.
 * NOTE(review): several lines are elided in this view — the `#endif`s and
 * `break`s inside the switch, the ff_thread_get_format error check, the
 * allocation NULL check, and the early `return 0` paths. */
189 static int update_size(AVCodecContext *avctx, int w, int h)
191 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
192 CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
193 CONFIG_VP9_NVDEC_HWACCEL + \
194 CONFIG_VP9_VAAPI_HWACCEL + \
195 CONFIG_VP9_VDPAU_HWACCEL)
196 enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
197 VP9Context *s = avctx->priv_data;
199 int bytesperpixel = s->bytesperpixel, ret, cols, rows;
202 av_assert0(w > 0 && h > 0);
/* only renegotiate the format when geometry or sw pixfmt changed */
204 if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
205 if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
/* build the candidate list: hwaccel formats per sw format, then sw */
208 switch (s->pix_fmt) {
209 case AV_PIX_FMT_YUV420P:
210 #if CONFIG_VP9_VDPAU_HWACCEL
211 *fmtp++ = AV_PIX_FMT_VDPAU;
213 case AV_PIX_FMT_YUV420P10:
214 #if CONFIG_VP9_DXVA2_HWACCEL
215 *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
217 #if CONFIG_VP9_D3D11VA_HWACCEL
218 *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
219 *fmtp++ = AV_PIX_FMT_D3D11;
221 #if CONFIG_VP9_NVDEC_HWACCEL
222 *fmtp++ = AV_PIX_FMT_CUDA;
224 #if CONFIG_VP9_VAAPI_HWACCEL
225 *fmtp++ = AV_PIX_FMT_VAAPI;
228 case AV_PIX_FMT_YUV420P12:
229 #if CONFIG_VP9_NVDEC_HWACCEL
230 *fmtp++ = AV_PIX_FMT_CUDA;
232 #if CONFIG_VP9_VAAPI_HWACCEL
233 *fmtp++ = AV_PIX_FMT_VAAPI;
/* software format is always the last real candidate */
238 *fmtp++ = s->pix_fmt;
239 *fmtp = AV_PIX_FMT_NONE;
241 ret = ff_thread_get_format(avctx, pix_fmts);
245 avctx->pix_fmt = ret;
246 s->gf_fmt = s->pix_fmt;
/* context buffers still valid for this geometry/format: nothing to do */
254 if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
257 s->last_fmt = s->pix_fmt;
258 s->sb_cols = (w + 63) >> 6;
259 s->sb_rows = (h + 63) >> 6;
260 s->cols = (w + 7) >> 3;
261 s->rows = (h + 7) >> 3;
/* slice threading keeps one lflvl per sb row; otherwise one is enough */
262 lflvl_len = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;
/* carve typed sub-arrays out of the single slab allocation below */
264 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
265 av_freep(&s->intra_pred_data[0]);
266 // FIXME we slightly over-allocate here for subsampled chroma, but a little
267 // bit of padding shouldn't affect performance...
268 p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
269 lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
271 return AVERROR(ENOMEM);
272 assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
273 assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
274 assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
275 assign(s->above_y_nnz_ctx, uint8_t *, 16);
276 assign(s->above_mode_ctx, uint8_t *, 16);
277 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
278 assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
279 assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
280 assign(s->above_partition_ctx, uint8_t *, 8);
281 assign(s->above_skip_ctx, uint8_t *, 8);
282 assign(s->above_txfm_ctx, uint8_t *, 8);
283 assign(s->above_segpred_ctx, uint8_t *, 8);
284 assign(s->above_intra_ctx, uint8_t *, 8);
285 assign(s->above_comp_ctx, uint8_t *, 8);
286 assign(s->above_ref_ctx, uint8_t *, 8);
287 assign(s->above_filter_ctx, uint8_t *, 8);
288 assign(s->lflvl, VP9Filter *, lflvl_len);
/* geometry changed: per-tile buffers are stale, drop them */
292 for (i = 0; i < s->active_tile_cols; i++)
293 vp9_tile_data_free(&s->td[i]);
/* bit depth changed: DSP function tables must be rebuilt */
296 if (s->s.h.bpp != s->last_bpp) {
297 ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
298 ff_videodsp_init(&s->vdsp, s->s.h.bpp);
299 s->last_bpp = s->s.h.bpp;
/* Allocate (or keep) the per-tile block buffers. In 2-pass mode one tile
 * context holds buffers sized for the whole frame (sbs superblocks);
 * otherwise each active tile column gets a single superblock's worth.
 * Layout inside block_base: 64x64 luma coefficients, two chroma planes
 * (scaled by ss_h/ss_v), then 16x16 luma EOB bytes and two chroma EOB
 * runs. Returns 0 on success or AVERROR(ENOMEM).
 * NOTE(review): the early `return 0` on the buffers-still-valid fast
 * path and some closing braces are elided in this view. */
305 static int update_block_buffers(AVCodecContext *avctx)
308 VP9Context *s = avctx->priv_data;
309 int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
310 VP9TileData *td = &s->td[0];
/* fast path: existing buffers match the current 2-pass mode */
312 if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
315 vp9_tile_data_free(td);
316 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
317 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
318 if (s->s.frames[CUR_FRAME].uses_2pass) {
319 int sbs = s->sb_cols * s->sb_rows;
/* whole-frame buffers: one VP9Block per 8x8 block, coeffs per sb */
321 td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
322 td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
323 16 * 16 + 2 * chroma_eobs) * sbs);
324 if (!td->b_base || !td->block_base)
325 return AVERROR(ENOMEM);
326 td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
327 td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
328 td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
329 td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
330 td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;
332 if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
333 td->block_structure = av_malloc_array(s->cols * s->rows, sizeof(*td->block_structure));
334 if (!td->block_structure)
335 return AVERROR(ENOMEM);
/* non-2-pass: per-tile-column buffers sized for one superblock */
338 for (i = 1; i < s->active_tile_cols; i++)
339 vp9_tile_data_free(&s->td[i]);
341 for (i = 0; i < s->active_tile_cols; i++) {
342 s->td[i].b_base = av_malloc(sizeof(VP9Block));
343 s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
344 16 * 16 + 2 * chroma_eobs);
345 if (!s->td[i].b_base || !s->td[i].block_base)
346 return AVERROR(ENOMEM);
347 s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
348 s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
349 s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
350 s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
351 s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;
353 if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
354 s->td[i].block_structure = av_malloc_array(s->cols * s->rows, sizeof(*td->block_structure));
355 if (!s->td[i].block_structure)
356 return AVERROR(ENOMEM);
/* remember which layout was allocated so the fast path above works */
360 s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;
365 // The sign bit is at the end, not the start, of a bit sequence
/* Read an n-bit magnitude followed by one sign bit (1 = negative). */
366 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
368 int v = get_bits(gb, n);
369 return get_bits1(gb) ? -v : v;
/* Inverse of the "recenter" mapping used by the subexponential
 * probability-update coding in update_prob().
 * NOTE(review): the earlier branches of this function are elided in this
 * view; only the final visible case (odd v folded below m) remains —
 * confirm against the full file before relying on this. */
372 static av_always_inline int inv_recenter_nonneg(int v, int m)
377 return m - ((v + 1) >> 1);
381 // differential forward probability updates
/* Decode a differentially-coded update for probability p (range [1,255])
 * from range coder c and return the new probability. The absolute delta
 * is coded as a short VLC (4/4/5-bit classes, then a 7-bit extended class
 * doubled with one extra bit), mapped through inv_map_table[], and folded
 * around p via inv_recenter_nonneg(), mirrored for p > 128. */
382 static int update_prob(VP56RangeCoder *c, int p)
384 static const uint8_t inv_map_table[255] = {
385 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
386 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
387 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
388 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
389 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
390 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
391 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
392 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
393 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
394 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
395 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
396 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
397 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
398 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
399 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
400 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
401 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
402 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
407 /* This code is trying to do a differential probability update. For a
408 * current probability A in the range [1, 255], the difference to a new
409 * probability of any value can be expressed differentially as 1-A, 255-A
410 * where some part of this (absolute range) exists both in positive as
411 * well as the negative part, whereas another part only exists in one
412 * half. We're trying to code this shared part differentially, i.e.
413 * times two where the value of the lowest bit specifies the sign, and
414 * the single part is then coded on top of this. This absolute difference
415 * then again has a value of [0, 254], but a bigger value in this range
416 * indicates that we're further away from the original value A, so we
417 * can code this as a VLC code, since higher values are increasingly
418 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
419 * updates vs. the 'fine, exact' updates further down the range, which
420 * adds one extra dimension to this differential update model. */
/* VLC ladder: each branch selects the base offset of delta d */
422 if (!vp8_rac_get(c)) {
423 d = vp8_rac_get_uint(c, 4) + 0;
424 } else if (!vp8_rac_get(c)) {
425 d = vp8_rac_get_uint(c, 4) + 16;
426 } else if (!vp8_rac_get(c)) {
427 d = vp8_rac_get_uint(c, 5) + 32;
429 d = vp8_rac_get_uint(c, 7);
/* extended class: doubled with one extra low bit */
431 d = (d << 1) - 65 + vp8_rac_get(c);
433 av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
/* fold the mapped delta around p, mirrored across the midpoint */
436 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
437 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
/* Parse bit depth, colorspace, color range, and chroma subsampling from
 * the uncompressed header, setting s->s.h.bpp, s->bytesperpixel,
 * s->ss_h/ss_v, s->pix_fmt and the avctx color properties.
 * Profiles 0/1 are 8-bit; profiles 2/3 read one bit to pick 10 or 12.
 * RGB is only valid in odd profiles (and forces full range, no
 * subsampling); explicit 4:2:0 is rejected for odd profiles.
 * Returns 0 on success or AVERROR_INVALIDDATA.
 * NOTE(review): some `else` lines, closing braces and the final
 * `return 0` are elided in this view. */
440 static int read_colorspace_details(AVCodecContext *avctx)
442 static const enum AVColorSpace colorspaces[8] = {
443 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
444 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
446 VP9Context *s = avctx->priv_data;
447 int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
450 s->s.h.bpp = 8 + bits * 2;
451 s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
452 avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
453 if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
454 static const enum AVPixelFormat pix_fmt_rgb[3] = {
455 AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
/* RGB implies no chroma subsampling and full range */
457 s->ss_h = s->ss_v = 0;
458 avctx->color_range = AVCOL_RANGE_JPEG;
459 s->pix_fmt = pix_fmt_rgb[bits];
460 if (avctx->profile & 1) {
461 if (get_bits1(&s->gb)) {
462 av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
463 return AVERROR_INVALIDDATA;
466 av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
468 return AVERROR_INVALIDDATA;
/* YUV: pick the pixfmt by bit depth and the two subsampling bits */
471 static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
472 { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
473 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
474 { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
475 { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
476 { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
477 { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
479 avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
480 if (avctx->profile & 1) {
481 s->ss_h = get_bits1(&s->gb);
482 s->ss_v = get_bits1(&s->gb);
483 s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
484 if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
485 av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
487 return AVERROR_INVALIDDATA;
488 } else if (get_bits1(&s->gb)) {
489 av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
491 return AVERROR_INVALIDDATA;
/* even profiles are always 4:2:0 */
494 s->ss_h = s->ss_v = 1;
495 s->pix_fmt = pix_fmt_for_ss[bits][1][1];
502 static int decode_frame_header(AVCodecContext *avctx,
503 const uint8_t *data, int size, int *ref)
505 VP9Context *s = avctx->priv_data;
506 int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
508 const uint8_t *data2;
511 if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
512 av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
515 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
516 av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
517 return AVERROR_INVALIDDATA;
519 avctx->profile = get_bits1(&s->gb);
520 avctx->profile |= get_bits1(&s->gb) << 1;
521 if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
522 if (avctx->profile > 3) {
523 av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
524 return AVERROR_INVALIDDATA;
526 s->s.h.profile = avctx->profile;
527 if (get_bits1(&s->gb)) {
528 *ref = get_bits(&s->gb, 3);
532 s->last_keyframe = s->s.h.keyframe;
533 s->s.h.keyframe = !get_bits1(&s->gb);
535 last_invisible = s->s.h.invisible;
536 s->s.h.invisible = !get_bits1(&s->gb);
537 s->s.h.errorres = get_bits1(&s->gb);
538 s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
540 if (s->s.h.keyframe) {
541 if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
542 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
543 return AVERROR_INVALIDDATA;
545 if ((ret = read_colorspace_details(avctx)) < 0)
547 // for profile 1, here follows the subsampling bits
548 s->s.h.refreshrefmask = 0xff;
549 w = get_bits(&s->gb, 16) + 1;
550 h = get_bits(&s->gb, 16) + 1;
551 if (get_bits1(&s->gb)) // display size
552 skip_bits(&s->gb, 32);
554 s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
555 s->s.h.resetctx = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
556 if (s->s.h.intraonly) {
557 if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
558 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
559 return AVERROR_INVALIDDATA;
561 if (avctx->profile >= 1) {
562 if ((ret = read_colorspace_details(avctx)) < 0)
565 s->ss_h = s->ss_v = 1;
568 s->bytesperpixel = 1;
569 s->pix_fmt = AV_PIX_FMT_YUV420P;
570 avctx->colorspace = AVCOL_SPC_BT470BG;
571 avctx->color_range = AVCOL_RANGE_MPEG;
573 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
574 w = get_bits(&s->gb, 16) + 1;
575 h = get_bits(&s->gb, 16) + 1;
576 if (get_bits1(&s->gb)) // display size
577 skip_bits(&s->gb, 32);
579 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
580 s->s.h.refidx[0] = get_bits(&s->gb, 3);
581 s->s.h.signbias[0] = get_bits1(&s->gb) && !s->s.h.errorres;
582 s->s.h.refidx[1] = get_bits(&s->gb, 3);
583 s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres;
584 s->s.h.refidx[2] = get_bits(&s->gb, 3);
585 s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres;
586 if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
587 !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
588 !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
589 av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
590 return AVERROR_INVALIDDATA;
592 if (get_bits1(&s->gb)) {
593 w = s->s.refs[s->s.h.refidx[0]].f->width;
594 h = s->s.refs[s->s.h.refidx[0]].f->height;
595 } else if (get_bits1(&s->gb)) {
596 w = s->s.refs[s->s.h.refidx[1]].f->width;
597 h = s->s.refs[s->s.h.refidx[1]].f->height;
598 } else if (get_bits1(&s->gb)) {
599 w = s->s.refs[s->s.h.refidx[2]].f->width;
600 h = s->s.refs[s->s.h.refidx[2]].f->height;
602 w = get_bits(&s->gb, 16) + 1;
603 h = get_bits(&s->gb, 16) + 1;
605 // Note that in this code, "CUR_FRAME" is actually before we
606 // have formally allocated a frame, and thus actually represents
608 s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
609 s->s.frames[CUR_FRAME].tf.f->height == h;
610 if (get_bits1(&s->gb)) // display size
611 skip_bits(&s->gb, 32);
612 s->s.h.highprecisionmvs = get_bits1(&s->gb);
613 s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
615 s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
616 s->s.h.signbias[0] != s->s.h.signbias[2];
617 if (s->s.h.allowcompinter) {
618 if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
619 s->s.h.fixcompref = 2;
620 s->s.h.varcompref[0] = 0;
621 s->s.h.varcompref[1] = 1;
622 } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
623 s->s.h.fixcompref = 1;
624 s->s.h.varcompref[0] = 0;
625 s->s.h.varcompref[1] = 2;
627 s->s.h.fixcompref = 0;
628 s->s.h.varcompref[0] = 1;
629 s->s.h.varcompref[1] = 2;
634 s->s.h.refreshctx = s->s.h.errorres ? 0 : get_bits1(&s->gb);
635 s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
636 s->s.h.framectxid = c = get_bits(&s->gb, 2);
637 if (s->s.h.keyframe || s->s.h.intraonly)
638 s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
640 /* loopfilter header data */
641 if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
642 // reset loopfilter defaults
643 s->s.h.lf_delta.ref[0] = 1;
644 s->s.h.lf_delta.ref[1] = 0;
645 s->s.h.lf_delta.ref[2] = -1;
646 s->s.h.lf_delta.ref[3] = -1;
647 s->s.h.lf_delta.mode[0] = 0;
648 s->s.h.lf_delta.mode[1] = 0;
649 memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
651 s->s.h.filter.level = get_bits(&s->gb, 6);
652 sharp = get_bits(&s->gb, 3);
653 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
654 // the old cache values since they are still valid
655 if (s->s.h.filter.sharpness != sharp) {
656 for (i = 1; i <= 63; i++) {
660 limit >>= (sharp + 3) >> 2;
661 limit = FFMIN(limit, 9 - sharp);
663 limit = FFMAX(limit, 1);
665 s->filter_lut.lim_lut[i] = limit;
666 s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
669 s->s.h.filter.sharpness = sharp;
670 if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
671 if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
672 for (i = 0; i < 4; i++)
673 if (get_bits1(&s->gb))
674 s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
675 for (i = 0; i < 2; i++)
676 if (get_bits1(&s->gb))
677 s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
681 /* quantization header data */
682 s->s.h.yac_qi = get_bits(&s->gb, 8);
683 s->s.h.ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
684 s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
685 s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
686 s->s.h.lossless = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
687 s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
689 avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
691 /* segmentation header info */
692 if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
693 if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
694 for (i = 0; i < 7; i++)
695 s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
696 get_bits(&s->gb, 8) : 255;
697 if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
698 for (i = 0; i < 3; i++)
699 s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
700 get_bits(&s->gb, 8) : 255;
703 if (get_bits1(&s->gb)) {
704 s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
705 for (i = 0; i < 8; i++) {
706 if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
707 s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
708 if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
709 s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
710 if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
711 s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
712 s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
717 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
718 for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
719 int qyac, qydc, quvac, quvdc, lflvl, sh;
721 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
722 if (s->s.h.segmentation.absolute_vals)
723 qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
725 qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
727 qyac = s->s.h.yac_qi;
729 qydc = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
730 quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
731 quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
732 qyac = av_clip_uintp2(qyac, 8);
734 s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
735 s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
736 s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
737 s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];
739 sh = s->s.h.filter.level >= 32;
740 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
741 if (s->s.h.segmentation.absolute_vals)
742 lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
744 lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
746 lflvl = s->s.h.filter.level;
748 if (s->s.h.lf_delta.enabled) {
749 s->s.h.segmentation.feat[i].lflvl[0][0] =
750 s->s.h.segmentation.feat[i].lflvl[0][1] =
751 av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
752 for (j = 1; j < 4; j++) {
753 s->s.h.segmentation.feat[i].lflvl[j][0] =
754 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
755 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
756 s->s.h.segmentation.feat[i].lflvl[j][1] =
757 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
758 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
761 memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
762 sizeof(s->s.h.segmentation.feat[i].lflvl));
767 if ((ret = update_size(avctx, w, h)) < 0) {
768 av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
772 for (s->s.h.tiling.log2_tile_cols = 0;
773 s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
774 s->s.h.tiling.log2_tile_cols++) ;
775 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
776 max = FFMAX(0, max - 1);
777 while (max > s->s.h.tiling.log2_tile_cols) {
778 if (get_bits1(&s->gb))
779 s->s.h.tiling.log2_tile_cols++;
783 s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
784 s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
785 if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
790 for (i = 0; i < s->active_tile_cols; i++)
791 vp9_tile_data_free(&s->td[i]);
795 s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
796 vp9_free_entries(avctx);
797 s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
798 s->s.h.tiling.tile_cols : 1;
799 vp9_alloc_entries(avctx, s->sb_rows);
800 if (avctx->active_thread_type == FF_THREAD_SLICE) {
801 n_range_coders = 4; // max_tile_rows
803 n_range_coders = s->s.h.tiling.tile_cols;
805 s->td = av_mallocz_array(s->active_tile_cols, sizeof(VP9TileData) +
806 n_range_coders * sizeof(VP56RangeCoder));
808 return AVERROR(ENOMEM);
809 rc = (VP56RangeCoder *) &s->td[s->active_tile_cols];
810 for (i = 0; i < s->active_tile_cols; i++) {
813 rc += n_range_coders;
817 /* check reference frames */
818 if (!s->s.h.keyframe && !s->s.h.intraonly) {
819 int valid_ref_frame = 0;
820 for (i = 0; i < 3; i++) {
821 AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
822 int refw = ref->width, refh = ref->height;
824 if (ref->format != avctx->pix_fmt) {
825 av_log(avctx, AV_LOG_ERROR,
826 "Ref pixfmt (%s) did not match current frame (%s)",
827 av_get_pix_fmt_name(ref->format),
828 av_get_pix_fmt_name(avctx->pix_fmt));
829 return AVERROR_INVALIDDATA;
830 } else if (refw == w && refh == h) {
831 s->mvscale[i][0] = s->mvscale[i][1] = 0;
833 /* Check to make sure at least one of frames that */
834 /* this frame references has valid dimensions */
835 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
836 av_log(avctx, AV_LOG_WARNING,
837 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
839 s->mvscale[i][0] = s->mvscale[i][1] = REF_INVALID_SCALE;
842 s->mvscale[i][0] = (refw << 14) / w;
843 s->mvscale[i][1] = (refh << 14) / h;
844 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
845 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
849 if (!valid_ref_frame) {
850 av_log(avctx, AV_LOG_ERROR, "No valid reference frame is found, bitstream not supported\n");
851 return AVERROR_INVALIDDATA;
855 if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
856 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
857 s->prob_ctx[3].p = ff_vp9_default_probs;
858 memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
859 sizeof(ff_vp9_default_coef_probs));
860 memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
861 sizeof(ff_vp9_default_coef_probs));
862 memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
863 sizeof(ff_vp9_default_coef_probs));
864 memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
865 sizeof(ff_vp9_default_coef_probs));
866 } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
867 s->prob_ctx[c].p = ff_vp9_default_probs;
868 memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
869 sizeof(ff_vp9_default_coef_probs));
872 // next 16 bits is size of the rest of the header (arith-coded)
873 s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
874 s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
876 data2 = align_get_bits(&s->gb);
877 if (size2 > size - (data2 - data)) {
878 av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
879 return AVERROR_INVALIDDATA;
881 ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
885 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
886 av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
887 return AVERROR_INVALIDDATA;
890 for (i = 0; i < s->active_tile_cols; i++) {
891 if (s->s.h.keyframe || s->s.h.intraonly) {
892 memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
893 memset(s->td[i].counts.eob, 0, sizeof(s->td[0].counts.eob));
895 memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));
897 s->td[i].nb_block_structure = 0;
900 /* FIXME is it faster to not copy here, but do it down in the fw updates
901 * as explicit copies if the fw update is missing (and skip the copy upon
903 s->prob.p = s->prob_ctx[c].p;
906 if (s->s.h.lossless) {
907 s->s.h.txfmmode = TX_4X4;
909 s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
910 if (s->s.h.txfmmode == 3)
911 s->s.h.txfmmode += vp8_rac_get(&s->c);
913 if (s->s.h.txfmmode == TX_SWITCHABLE) {
914 for (i = 0; i < 2; i++)
915 if (vp56_rac_get_prob_branchy(&s->c, 252))
916 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
917 for (i = 0; i < 2; i++)
918 for (j = 0; j < 2; j++)
919 if (vp56_rac_get_prob_branchy(&s->c, 252))
920 s->prob.p.tx16p[i][j] =
921 update_prob(&s->c, s->prob.p.tx16p[i][j]);
922 for (i = 0; i < 2; i++)
923 for (j = 0; j < 3; j++)
924 if (vp56_rac_get_prob_branchy(&s->c, 252))
925 s->prob.p.tx32p[i][j] =
926 update_prob(&s->c, s->prob.p.tx32p[i][j]);
931 for (i = 0; i < 4; i++) {
932 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
933 if (vp8_rac_get(&s->c)) {
934 for (j = 0; j < 2; j++)
935 for (k = 0; k < 2; k++)
936 for (l = 0; l < 6; l++)
937 for (m = 0; m < 6; m++) {
938 uint8_t *p = s->prob.coef[i][j][k][l][m];
939 uint8_t *r = ref[j][k][l][m];
940 if (m >= 3 && l == 0) // dc only has 3 pt
942 for (n = 0; n < 3; n++) {
943 if (vp56_rac_get_prob_branchy(&s->c, 252))
944 p[n] = update_prob(&s->c, r[n]);
948 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
951 for (j = 0; j < 2; j++)
952 for (k = 0; k < 2; k++)
953 for (l = 0; l < 6; l++)
954 for (m = 0; m < 6; m++) {
955 uint8_t *p = s->prob.coef[i][j][k][l][m];
956 uint8_t *r = ref[j][k][l][m];
957 if (m > 3 && l == 0) // dc only has 3 pt
960 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
963 if (s->s.h.txfmmode == i)
968 for (i = 0; i < 3; i++)
969 if (vp56_rac_get_prob_branchy(&s->c, 252))
970 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
971 if (!s->s.h.keyframe && !s->s.h.intraonly) {
972 for (i = 0; i < 7; i++)
973 for (j = 0; j < 3; j++)
974 if (vp56_rac_get_prob_branchy(&s->c, 252))
975 s->prob.p.mv_mode[i][j] =
976 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
978 if (s->s.h.filtermode == FILTER_SWITCHABLE)
979 for (i = 0; i < 4; i++)
980 for (j = 0; j < 2; j++)
981 if (vp56_rac_get_prob_branchy(&s->c, 252))
982 s->prob.p.filter[i][j] =
983 update_prob(&s->c, s->prob.p.filter[i][j]);
985 for (i = 0; i < 4; i++)
986 if (vp56_rac_get_prob_branchy(&s->c, 252))
987 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
989 if (s->s.h.allowcompinter) {
990 s->s.h.comppredmode = vp8_rac_get(&s->c);
991 if (s->s.h.comppredmode)
992 s->s.h.comppredmode += vp8_rac_get(&s->c);
993 if (s->s.h.comppredmode == PRED_SWITCHABLE)
994 for (i = 0; i < 5; i++)
995 if (vp56_rac_get_prob_branchy(&s->c, 252))
997 update_prob(&s->c, s->prob.p.comp[i]);
999 s->s.h.comppredmode = PRED_SINGLEREF;
1002 if (s->s.h.comppredmode != PRED_COMPREF) {
1003 for (i = 0; i < 5; i++) {
1004 if (vp56_rac_get_prob_branchy(&s->c, 252))
1005 s->prob.p.single_ref[i][0] =
1006 update_prob(&s->c, s->prob.p.single_ref[i][0]);
1007 if (vp56_rac_get_prob_branchy(&s->c, 252))
1008 s->prob.p.single_ref[i][1] =
1009 update_prob(&s->c, s->prob.p.single_ref[i][1]);
1013 if (s->s.h.comppredmode != PRED_SINGLEREF) {
1014 for (i = 0; i < 5; i++)
1015 if (vp56_rac_get_prob_branchy(&s->c, 252))
1016 s->prob.p.comp_ref[i] =
1017 update_prob(&s->c, s->prob.p.comp_ref[i]);
1020 for (i = 0; i < 4; i++)
1021 for (j = 0; j < 9; j++)
1022 if (vp56_rac_get_prob_branchy(&s->c, 252))
1023 s->prob.p.y_mode[i][j] =
1024 update_prob(&s->c, s->prob.p.y_mode[i][j]);
1026 for (i = 0; i < 4; i++)
1027 for (j = 0; j < 4; j++)
1028 for (k = 0; k < 3; k++)
1029 if (vp56_rac_get_prob_branchy(&s->c, 252))
1030 s->prob.p.partition[3 - i][j][k] =
1032 s->prob.p.partition[3 - i][j][k]);
1034 // mv fields don't use the update_prob subexp model for some reason
1035 for (i = 0; i < 3; i++)
1036 if (vp56_rac_get_prob_branchy(&s->c, 252))
1037 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1039 for (i = 0; i < 2; i++) {
1040 if (vp56_rac_get_prob_branchy(&s->c, 252))
1041 s->prob.p.mv_comp[i].sign =
1042 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1044 for (j = 0; j < 10; j++)
1045 if (vp56_rac_get_prob_branchy(&s->c, 252))
1046 s->prob.p.mv_comp[i].classes[j] =
1047 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1049 if (vp56_rac_get_prob_branchy(&s->c, 252))
1050 s->prob.p.mv_comp[i].class0 =
1051 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1053 for (j = 0; j < 10; j++)
1054 if (vp56_rac_get_prob_branchy(&s->c, 252))
1055 s->prob.p.mv_comp[i].bits[j] =
1056 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1059 for (i = 0; i < 2; i++) {
1060 for (j = 0; j < 2; j++)
1061 for (k = 0; k < 3; k++)
1062 if (vp56_rac_get_prob_branchy(&s->c, 252))
1063 s->prob.p.mv_comp[i].class0_fp[j][k] =
1064 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1066 for (j = 0; j < 3; j++)
1067 if (vp56_rac_get_prob_branchy(&s->c, 252))
1068 s->prob.p.mv_comp[i].fp[j] =
1069 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1072 if (s->s.h.highprecisionmvs) {
1073 for (i = 0; i < 2; i++) {
1074 if (vp56_rac_get_prob_branchy(&s->c, 252))
1075 s->prob.p.mv_comp[i].class0_hp =
1076 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1078 if (vp56_rac_get_prob_branchy(&s->c, 252))
1079 s->prob.p.mv_comp[i].hp =
1080 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1085 return (data2 - data) + size2;
/*
 * Recursively parse and decode the partition tree of one block at level bl,
 * anchored at 8x8-unit position (row, col).
 *
 * The 2-bit partition context c is built from the above/left partition
 * context bitmaps; the probability table p is the keyframe default for
 * key/intra-only frames, otherwise the frame-adaptive table.  Depending on
 * the coded partition the block is decoded whole, split horizontally,
 * split vertically, or recursed into four quadrants.  At the right/bottom
 * frame edge only the edge-legal choices are coded (the branchy reads on
 * p[1]/p[2] below).  yoff/uvoff are byte offsets into the current frame
 * planes; hbs is the half-block size in 8-pixel units at this level.
 *
 * NOTE(review): this excerpt is missing several original source lines
 * (e.g. the "switch (bp)" header between 1107 and 1109, break statements
 * and closing braces); visible lines are kept byte-identical.
 */
1088 static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1089 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1091 const VP9Context *s = td->s;
1092 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1093 (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1094 const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
1095 s->prob.p.partition[bl][c];
1096 enum BlockPartition bp;
1097 ptrdiff_t hbs = 4 >> bl;
1098 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1099 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1100 int bytesperpixel = s->bytesperpixel;
1103 bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1104 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1105 } else if (col + hbs < s->cols) { // FIXME why not <=?
1106 if (row + hbs < s->rows) { // FIXME why not <=?
// fully inside the frame: read the full partition symbol
1107 bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1109 case PARTITION_NONE:
1110 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
// PARTITION_H: top half, then advance hbs*8 luma rows and decode bottom half
1113 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1114 yoff += hbs * 8 * y_stride;
1115 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1116 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
// PARTITION_V: left half, then advance hbs*8 pixels right and decode right half
1119 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1120 yoff += hbs * 8 * bytesperpixel;
1121 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1122 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
1124 case PARTITION_SPLIT:
// recurse into the four quadrants at the next-smaller block level
1125 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1126 decode_sb(td, row, col + hbs, lflvl,
1127 yoff + 8 * hbs * bytesperpixel,
1128 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1129 yoff += hbs * 8 * y_stride;
1130 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1131 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1132 decode_sb(td, row + hbs, col + hbs, lflvl,
1133 yoff + 8 * hbs * bytesperpixel,
1134 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// bottom edge: only "split or not" is coded via a single branch on p[1]
1139 } else if (vp56_rac_get_prob_branchy(td->c, p[1])) {
1140 bp = PARTITION_SPLIT;
1141 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1142 decode_sb(td, row, col + hbs, lflvl,
1143 yoff + 8 * hbs * bytesperpixel,
1144 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1147 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
// right edge in-frame but bottom half out: branch on p[2]
1149 } else if (row + hbs < s->rows) { // FIXME why not <=?
1150 if (vp56_rac_get_prob_branchy(td->c, p[2])) {
1151 bp = PARTITION_SPLIT;
1152 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1153 yoff += hbs * 8 * y_stride;
1154 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1155 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1158 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
// both halves out of frame: split is implied, nothing is coded
1161 bp = PARTITION_SPLIT;
1162 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
// update partition symbol counts for later probability adaptation
1164 td->counts.partition[bl][c][bp]++;
/*
 * Re-walk a partition tree that was already parsed, using the block level
 * (b->bl) and partition (b->bp) stored in td->b instead of reading the
 * bitstream — presumably the second pass of 2-pass decoding (TODO confirm
 * against the caller, which selects between decode_sb and decode_sb_mem).
 * Geometry handling (hbs, yoff/uvoff stepping, edge clipping against
 * s->rows / s->cols) mirrors decode_sb.
 *
 * NOTE(review): several original lines (braces, an else line) are missing
 * from this excerpt; visible lines are kept byte-identical.
 */
1167 static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1168 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1170 const VP9Context *s = td->s;
1171 VP9Block *b = td->b;
1172 ptrdiff_t hbs = 4 >> bl;
1173 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1174 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1175 int bytesperpixel = s->bytesperpixel;
// leaf at the smallest level must be an 8x8 block
1178 av_assert2(b->bl == BL_8X8);
1179 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1180 } else if (td->b->bl == bl) {
// stored partition terminates at this level: decode, plus the second
// half for H/V partitions when it lies inside the frame
1181 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1182 if (b->bp == PARTITION_H && row + hbs < s->rows) {
1183 yoff += hbs * 8 * y_stride;
1184 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1185 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
1186 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
1187 yoff += hbs * 8 * bytesperpixel;
1188 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1189 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// stored tree goes deeper: recurse into the in-frame quadrants
1192 decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
1193 if (col + hbs < s->cols) { // FIXME why not <=?
1194 if (row + hbs < s->rows) {
1195 decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1196 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1197 yoff += hbs * 8 * y_stride;
1198 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1199 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1200 decode_sb_mem(td, row + hbs, col + hbs, lflvl,
1201 yoff + 8 * hbs * bytesperpixel,
1202 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// bottom half out of frame: only the right quadrant remains
1204 yoff += hbs * 8 * bytesperpixel;
1205 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1206 decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
1208 } else if (row + hbs < s->rows) {
1209 yoff += hbs * 8 * y_stride;
1210 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1211 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
/*
 * Compute the half-open pixel range [*start, *end) covered by tile
 * number idx, given the frame extent of n superblocks split into
 * 2^log2_n tiles.  Superblock indices are clamped to n and converted
 * to pixels (1 superblock-unit == 8 pixels, hence the << 3).
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first_sb = (idx       * n) >> log2_n;
    int last_sb  = ((idx + 1) * n) >> log2_n;

    if (first_sb > n)
        first_sb = n;
    if (last_sb > n)
        last_sb = n;

    *start = first_sb << 3;
    *end   = last_sb  << 3;
}
/*
 * Release per-context decode buffers: the intra-prediction backup plane
 * and the per-tile-column VP9TileData contents.
 * NOTE(review): the loop variable declaration and braces are missing from
 * this excerpt; visible lines are kept byte-identical.
 */
1224 static void free_buffers(VP9Context *s)
1228     av_freep(&s->intra_pred_data[0]);
1229     for (i = 0; i < s->active_tile_cols; i++)
1230         vp9_tile_data_free(&s->td[i]);
/*
 * Codec close callback: unreference and free the three internal frames
 * (current / segmentation-map ref / mv-pair ref), the frame extradata
 * buffer pool, all 8 reference and next-reference frames, and the slice
 * threading entries (vp9_free_entries).
 * NOTE(review): closing braces and the trailing free_buffers/return lines
 * are missing from this excerpt; visible lines are kept byte-identical.
 */
1233 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1235     VP9Context *s = avctx->priv_data;
1238     for (i = 0; i < 3; i++) {
1239         vp9_frame_unref(avctx, &s->s.frames[i]);
1240         av_frame_free(&s->s.frames[i].tf.f);
1242     av_buffer_pool_uninit(&s->frame_extradata_pool);
1243     for (i = 0; i < 8; i++) {
1244         ff_thread_release_buffer(avctx, &s->s.refs[i]);
1245         av_frame_free(&s->s.refs[i].f);
1246         ff_thread_release_buffer(avctx, &s->next_refs[i]);
1247         av_frame_free(&s->next_refs[i].f);
1251     vp9_free_entries(avctx);
/*
 * Single-threaded tile decode loop for one frame.
 *
 * Pass 1: for every tile, read its 32-bit big-endian size prefix (the
 * last tile uses the remaining packet bytes), validate it against the
 * remaining size, initialise a range decoder over it and check the
 * marker bit.  Pass 2: walk superblock rows; per row, reset the left
 * contexts for each tile column, decode every 64x64 superblock (via
 * decode_sb, or decode_sb_mem when replaying stored block structure),
 * back up the last reconstructed pixel rows for intra prediction of the
 * next row, run the loop filter over the row, and report row progress
 * to frame-threading consumers.
 *
 * Returns 0 on success or AVERROR_INVALIDDATA on a malformed tile
 * (progress is force-completed first so waiters don't deadlock).
 *
 * NOTE(review): many original lines (braces, size/data advance
 * statements, else branches, loop bounds) are missing from this excerpt;
 * visible lines are kept byte-identical.
 */
1256 static int decode_tiles(AVCodecContext *avctx,
1257 const uint8_t *data, int size)
1259 VP9Context *s = avctx->priv_data;
1260 VP9TileData *td = &s->td[0];
1261 int row, col, tile_row, tile_col, ret;
1263 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1265 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1267 f = s->s.frames[CUR_FRAME].tf.f;
1268 ls_y = f->linesize[0];
1269 ls_uv =f->linesize[1];
1270 bytesperpixel = s->bytesperpixel;
// set up one range decoder per tile from the size-prefixed tile data
1273 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1274 set_tile_offset(&tile_row_start, &tile_row_end,
1275 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1277 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1280 if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1281 tile_row == s->s.h.tiling.tile_rows - 1) {
// non-last tiles carry an explicit 32-bit size prefix
1284 tile_size = AV_RB32(data);
1288 if (tile_size > size) {
1289 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1290 return AVERROR_INVALIDDATA;
1292 ret = ff_vp56_init_range_decoder(&td->c_b[tile_col], data, tile_size);
1295 if (vp56_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker bit
1296 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1297 return AVERROR_INVALIDDATA;
// decode one superblock row (8 8x8 units high) at a time
1303 for (row = tile_row_start; row < tile_row_end;
1304 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1305 VP9Filter *lflvl_ptr = s->lflvl;
1306 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1308 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1309 set_tile_offset(&tile_col_start, &tile_col_end,
1310 tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1311 td->tile_col_start = tile_col_start;
// reset left-edge prediction contexts at each tile-column start
1313 memset(td->left_partition_ctx, 0, 8);
1314 memset(td->left_skip_ctx, 0, 8);
1315 if (s->s.h.keyframe || s->s.h.intraonly) {
1316 memset(td->left_mode_ctx, DC_PRED, 16);
1318 memset(td->left_mode_ctx, NEARESTMV, 8);
1320 memset(td->left_y_nnz_ctx, 0, 16);
1321 memset(td->left_uv_nnz_ctx, 0, 32);
1322 memset(td->left_segpred_ctx, 0, 8);
1324 td->c = &td->c_b[tile_col];
1327 for (col = tile_col_start;
1329 col += 8, yoff2 += 64 * bytesperpixel,
1330 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1331 // FIXME integrate with lf code (i.e. zero after each
1332 // use, similar to invtxfm coefficients, or similar)
1334 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1338 decode_sb_mem(td, row, col, lflvl_ptr,
1339 yoff2, uvoff2, BL_64X64);
// bail out if the range coder ran out of data mid-row
1341 if (vpX_rac_is_end(td->c)) {
1342 return AVERROR_INVALIDDATA;
1344 decode_sb(td, row, col, lflvl_ptr,
1345 yoff2, uvoff2, BL_64X64);
1353 // backup pre-loopfilter reconstruction data for intra
1354 // prediction of next row of sb64s
1355 if (row + 8 < s->rows) {
1356 memcpy(s->intra_pred_data[0],
1357 f->data[0] + yoff + 63 * ls_y,
1358 8 * s->cols * bytesperpixel);
1359 memcpy(s->intra_pred_data[1],
1360 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1361 8 * s->cols * bytesperpixel >> s->ss_h);
1362 memcpy(s->intra_pred_data[2],
1363 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1364 8 * s->cols * bytesperpixel >> s->ss_h);
1367 // loopfilter one row
1368 if (s->s.h.filter.level) {
1371 lflvl_ptr = s->lflvl;
1372 for (col = 0; col < s->cols;
1373 col += 8, yoff2 += 64 * bytesperpixel,
1374 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1375 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
1380 // FIXME maybe we can make this more finegrained by running the
1381 // loopfilter per-block instead of after each sbrow
1382 // In fact that would also make intra pred left preparation easier?
1383 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
/*
 * Slice-threading worker: decodes one tile COLUMN of the frame
 * (jobnr selects the column; each job owns s->td[jobnr]).
 *
 * Per superblock row it resets the left contexts, decodes each 64x64
 * superblock in the column, backs up the bottom pixel rows of the
 * column's span for next-row intra prediction, then publishes the
 * finished sb row via vp9_report_tile_progress so the main-thread
 * loop filter (loopfilter_proc) can proceed once all columns reach
 * that row.
 *
 * NOTE(review): several original lines (final parameters of the
 * signature, braces, loop upper bounds, return) are missing from this
 * excerpt; visible lines are kept byte-identical.
 */
1390 static av_always_inline
1391 int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
1394 VP9Context *s = avctx->priv_data;
1395 VP9TileData *td = &s->td[jobnr];
1396 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1397 int bytesperpixel = s->bytesperpixel, row, col, tile_row;
1398 unsigned tile_cols_len;
1399 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1400 VP9Filter *lflvl_ptr_base;
1403 f = s->s.frames[CUR_FRAME].tf.f;
1404 ls_y = f->linesize[0];
1405 ls_uv =f->linesize[1];
// this job covers the pixel span of tile column jobnr
1407 set_tile_offset(&tile_col_start, &tile_col_end,
1408 jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1409 td->tile_col_start = tile_col_start;
1410 uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3);
1411 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1412 lflvl_ptr_base = s->lflvl+(tile_col_start >> 3);
1414 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1415 set_tile_offset(&tile_row_start, &tile_row_end,
1416 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
// range decoders were pre-initialised per tile row by the caller
1418 td->c = &td->c_b[tile_row];
1419 for (row = tile_row_start; row < tile_row_end;
1420 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1421 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1422 VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3);
// reset left-edge prediction contexts at the start of each sb row
1424 memset(td->left_partition_ctx, 0, 8);
1425 memset(td->left_skip_ctx, 0, 8);
1426 if (s->s.h.keyframe || s->s.h.intraonly) {
1427 memset(td->left_mode_ctx, DC_PRED, 16);
1429 memset(td->left_mode_ctx, NEARESTMV, 8);
1431 memset(td->left_y_nnz_ctx, 0, 16);
1432 memset(td->left_uv_nnz_ctx, 0, 32);
1433 memset(td->left_segpred_ctx, 0, 8);
1435 for (col = tile_col_start;
1437 col += 8, yoff2 += 64 * bytesperpixel,
1438 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1439 // FIXME integrate with lf code (i.e. zero after each
1440 // use, similar to invtxfm coefficients, or similar)
1441 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1442 decode_sb(td, row, col, lflvl_ptr,
1443 yoff2, uvoff2, BL_64X64);
1446 // backup pre-loopfilter reconstruction data for intra
1447 // prediction of next row of sb64s
1448 tile_cols_len = tile_col_end - tile_col_start;
1449 if (row + 8 < s->rows) {
1450 memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1451 f->data[0] + yoff + 63 * ls_y,
1452 8 * tile_cols_len * bytesperpixel);
1453 memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1454 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1455 8 * tile_cols_len * bytesperpixel >> s->ss_h);
1456 memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1457 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1458 8 * tile_cols_len * bytesperpixel >> s->ss_h);
// tell the loop-filter thread this column finished sb row (row >> 3)
1461 vp9_report_tile_progress(s, row >> 3, 1);
/*
 * Main-thread companion to decode_tiles_mt: for each superblock row,
 * wait until every tile column has reported that row complete
 * (vp9_await_tile_progress), then — if the loop filter is enabled —
 * run ff_vp9_loopfilter_sb over all superblocks in that row.
 *
 * NOTE(review): braces, the call's trailing arguments and the return
 * are missing from this excerpt; visible lines are kept byte-identical.
 */
1467 static av_always_inline
1468 int loopfilter_proc(AVCodecContext *avctx)
1470 VP9Context *s = avctx->priv_data;
1471 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1472 VP9Filter *lflvl_ptr;
1473 int bytesperpixel = s->bytesperpixel, col, i;
1476 f = s->s.frames[CUR_FRAME].tf.f;
1477 ls_y = f->linesize[0];
1478 ls_uv =f->linesize[1];
1480 for (i = 0; i < s->sb_rows; i++) {
// block until all tile_cols workers have finished sb row i
1481 vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);
1483 if (s->s.h.filter.level) {
1484 yoff = (ls_y * 64)*i;
1485 uvoff = (ls_uv * 64 >> s->ss_v)*i;
1486 lflvl_ptr = s->lflvl+s->sb_cols*i;
1487 for (col = 0; col < s->cols;
1488 col += 8, yoff += 64 * bytesperpixel,
1489 uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1490 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col,
/*
 * Attach AV_VIDEO_ENC_PARAMS_VP9 side data to the output frame:
 * frame-level base QP (yac_qi) and DC/AC delta QPs per plane, plus —
 * when segmentation is enabled — one AVVideoBlockParams entry per coded
 * block (position, size, and the segment's QP delta; absolute segment QP
 * values are converted to deltas against the frame QP).
 * Returns AVERROR(ENOMEM) if the side data cannot be allocated.
 *
 * NOTE(review): braces, b->src_x/src_y assignments and the return are
 * missing from this excerpt; visible lines are kept byte-identical.
 */
1499 static int vp9_export_enc_params(VP9Context *s, VP9Frame *frame)
1501 AVVideoEncParams *par;
1502 unsigned int tile, nb_blocks = 0;
1504 if (s->s.h.segmentation.enabled) {
1505 for (tile = 0; tile < s->active_tile_cols; tile++)
1506 nb_blocks += s->td[tile].nb_block_structure;
1509 par = av_video_enc_params_create_side_data(frame->tf.f,
1510 AV_VIDEO_ENC_PARAMS_VP9, nb_blocks);
1512 return AVERROR(ENOMEM);
// frame-level quantiser and per-plane DC/AC deltas
1514 par->qp = s->s.h.yac_qi;
1515 par->delta_qp[0][0] = s->s.h.ydc_qdelta;
1516 par->delta_qp[1][0] = s->s.h.uvdc_qdelta;
1517 par->delta_qp[2][0] = s->s.h.uvdc_qdelta;
1518 par->delta_qp[1][1] = s->s.h.uvac_qdelta;
1519 par->delta_qp[2][1] = s->s.h.uvac_qdelta;
1522 unsigned int block = 0;
1523 unsigned int tile, block_tile;
1525 for (tile = 0; tile < s->active_tile_cols; tile++) {
1526 VP9TileData *td = &s->td[tile];
1528 for (block_tile = 0; block_tile < td->nb_block_structure; block_tile++) {
1529 AVVideoBlockParams *b = av_video_enc_params_block(par, block++);
1530 unsigned int row = td->block_structure[block_tile].row;
1531 unsigned int col = td->block_structure[block_tile].col;
// segment id looked up from the per-8x8-unit segmentation map
1532 uint8_t seg_id = frame->segmentation_map[row * 8 * s->sb_cols + col];
1536 b->w = 1 << (3 + td->block_structure[block_tile].block_size_idx_x);
1537 b->h = 1 << (3 + td->block_structure[block_tile].block_size_idx_y);
1539 if (s->s.h.segmentation.feat[seg_id].q_enabled) {
1540 b->delta_qp = s->s.h.segmentation.feat[seg_id].q_val;
// absolute segment QP is exported as a delta vs. the frame QP
1541 if (s->s.h.segmentation.absolute_vals)
1542 b->delta_qp -= par->qp;
/*
 * Top-level decode callback for one packet.
 *
 * Phases visible below:
 *  1. decode_frame_header(); ret == 0 means a show-existing-frame packet:
 *     output a ref of s->s.refs[ref] with the packet's pts/dts, rotate
 *     next_refs, and return early.
 *  2. Rotate internal frames: drop/refresh REF_FRAME_SEGMAP and
 *     REF_FRAME_MVPAIR from the previous CUR_FRAME, then allocate a new
 *     CUR_FRAME and set key_frame/pict_type on it.
 *  3. Build next_refs from refreshrefmask (new frame vs. carried-over ref).
 *  4. hwaccel path: start_frame / decode_slice / end_frame and skip the
 *     software tile decode.
 *  5. Software path: reset above-row contexts, optionally save the frame
 *     probability context (parallelmode), then run either the
 *     slice-threaded path (per-tile range decoders + decode_tiles_mt with
 *     loopfilter_proc as main function) or single-threaded decode_tiles;
 *     afterwards sum per-tile counts and adapt probabilities, looping
 *     while s->pass++ == 1 for 2-pass frame threading.
 *  6. Promote next_refs into s->s.refs and, if the frame is visible,
 *     ref CUR_FRAME into the output frame.
 *
 * NOTE(review): this excerpt is missing a large number of original lines
 * (declarations, braces, error-path gotos, data/size advancement); the
 * visible lines are kept byte-identical.
 */
1551 static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
1552 int *got_frame, AVPacket *pkt)
1554 const uint8_t *data = pkt->data;
1555 int size = pkt->size;
1556 VP9Context *s = avctx->priv_data;
// keep the old segmentation map unless this frame rewrites it
1558 int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
1559 (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
1562 if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
1564 } else if (ret == 0) {
// show-existing-frame: no coded data, just output reference `ref`
1565 if (!s->s.refs[ref].f->buf[0]) {
1566 av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
1567 return AVERROR_INVALIDDATA;
1569 if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
1571 ((AVFrame *)frame)->pts = pkt->pts;
1573 FF_DISABLE_DEPRECATION_WARNINGS
1574 ((AVFrame *)frame)->pkt_pts = pkt->pts;
1575 FF_ENABLE_DEPRECATION_WARNINGS
1577 ((AVFrame *)frame)->pkt_dts = pkt->dts;
1578 for (i = 0; i < 8; i++) {
1579 if (s->next_refs[i].f->buf[0])
1580 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1581 if (s->s.refs[i].f->buf[0] &&
1582 (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
// rotate SEGMAP / MVPAIR refs out of the previous current frame
1591 if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
1592 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
1593 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
1594 if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1595 (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
1598 if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
1599 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR])
1600 if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1601 (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
1603 if (s->s.frames[CUR_FRAME].tf.f->buf[0])
1604 vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME])
1605 if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
1607 f = s->s.frames[CUR_FRAME].tf.f;
1608 f->key_frame = s->s.h.keyframe;
1609 f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
// drop a stale segmentation map if the frame size changed
1611 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
1612 (s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width ||
1613 s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
1614 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
// ref rotation: refreshed slots point at the new frame
1618 for (i = 0; i < 8; i++) {
1619 if (s->next_refs[i].f->buf[0])
1620 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1621 if (s->s.h.refreshrefmask & (1 << i)) {
1622 ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
1623 } else if (s->s.refs[i].f->buf[0]) {
1624 ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
1630 if (avctx->hwaccel) {
1631 ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
1634 ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
1637 ret = avctx->hwaccel->end_frame(avctx);
1643 // main tile decode loop
1644 memset(s->above_partition_ctx, 0, s->cols);
1645 memset(s->above_skip_ctx, 0, s->cols);
1646 if (s->s.h.keyframe || s->s.h.intraonly) {
1647 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
1649 memset(s->above_mode_ctx, NEARESTMV, s->cols);
1651 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
1652 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
1653 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
1654 memset(s->above_segpred_ctx, 0, s->cols);
// 2-pass decode only for frame threading with context refresh
1655 s->pass = s->s.frames[CUR_FRAME].uses_2pass =
1656 avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
1657 if ((ret = update_block_buffers(avctx)) < 0) {
1658 av_log(avctx, AV_LOG_ERROR,
1659 "Failed to allocate block buffers\n");
// parallelmode: save probability context now, before tile decode
1662 if (s->s.h.refreshctx && s->s.h.parallelmode) {
1665 for (i = 0; i < 4; i++) {
1666 for (j = 0; j < 2; j++)
1667 for (k = 0; k < 2; k++)
1668 for (l = 0; l < 6; l++)
1669 for (m = 0; m < 6; m++)
1670 memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
1671 s->prob.coef[i][j][k][l][m], 3);
1672 if (s->s.h.txfmmode == i)
1675 s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
1676 ff_thread_finish_setup(avctx);
1677 } else if (!s->s.h.refreshctx) {
1678 ff_thread_finish_setup(avctx);
1682 if (avctx->active_thread_type & FF_THREAD_SLICE) {
1683 for (i = 0; i < s->sb_rows; i++)
1684 atomic_store(&s->entries[i], 0);
// reset per-tile scratch pointers for this pass
1689 for (i = 0; i < s->active_tile_cols; i++) {
1690 s->td[i].b = s->td[i].b_base;
1691 s->td[i].block = s->td[i].block_base;
1692 s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
1693 s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
1694 s->td[i].eob = s->td[i].eob_base;
1695 s->td[i].uveob[0] = s->td[i].uveob_base[0];
1696 s->td[i].uveob[1] = s->td[i].uveob_base[1];
1697 s->td[i].error_info = 0;
1701 if (avctx->active_thread_type == FF_THREAD_SLICE) {
1702 int tile_row, tile_col;
1704 av_assert1(!s->pass);
// slice threading: pre-init one range decoder per (col, row) tile
1706 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1707 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1710 if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1711 tile_row == s->s.h.tiling.tile_rows - 1) {
1714 tile_size = AV_RB32(data);
1718 if (tile_size > size)
1719 return AVERROR_INVALIDDATA;
1720 ret = ff_vp56_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
1723 if (vp56_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
1724 return AVERROR_INVALIDDATA;
1730 ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
1734 ret = decode_tiles(avctx, data, size);
1736 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1741 // Sum all counts fields into td[0].counts for tile threading
1742 if (avctx->active_thread_type == FF_THREAD_SLICE)
1743 for (i = 1; i < s->s.h.tiling.tile_cols; i++)
1744 for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
1745 ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];
1747 if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
1748 ff_vp9_adapt_probs(s);
1749 ff_thread_finish_setup(avctx);
1751 } while (s->pass++ == 1);
1752 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1754 if (s->td->error_info < 0) {
1755 av_log(avctx, AV_LOG_ERROR, "Failed to decode tile data\n");
1756 s->td->error_info = 0;
1757 return AVERROR_INVALIDDATA;
1759 if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
1760 ret = vp9_export_enc_params(s, &s->s.frames[CUR_FRAME]);
// promote next_refs into the active reference set
1767 for (i = 0; i < 8; i++) {
1768 if (s->s.refs[i].f->buf[0])
1769 ff_thread_release_buffer(avctx, &s->s.refs[i]);
1770 if (s->next_refs[i].f->buf[0] &&
1771 (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
1775 if (!s->s.h.invisible) {
1776 if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
/*
 * Flush callback: unreference the three internal frames and release all
 * 8 reference-frame buffers, discarding inter-frame decode state.
 * NOTE(review): the loop variable declaration and braces are missing
 * from this excerpt; visible lines are kept byte-identical.
 */
1784 static void vp9_decode_flush(AVCodecContext *avctx)
1786     VP9Context *s = avctx->priv_data;
1789     for (i = 0; i < 3; i++)
1790         vp9_frame_unref(avctx, &s->s.frames[i]);
1791     for (i = 0; i < 8; i++)
1792         ff_thread_release_buffer(avctx, &s->s.refs[i]);
/*
 * Allocate the AVFrame shells for the 3 internal frames and the 8
 * reference / next-reference slots.  On any allocation failure the
 * whole decoder is torn down via vp9_decode_free() and
 * AVERROR(ENOMEM) is returned.
 * NOTE(review): braces and the final return are missing from this
 * excerpt; visible lines are kept byte-identical.
 */
1795 static int init_frames(AVCodecContext *avctx)
1797     VP9Context *s = avctx->priv_data;
1800     for (i = 0; i < 3; i++) {
1801         s->s.frames[i].tf.f = av_frame_alloc();
1802         if (!s->s.frames[i].tf.f) {
1803             vp9_decode_free(avctx);
1804             av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1805             return AVERROR(ENOMEM);
1808     for (i = 0; i < 8; i++) {
1809         s->s.refs[i].f = av_frame_alloc();
1810         s->next_refs[i].f = av_frame_alloc();
1811         if (!s->s.refs[i].f || !s->next_refs[i].f) {
1812             vp9_decode_free(avctx);
1813             av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1814             return AVERROR(ENOMEM);
/*
 * Codec init callback: seed default header state (sharpness = -1 means
 * "not yet set") and allocate the frame shells via init_frames().
 * NOTE(review): braces and other init lines are missing from this
 * excerpt; visible lines are kept byte-identical.
 */
1821 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1823     VP9Context *s = avctx->priv_data;
1826     s->s.h.filter.sharpness = -1;
1828     return init_frames(avctx);
/*
 * Frame-threading state transfer: copy decoder state from the source
 * thread (ssrc) into the destination thread (s) — re-reference the 3
 * internal frames and the 8 references (taken from ssrc->next_refs,
 * i.e. the post-frame reference set), then copy the scalar header
 * fields, probability contexts, loop-filter deltas and segmentation
 * features the next frame's header parse depends on.
 * NOTE(review): braces, error returns and the final return are missing
 * from this excerpt; visible lines are kept byte-identical.
 */
1832 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1835     VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
1837     for (i = 0; i < 3; i++) {
1838         if (s->s.frames[i].tf.f->buf[0])
1839             vp9_frame_unref(dst, &s->s.frames[i]);
1840         if (ssrc->s.frames[i].tf.f->buf[0]) {
1841             if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
1845     for (i = 0; i < 8; i++) {
1846         if (s->s.refs[i].f->buf[0])
1847             ff_thread_release_buffer(dst, &s->s.refs[i]);
1848         if (ssrc->next_refs[i].f->buf[0]) {
1849             if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
1854     s->s.h.invisible = ssrc->s.h.invisible;
1855     s->s.h.keyframe = ssrc->s.h.keyframe;
1856     s->s.h.intraonly = ssrc->s.h.intraonly;
1857     s->ss_v = ssrc->ss_v;
1858     s->ss_h = ssrc->ss_h;
1859     s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1860     s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1861     s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1862     s->bytesperpixel = ssrc->bytesperpixel;
1863     s->gf_fmt = ssrc->gf_fmt;
1866     s->s.h.bpp = ssrc->s.h.bpp;
1867     s->bpp_index = ssrc->bpp_index;
1868     s->pix_fmt = ssrc->pix_fmt;
1869     memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
1870     memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
1871     memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1872            sizeof(s->s.h.segmentation.feat));
1878 AVCodec ff_vp9_decoder = {
1880 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
1881 .type = AVMEDIA_TYPE_VIDEO,
1882 .id = AV_CODEC_ID_VP9,
1883 .priv_data_size = sizeof(VP9Context),
1884 .init = vp9_decode_init,
1885 .close = vp9_decode_free,
1886 .decode = vp9_decode_frame,
1887 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
1888 .caps_internal = FF_CODEC_CAP_SLICE_THREAD_HAS_MF |
1889 FF_CODEC_CAP_ALLOCATE_PROGRESS,
1890 .flush = vp9_decode_flush,
1891 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
1892 .profiles = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
1893 .bsfs = "vp9_superframe_split",
1894 .hw_configs = (const AVCodecHWConfigInternal*[]) {
1895 #if CONFIG_VP9_DXVA2_HWACCEL
1898 #if CONFIG_VP9_D3D11VA_HWACCEL
1899 HWACCEL_D3D11VA(vp9),
1901 #if CONFIG_VP9_D3D11VA2_HWACCEL
1902 HWACCEL_D3D11VA2(vp9),
1904 #if CONFIG_VP9_NVDEC_HWACCEL
1907 #if CONFIG_VP9_VAAPI_HWACCEL
1910 #if CONFIG_VP9_VDPAU_HWACCEL