2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
35 #include "libavutil/avassert.h"
36 #include "libavutil/pixdesc.h"
37 #include "libavutil/video_enc_params.h"
39 #define VP9_SYNCCODE 0x498342
/* Free the slice-threading tile-progress state: the per-row atomic
 * progress counters plus the mutex/cond pair used to wake waiters.
 * No-op for non-slice-threaded decoding. */
42 static void vp9_free_entries(AVCodecContext *avctx) {
43 VP9Context *s = avctx->priv_data;
/* Only slice threading ever allocates these resources (see
 * vp9_alloc_entries below), so guard on the active thread type. */
45 if (avctx->active_thread_type & FF_THREAD_SLICE) {
46 pthread_mutex_destroy(&s->progress_mutex);
47 pthread_cond_destroy(&s->progress_cond);
48 av_freep(&s->entries);
/* (Re)allocate n atomic progress counters for slice-threaded decoding
 * (presumably one per superblock row — caller passes s->sb_rows; confirm
 * at call site) and initialize the progress mutex/cond pair.
 * Returns 0 on success or AVERROR(ENOMEM). */
52 static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
53 VP9Context *s = avctx->priv_data;
56 if (avctx->active_thread_type & FF_THREAD_SLICE) {
/* Drop any previous array before allocating the new size. */
58 av_freep(&s->entries);
60 s->entries = av_malloc_array(n, sizeof(atomic_int));
/* Allocation-failure path: release and report out-of-memory. */
63 av_freep(&s->entries);
64 return AVERROR(ENOMEM);
/* Start every row's progress counter at zero. */
67 for (i = 0; i < n; i++)
68 atomic_init(&s->entries[i], 0);
70 pthread_mutex_init(&s->progress_mutex, NULL);
71 pthread_cond_init(&s->progress_cond, NULL);
/* Publish decode progress for one tile column: bump entries[field] by n
 * with release ordering (so data written before the bump is visible to
 * an acquire load in vp9_await_tile_progress) and wake one waiter.
 * The mutex is held around the add+signal so a waiter cannot miss the
 * signal between its re-check and its cond_wait. */
76 static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
77 pthread_mutex_lock(&s->progress_mutex);
78 atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
79 pthread_cond_signal(&s->progress_cond);
80 pthread_mutex_unlock(&s->progress_mutex);
/* Block until entries[field] reaches n. Lock-free fast path first: an
 * acquire load pairs with the release add in vp9_report_tile_progress,
 * making the producer's writes visible without taking the mutex. */
83 static void vp9_await_tile_progress(VP9Context *s, int field, int n) {
84 if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
/* Slow path: sleep on the condvar; relaxed load is sufficient here
 * because the mutex itself orders memory on each wakeup. */
87 pthread_mutex_lock(&s->progress_mutex);
88 while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) != n)
89 pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
90 pthread_mutex_unlock(&s->progress_mutex);
/* Stub for builds without slice-threading support (presumably the
 * !HAVE_THREADS branch — the #if/#else lines are not visible here). */
93 static void vp9_free_entries(AVCodecContext *avctx) {}
/* Stub counterpart of vp9_alloc_entries for thread-less builds:
 * nothing to allocate, always succeeds. */
94 static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
/* Release the per-tile working buffers (block metadata, coefficient
 * arena, and the optional block-structure export array). av_freep()
 * also NULLs the pointers, so this is safe to call repeatedly. */
97 static void vp9_tile_data_free(VP9TileData *td)
99 av_freep(&td->b_base);
100 av_freep(&td->block_base);
101 av_freep(&td->block_structure);
/* Drop all references held by a VP9Frame: the underlying thread frame,
 * the pooled extradata buffer, and the hwaccel private buffer. The raw
 * pointers derived from those buffers are cleared so nothing dangles. */
104 static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
106 ff_thread_release_buffer(avctx, &f->tf);
107 av_buffer_unref(&f->extradata);
108 av_buffer_unref(&f->hwaccel_priv_buf);
/* These point into the buffers unref'd above — must not outlive them. */
109 f->segmentation_map = NULL;
110 f->hwaccel_picture_private = NULL;
/* Allocate everything a VP9Frame needs: the picture buffer itself, a
 * pooled extradata blob holding the segmentation map plus per-block
 * motion-vector refs, and (if a hwaccel is active) its private data.
 * On any failure the frame is fully unref'd and ENOMEM is returned. */
113 static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
115 VP9Context *s = avctx->priv_data;
118 ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
/* One byte of segmentation map per 8x8 block: 64 entries per 64x64
 * superblock, sb_cols * sb_rows superblocks. */
122 sz = 64 * s->sb_cols * s->sb_rows;
/* Recreate the buffer pool whenever the frame geometry changes; the
 * pool element holds sz map bytes followed by sz VP9mvrefPair entries. */
123 if (sz != s->frame_extradata_pool_size) {
124 av_buffer_pool_uninit(&s->frame_extradata_pool);
125 s->frame_extradata_pool = av_buffer_pool_init(sz * (1 + sizeof(VP9mvrefPair)), NULL);
126 if (!s->frame_extradata_pool) {
127 s->frame_extradata_pool_size = 0;
130 s->frame_extradata_pool_size = sz;
132 f->extradata = av_buffer_pool_get(s->frame_extradata_pool);
/* Pool buffers are recycled, so clear stale contents explicitly. */
136 memset(f->extradata->data, 0, f->extradata->size);
/* Carve the blob: segmentation map first, mv-ref pairs after it. */
138 f->segmentation_map = f->extradata->data;
139 f->mv = (VP9mvrefPair *) (f->extradata->data + sz);
141 if (avctx->hwaccel) {
142 const AVHWAccel *hwaccel = avctx->hwaccel;
143 av_assert0(!f->hwaccel_picture_private);
144 if (hwaccel->frame_priv_data_size) {
145 f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
146 if (!f->hwaccel_priv_buf)
148 f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
/* Shared failure path (jumped to from the elided error checks above):
 * release whatever was acquired so the frame is left in a clean state. */
155 vp9_frame_unref(avctx, f);
156 return AVERROR(ENOMEM);
/* Make dst a new reference to src: ref-count the thread frame and the
 * extradata/hwaccel buffers, and copy the plain pointers/flags that
 * alias into those shared buffers. On failure dst is unref'd. */
159 static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
163 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
167 dst->extradata = av_buffer_ref(src->extradata);
/* Points into the shared extradata buffer — copy, don't duplicate. */
171 dst->segmentation_map = src->segmentation_map;
173 dst->uses_2pass = src->uses_2pass;
175 if (src->hwaccel_picture_private) {
176 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
177 if (!dst->hwaccel_priv_buf)
179 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
/* Error path: drop any refs taken so far. */
185 vp9_frame_unref(avctx, dst);
186 return AVERROR(ENOMEM);
/* Handle a (possible) change of frame dimensions or pixel format:
 * negotiate the output pixel format (offering hwaccel formats that
 * match the bitstream's sw format), recompute block/superblock
 * geometry, and reallocate the single arena that backs all of the
 * "above"-row context buffers and loop-filter levels. */
189 static int update_size(AVCodecContext *avctx, int w, int h)
/* Worst-case number of hwaccel formats that can be offered below;
 * D3D11VA contributes two entries (VLD + texture format). */
191 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
192 CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
193 CONFIG_VP9_NVDEC_HWACCEL + \
194 CONFIG_VP9_VAAPI_HWACCEL + \
195 CONFIG_VP9_VDPAU_HWACCEL)
/* +2: one slot for the software format, one for the NONE terminator. */
196 enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
197 VP9Context *s = avctx->priv_data;
199 int bytesperpixel = s->bytesperpixel, ret, cols, rows;
202 av_assert0(w > 0 && h > 0);
/* Re-run format negotiation only when format or size changed. */
204 if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
205 if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
/* Build the candidate list: hwaccels that support this sw format
 * first, then the software format itself as fallback. */
208 switch (s->pix_fmt) {
209 case AV_PIX_FMT_YUV420P:
210 case AV_PIX_FMT_YUV420P10:
211 #if CONFIG_VP9_DXVA2_HWACCEL
212 *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
214 #if CONFIG_VP9_D3D11VA_HWACCEL
215 *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
216 *fmtp++ = AV_PIX_FMT_D3D11;
218 #if CONFIG_VP9_NVDEC_HWACCEL
219 *fmtp++ = AV_PIX_FMT_CUDA;
221 #if CONFIG_VP9_VAAPI_HWACCEL
222 *fmtp++ = AV_PIX_FMT_VAAPI;
224 #if CONFIG_VP9_VDPAU_HWACCEL
225 *fmtp++ = AV_PIX_FMT_VDPAU;
/* 12-bit content: only a subset of hwaccels can handle it. */
228 case AV_PIX_FMT_YUV420P12:
229 #if CONFIG_VP9_NVDEC_HWACCEL
230 *fmtp++ = AV_PIX_FMT_CUDA;
232 #if CONFIG_VP9_VAAPI_HWACCEL
233 *fmtp++ = AV_PIX_FMT_VAAPI;
235 #if CONFIG_VP9_VDPAU_HWACCEL
236 *fmtp++ = AV_PIX_FMT_VDPAU;
241 *fmtp++ = s->pix_fmt;
242 *fmtp = AV_PIX_FMT_NONE;
244 ret = ff_thread_get_format(avctx, pix_fmts);
248 avctx->pix_fmt = ret;
249 s->gf_fmt = s->pix_fmt;
/* Context buffers still valid if geometry and format are unchanged. */
257 if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
260 s->last_fmt = s->pix_fmt;
/* Superblocks are 64x64, blocks 8x8 — round the frame size up. */
261 s->sb_cols = (w + 63) >> 6;
262 s->sb_rows = (h + 63) >> 6;
263 s->cols = (w + 7) >> 3;
264 s->rows = (h + 7) >> 3;
/* Slice threading needs one lflvl row per superblock row. */
265 lflvl_len = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;
/* Carve successive typed sub-arrays out of the single arena 'p';
 * each gets sb_cols * n elements. */
267 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
268 av_freep(&s->intra_pred_data[0]);
269 // FIXME we slightly over-allocate here for subsampled chroma, but a little
270 // bit of padding shouldn't affect performance...
271 p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
272 lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
274 return AVERROR(ENOMEM);
275 assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
276 assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
277 assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
278 assign(s->above_y_nnz_ctx, uint8_t *, 16);
279 assign(s->above_mode_ctx, uint8_t *, 16);
280 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
281 assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
282 assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
283 assign(s->above_partition_ctx, uint8_t *, 8);
284 assign(s->above_skip_ctx, uint8_t *, 8);
285 assign(s->above_txfm_ctx, uint8_t *, 8);
286 assign(s->above_segpred_ctx, uint8_t *, 8);
287 assign(s->above_intra_ctx, uint8_t *, 8);
288 assign(s->above_comp_ctx, uint8_t *, 8);
289 assign(s->above_ref_ctx, uint8_t *, 8);
290 assign(s->above_filter_ctx, uint8_t *, 8);
291 assign(s->lflvl, VP9Filter *, lflvl_len);
/* Geometry changed, so the per-tile buffers are stale — drop them. */
295 for (i = 0; i < s->active_tile_cols; i++)
296 vp9_tile_data_free(&s->td[i]);
/* Re-init DSP tables when the bit depth changes. */
299 if (s->s.h.bpp != s->last_bpp) {
300 ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
301 ff_videodsp_init(&s->vdsp, s->s.h.bpp);
302 s->last_bpp = s->s.h.bpp;
/* (Re)allocate per-tile block metadata and coefficient buffers.
 * Two layouts exist: 2-pass decoding keeps whole-frame buffers in
 * td[0] (sized by superblock count), while 1-pass decoding gives each
 * active tile column a single-superblock-sized scratch buffer.
 * Returns 0 on success or AVERROR(ENOMEM). */
308 static int update_block_buffers(AVCodecContext *avctx)
311 VP9Context *s = avctx->priv_data;
312 int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
313 VP9TileData *td = &s->td[0];
/* Fast path: buffers exist and were allocated for the same pass mode. */
315 if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
318 vp9_tile_data_free(td);
/* Chroma sizes shrink with each subsampling dimension (ss_h/ss_v). */
319 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
320 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
321 if (s->s.frames[CUR_FRAME].uses_2pass) {
322 int sbs = s->sb_cols * s->sb_rows;
/* Whole-frame allocation: every block and every coefficient for the
 * frame must persist between the two passes. */
324 td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
/* One arena: luma coefs + 2 chroma planes (int16_t each), then
 * luma EOB bytes + 2 chroma EOB planes, all times sbs superblocks. */
325 td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
326 16 * 16 + 2 * chroma_eobs) * sbs);
327 if (!td->b_base || !td->block_base)
328 return AVERROR(ENOMEM);
329 td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
330 td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
331 td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
332 td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
333 td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;
/* Optional per-block structure export for video-enc-params side data. */
335 if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
336 td->block_structure = av_malloc_array(s->cols * s->rows, sizeof(*td->block_structure));
337 if (!td->block_structure)
338 return AVERROR(ENOMEM);
/* 1-pass path: free secondary tiles' buffers, then give every active
 * tile column a one-superblock scratch area (same internal layout as
 * above, but without the sbs multiplier). */
341 for (i = 1; i < s->active_tile_cols; i++)
342 vp9_tile_data_free(&s->td[i]);
344 for (i = 0; i < s->active_tile_cols; i++) {
345 s->td[i].b_base = av_malloc(sizeof(VP9Block));
346 s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
347 16 * 16 + 2 * chroma_eobs);
348 if (!s->td[i].b_base || !s->td[i].block_base)
349 return AVERROR(ENOMEM);
350 s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
351 s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
352 s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
353 s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
354 s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;
356 if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
357 s->td[i].block_structure = av_malloc_array(s->cols * s->rows, sizeof(*td->block_structure));
358 if (!s->td[i].block_structure)
359 return AVERROR(ENOMEM);
/* Remember which layout was used so the fast path above is valid. */
363 s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;
368 // The sign bit is at the end, not the start, of a bit sequence
/* Read an n-bit magnitude followed by one sign bit (1 => negative). */
369 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
371 int v = get_bits(gb, n);
372 return get_bits1(gb) ? -v : v;
/* Inverse of the "recenter nonneg" mapping used by VP9's subexponential
 * probability coding. NOTE(review): the earlier branches of this
 * function are elided in this view; only the odd-v case (mapping back
 * below the pivot m) is visible — consult the full source. */
375 static av_always_inline int inv_recenter_nonneg(int v, int m)
380 return m - ((v + 1) >> 1);
384 // differential forward probability updates
/* Decode a subexponentially-coded delta from range coder c and apply it
 * to the current probability p (in [1,255]), returning the updated
 * probability. See the long explanatory comment below for the model. */
385 static int update_prob(VP56RangeCoder *c, int p)
/* Maps the decoded VLC index back to the absolute delta; the first 20
 * entries are the coarse "cheap" update values. */
387 static const uint8_t inv_map_table[255] = {
388 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
389 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
390 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
391 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
392 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
393 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
394 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
395 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
396 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
397 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
398 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
399 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
400 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
401 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
402 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
403 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
404 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
405 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
410 /* This code is trying to do a differential probability update. For a
411 * current probability A in the range [1, 255], the difference to a new
412 * probability of any value can be expressed differentially as 1-A, 255-A
413 * where some part of this (absolute range) exists both in positive as
414 * well as the negative part, whereas another part only exists in one
415 * half. We're trying to code this shared part differentially, i.e.
416 * times two where the value of the lowest bit specifies the sign, and
417 * the single part is then coded on top of this. This absolute difference
418 * then again has a value of [0, 254], but a bigger value in this range
419 * indicates that we're further away from the original value A, so we
420 * can code this as a VLC code, since higher values are increasingly
421 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
422 * updates vs. the 'fine, exact' updates further down the range, which
423 * adds one extra dimension to this differential update model. */
/* VLC decode of the delta index: 4/4/5-bit buckets, then a 7-bit tail
 * that is doubled and sign-extended by one extra coded bit. */
425 if (!vp8_rac_get(c)) {
426 d = vp8_rac_get_uint(c, 4) + 0;
427 } else if (!vp8_rac_get(c)) {
428 d = vp8_rac_get_uint(c, 4) + 16;
429 } else if (!vp8_rac_get(c)) {
430 d = vp8_rac_get_uint(c, 5) + 32;
432 d = vp8_rac_get_uint(c, 7);
434 d = (d << 1) - 65 + vp8_rac_get(c);
436 av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
/* Re-center around p, mirroring for the upper half so the result
 * always lands back in [1, 255]. */
439 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
440 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
/* Parse the color-config portion of the frame header: bit depth,
 * colorspace, color range, and chroma subsampling; derives s->pix_fmt,
 * s->bpp/bytesperpixel and ss_h/ss_v. Returns 0 or AVERROR_INVALIDDATA
 * on reserved/unsupported combinations. */
443 static int read_colorspace_details(AVCodecContext *avctx)
445 static const enum AVColorSpace colorspaces[8] = {
446 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
447 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
449 VP9Context *s = avctx->priv_data;
/* Profiles 0/1 are 8-bit; profiles 2/3 signal 10 vs 12 bit here. */
450 int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
453 s->s.h.bpp = 8 + bits * 2;
454 s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
455 avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
456 if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
457 static const enum AVPixelFormat pix_fmt_rgb[3] = {
458 AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
/* RGB is always 4:4:4 full-range. */
460 s->ss_h = s->ss_v = 0;
461 avctx->color_range = AVCOL_RANGE_JPEG;
462 s->pix_fmt = pix_fmt_rgb[bits];
/* Odd profiles carry a reserved bit after the RGB signalling. */
463 if (avctx->profile & 1) {
464 if (get_bits1(&s->gb)) {
465 av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
466 return AVERROR_INVALIDDATA;
/* Even profiles may not use RGB at all. */
469 av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
471 return AVERROR_INVALIDDATA;
/* YUV path: pick the pixel format from [bit depth][ss_v][ss_h]. */
474 static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
475 { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
476 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
477 { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
478 { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
479 { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
480 { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
482 avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
/* Odd profiles signal subsampling explicitly (4:2:0 is forbidden
 * there); even profiles are implicitly 4:2:0. */
483 if (avctx->profile & 1) {
484 s->ss_h = get_bits1(&s->gb);
485 s->ss_v = get_bits1(&s->gb);
486 s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
487 if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
488 av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
490 return AVERROR_INVALIDDATA;
491 } else if (get_bits1(&s->gb)) {
492 av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
494 return AVERROR_INVALIDDATA;
497 s->ss_h = s->ss_v = 1;
498 s->pix_fmt = pix_fmt_for_ss[bits][1][1];
505 static int decode_frame_header(AVCodecContext *avctx,
506 const uint8_t *data, int size, int *ref)
508 VP9Context *s = avctx->priv_data;
509 int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
511 const uint8_t *data2;
514 if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
515 av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
518 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
519 av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
520 return AVERROR_INVALIDDATA;
522 avctx->profile = get_bits1(&s->gb);
523 avctx->profile |= get_bits1(&s->gb) << 1;
524 if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
525 if (avctx->profile > 3) {
526 av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
527 return AVERROR_INVALIDDATA;
529 s->s.h.profile = avctx->profile;
530 if (get_bits1(&s->gb)) {
531 *ref = get_bits(&s->gb, 3);
535 s->last_keyframe = s->s.h.keyframe;
536 s->s.h.keyframe = !get_bits1(&s->gb);
538 last_invisible = s->s.h.invisible;
539 s->s.h.invisible = !get_bits1(&s->gb);
540 s->s.h.errorres = get_bits1(&s->gb);
541 s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
543 if (s->s.h.keyframe) {
544 if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
545 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
546 return AVERROR_INVALIDDATA;
548 if ((ret = read_colorspace_details(avctx)) < 0)
550 // for profile 1, here follows the subsampling bits
551 s->s.h.refreshrefmask = 0xff;
552 w = get_bits(&s->gb, 16) + 1;
553 h = get_bits(&s->gb, 16) + 1;
554 if (get_bits1(&s->gb)) // display size
555 skip_bits(&s->gb, 32);
557 s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
558 s->s.h.resetctx = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
559 if (s->s.h.intraonly) {
560 if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
561 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
562 return AVERROR_INVALIDDATA;
564 if (avctx->profile >= 1) {
565 if ((ret = read_colorspace_details(avctx)) < 0)
568 s->ss_h = s->ss_v = 1;
571 s->bytesperpixel = 1;
572 s->pix_fmt = AV_PIX_FMT_YUV420P;
573 avctx->colorspace = AVCOL_SPC_BT470BG;
574 avctx->color_range = AVCOL_RANGE_MPEG;
576 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
577 w = get_bits(&s->gb, 16) + 1;
578 h = get_bits(&s->gb, 16) + 1;
579 if (get_bits1(&s->gb)) // display size
580 skip_bits(&s->gb, 32);
582 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
583 s->s.h.refidx[0] = get_bits(&s->gb, 3);
584 s->s.h.signbias[0] = get_bits1(&s->gb) && !s->s.h.errorres;
585 s->s.h.refidx[1] = get_bits(&s->gb, 3);
586 s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres;
587 s->s.h.refidx[2] = get_bits(&s->gb, 3);
588 s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres;
589 if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
590 !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
591 !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
592 av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
593 return AVERROR_INVALIDDATA;
595 if (get_bits1(&s->gb)) {
596 w = s->s.refs[s->s.h.refidx[0]].f->width;
597 h = s->s.refs[s->s.h.refidx[0]].f->height;
598 } else if (get_bits1(&s->gb)) {
599 w = s->s.refs[s->s.h.refidx[1]].f->width;
600 h = s->s.refs[s->s.h.refidx[1]].f->height;
601 } else if (get_bits1(&s->gb)) {
602 w = s->s.refs[s->s.h.refidx[2]].f->width;
603 h = s->s.refs[s->s.h.refidx[2]].f->height;
605 w = get_bits(&s->gb, 16) + 1;
606 h = get_bits(&s->gb, 16) + 1;
608 // Note that in this code, "CUR_FRAME" is actually before we
609 // have formally allocated a frame, and thus actually represents
611 s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
612 s->s.frames[CUR_FRAME].tf.f->height == h;
613 if (get_bits1(&s->gb)) // display size
614 skip_bits(&s->gb, 32);
615 s->s.h.highprecisionmvs = get_bits1(&s->gb);
616 s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
618 s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
619 s->s.h.signbias[0] != s->s.h.signbias[2];
620 if (s->s.h.allowcompinter) {
621 if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
622 s->s.h.fixcompref = 2;
623 s->s.h.varcompref[0] = 0;
624 s->s.h.varcompref[1] = 1;
625 } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
626 s->s.h.fixcompref = 1;
627 s->s.h.varcompref[0] = 0;
628 s->s.h.varcompref[1] = 2;
630 s->s.h.fixcompref = 0;
631 s->s.h.varcompref[0] = 1;
632 s->s.h.varcompref[1] = 2;
637 s->s.h.refreshctx = s->s.h.errorres ? 0 : get_bits1(&s->gb);
638 s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
639 s->s.h.framectxid = c = get_bits(&s->gb, 2);
640 if (s->s.h.keyframe || s->s.h.intraonly)
641 s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
643 /* loopfilter header data */
644 if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
645 // reset loopfilter defaults
646 s->s.h.lf_delta.ref[0] = 1;
647 s->s.h.lf_delta.ref[1] = 0;
648 s->s.h.lf_delta.ref[2] = -1;
649 s->s.h.lf_delta.ref[3] = -1;
650 s->s.h.lf_delta.mode[0] = 0;
651 s->s.h.lf_delta.mode[1] = 0;
652 memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
654 s->s.h.filter.level = get_bits(&s->gb, 6);
655 sharp = get_bits(&s->gb, 3);
656 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
657 // the old cache values since they are still valid
658 if (s->s.h.filter.sharpness != sharp) {
659 for (i = 1; i <= 63; i++) {
663 limit >>= (sharp + 3) >> 2;
664 limit = FFMIN(limit, 9 - sharp);
666 limit = FFMAX(limit, 1);
668 s->filter_lut.lim_lut[i] = limit;
669 s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
672 s->s.h.filter.sharpness = sharp;
673 if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
674 if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
675 for (i = 0; i < 4; i++)
676 if (get_bits1(&s->gb))
677 s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
678 for (i = 0; i < 2; i++)
679 if (get_bits1(&s->gb))
680 s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
684 /* quantization header data */
685 s->s.h.yac_qi = get_bits(&s->gb, 8);
686 s->s.h.ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
687 s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
688 s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
689 s->s.h.lossless = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
690 s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
692 avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
694 /* segmentation header info */
695 if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
696 if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
697 for (i = 0; i < 7; i++)
698 s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
699 get_bits(&s->gb, 8) : 255;
700 if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
701 for (i = 0; i < 3; i++)
702 s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
703 get_bits(&s->gb, 8) : 255;
706 if (get_bits1(&s->gb)) {
707 s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
708 for (i = 0; i < 8; i++) {
709 if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
710 s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
711 if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
712 s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
713 if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
714 s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
715 s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
720 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
721 for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
722 int qyac, qydc, quvac, quvdc, lflvl, sh;
724 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
725 if (s->s.h.segmentation.absolute_vals)
726 qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
728 qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
730 qyac = s->s.h.yac_qi;
732 qydc = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
733 quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
734 quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
735 qyac = av_clip_uintp2(qyac, 8);
737 s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
738 s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
739 s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
740 s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];
742 sh = s->s.h.filter.level >= 32;
743 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
744 if (s->s.h.segmentation.absolute_vals)
745 lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
747 lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
749 lflvl = s->s.h.filter.level;
751 if (s->s.h.lf_delta.enabled) {
752 s->s.h.segmentation.feat[i].lflvl[0][0] =
753 s->s.h.segmentation.feat[i].lflvl[0][1] =
754 av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
755 for (j = 1; j < 4; j++) {
756 s->s.h.segmentation.feat[i].lflvl[j][0] =
757 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
758 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
759 s->s.h.segmentation.feat[i].lflvl[j][1] =
760 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
761 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
764 memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
765 sizeof(s->s.h.segmentation.feat[i].lflvl));
770 if ((ret = update_size(avctx, w, h)) < 0) {
771 av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
775 for (s->s.h.tiling.log2_tile_cols = 0;
776 s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
777 s->s.h.tiling.log2_tile_cols++) ;
778 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
779 max = FFMAX(0, max - 1);
780 while (max > s->s.h.tiling.log2_tile_cols) {
781 if (get_bits1(&s->gb))
782 s->s.h.tiling.log2_tile_cols++;
786 s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
787 s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
788 if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
793 for (i = 0; i < s->active_tile_cols; i++)
794 vp9_tile_data_free(&s->td[i]);
798 s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
799 vp9_free_entries(avctx);
800 s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
801 s->s.h.tiling.tile_cols : 1;
802 vp9_alloc_entries(avctx, s->sb_rows);
803 if (avctx->active_thread_type == FF_THREAD_SLICE) {
804 n_range_coders = 4; // max_tile_rows
806 n_range_coders = s->s.h.tiling.tile_cols;
808 s->td = av_mallocz_array(s->active_tile_cols, sizeof(VP9TileData) +
809 n_range_coders * sizeof(VP56RangeCoder));
811 return AVERROR(ENOMEM);
812 rc = (VP56RangeCoder *) &s->td[s->active_tile_cols];
813 for (i = 0; i < s->active_tile_cols; i++) {
816 rc += n_range_coders;
820 /* check reference frames */
821 if (!s->s.h.keyframe && !s->s.h.intraonly) {
822 int valid_ref_frame = 0;
823 for (i = 0; i < 3; i++) {
824 AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
825 int refw = ref->width, refh = ref->height;
827 if (ref->format != avctx->pix_fmt) {
828 av_log(avctx, AV_LOG_ERROR,
829 "Ref pixfmt (%s) did not match current frame (%s)",
830 av_get_pix_fmt_name(ref->format),
831 av_get_pix_fmt_name(avctx->pix_fmt));
832 return AVERROR_INVALIDDATA;
833 } else if (refw == w && refh == h) {
834 s->mvscale[i][0] = s->mvscale[i][1] = 0;
836 /* Check to make sure at least one of frames that */
837 /* this frame references has valid dimensions */
838 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
839 av_log(avctx, AV_LOG_WARNING,
840 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
842 s->mvscale[i][0] = s->mvscale[i][1] = REF_INVALID_SCALE;
845 s->mvscale[i][0] = (refw << 14) / w;
846 s->mvscale[i][1] = (refh << 14) / h;
847 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
848 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
852 if (!valid_ref_frame) {
853 av_log(avctx, AV_LOG_ERROR, "No valid reference frame is found, bitstream not supported\n");
854 return AVERROR_INVALIDDATA;
858 if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
859 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
860 s->prob_ctx[3].p = ff_vp9_default_probs;
861 memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
862 sizeof(ff_vp9_default_coef_probs));
863 memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
864 sizeof(ff_vp9_default_coef_probs));
865 memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
866 sizeof(ff_vp9_default_coef_probs));
867 memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
868 sizeof(ff_vp9_default_coef_probs));
869 } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
870 s->prob_ctx[c].p = ff_vp9_default_probs;
871 memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
872 sizeof(ff_vp9_default_coef_probs));
875 // next 16 bits is size of the rest of the header (arith-coded)
876 s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
877 s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
879 data2 = align_get_bits(&s->gb);
880 if (size2 > size - (data2 - data)) {
881 av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
882 return AVERROR_INVALIDDATA;
884 ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
888 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
889 av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
890 return AVERROR_INVALIDDATA;
893 for (i = 0; i < s->active_tile_cols; i++) {
894 if (s->s.h.keyframe || s->s.h.intraonly) {
895 memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
896 memset(s->td[i].counts.eob, 0, sizeof(s->td[0].counts.eob));
898 memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));
900 s->td[i].nb_block_structure = 0;
903 /* FIXME is it faster to not copy here, but do it down in the fw updates
904 * as explicit copies if the fw update is missing (and skip the copy upon
906 s->prob.p = s->prob_ctx[c].p;
909 if (s->s.h.lossless) {
910 s->s.h.txfmmode = TX_4X4;
912 s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
913 if (s->s.h.txfmmode == 3)
914 s->s.h.txfmmode += vp8_rac_get(&s->c);
916 if (s->s.h.txfmmode == TX_SWITCHABLE) {
917 for (i = 0; i < 2; i++)
918 if (vp56_rac_get_prob_branchy(&s->c, 252))
919 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
920 for (i = 0; i < 2; i++)
921 for (j = 0; j < 2; j++)
922 if (vp56_rac_get_prob_branchy(&s->c, 252))
923 s->prob.p.tx16p[i][j] =
924 update_prob(&s->c, s->prob.p.tx16p[i][j]);
925 for (i = 0; i < 2; i++)
926 for (j = 0; j < 3; j++)
927 if (vp56_rac_get_prob_branchy(&s->c, 252))
928 s->prob.p.tx32p[i][j] =
929 update_prob(&s->c, s->prob.p.tx32p[i][j]);
934 for (i = 0; i < 4; i++) {
935 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
936 if (vp8_rac_get(&s->c)) {
937 for (j = 0; j < 2; j++)
938 for (k = 0; k < 2; k++)
939 for (l = 0; l < 6; l++)
940 for (m = 0; m < 6; m++) {
941 uint8_t *p = s->prob.coef[i][j][k][l][m];
942 uint8_t *r = ref[j][k][l][m];
943 if (m >= 3 && l == 0) // dc only has 3 pt
945 for (n = 0; n < 3; n++) {
946 if (vp56_rac_get_prob_branchy(&s->c, 252))
947 p[n] = update_prob(&s->c, r[n]);
951 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
954 for (j = 0; j < 2; j++)
955 for (k = 0; k < 2; k++)
956 for (l = 0; l < 6; l++)
957 for (m = 0; m < 6; m++) {
958 uint8_t *p = s->prob.coef[i][j][k][l][m];
959 uint8_t *r = ref[j][k][l][m];
960 if (m > 3 && l == 0) // dc only has 3 pt
963 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
966 if (s->s.h.txfmmode == i)
971 for (i = 0; i < 3; i++)
972 if (vp56_rac_get_prob_branchy(&s->c, 252))
973 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
974 if (!s->s.h.keyframe && !s->s.h.intraonly) {
975 for (i = 0; i < 7; i++)
976 for (j = 0; j < 3; j++)
977 if (vp56_rac_get_prob_branchy(&s->c, 252))
978 s->prob.p.mv_mode[i][j] =
979 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
981 if (s->s.h.filtermode == FILTER_SWITCHABLE)
982 for (i = 0; i < 4; i++)
983 for (j = 0; j < 2; j++)
984 if (vp56_rac_get_prob_branchy(&s->c, 252))
985 s->prob.p.filter[i][j] =
986 update_prob(&s->c, s->prob.p.filter[i][j]);
988 for (i = 0; i < 4; i++)
989 if (vp56_rac_get_prob_branchy(&s->c, 252))
990 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
992 if (s->s.h.allowcompinter) {
993 s->s.h.comppredmode = vp8_rac_get(&s->c);
994 if (s->s.h.comppredmode)
995 s->s.h.comppredmode += vp8_rac_get(&s->c);
996 if (s->s.h.comppredmode == PRED_SWITCHABLE)
997 for (i = 0; i < 5; i++)
998 if (vp56_rac_get_prob_branchy(&s->c, 252))
1000 update_prob(&s->c, s->prob.p.comp[i]);
1002 s->s.h.comppredmode = PRED_SINGLEREF;
1005 if (s->s.h.comppredmode != PRED_COMPREF) {
1006 for (i = 0; i < 5; i++) {
1007 if (vp56_rac_get_prob_branchy(&s->c, 252))
1008 s->prob.p.single_ref[i][0] =
1009 update_prob(&s->c, s->prob.p.single_ref[i][0]);
1010 if (vp56_rac_get_prob_branchy(&s->c, 252))
1011 s->prob.p.single_ref[i][1] =
1012 update_prob(&s->c, s->prob.p.single_ref[i][1]);
1016 if (s->s.h.comppredmode != PRED_SINGLEREF) {
1017 for (i = 0; i < 5; i++)
1018 if (vp56_rac_get_prob_branchy(&s->c, 252))
1019 s->prob.p.comp_ref[i] =
1020 update_prob(&s->c, s->prob.p.comp_ref[i]);
1023 for (i = 0; i < 4; i++)
1024 for (j = 0; j < 9; j++)
1025 if (vp56_rac_get_prob_branchy(&s->c, 252))
1026 s->prob.p.y_mode[i][j] =
1027 update_prob(&s->c, s->prob.p.y_mode[i][j]);
1029 for (i = 0; i < 4; i++)
1030 for (j = 0; j < 4; j++)
1031 for (k = 0; k < 3; k++)
1032 if (vp56_rac_get_prob_branchy(&s->c, 252))
1033 s->prob.p.partition[3 - i][j][k] =
1035 s->prob.p.partition[3 - i][j][k]);
1037 // mv fields don't use the update_prob subexp model for some reason
1038 for (i = 0; i < 3; i++)
1039 if (vp56_rac_get_prob_branchy(&s->c, 252))
1040 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1042 for (i = 0; i < 2; i++) {
1043 if (vp56_rac_get_prob_branchy(&s->c, 252))
1044 s->prob.p.mv_comp[i].sign =
1045 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1047 for (j = 0; j < 10; j++)
1048 if (vp56_rac_get_prob_branchy(&s->c, 252))
1049 s->prob.p.mv_comp[i].classes[j] =
1050 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1052 if (vp56_rac_get_prob_branchy(&s->c, 252))
1053 s->prob.p.mv_comp[i].class0 =
1054 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1056 for (j = 0; j < 10; j++)
1057 if (vp56_rac_get_prob_branchy(&s->c, 252))
1058 s->prob.p.mv_comp[i].bits[j] =
1059 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1062 for (i = 0; i < 2; i++) {
1063 for (j = 0; j < 2; j++)
1064 for (k = 0; k < 3; k++)
1065 if (vp56_rac_get_prob_branchy(&s->c, 252))
1066 s->prob.p.mv_comp[i].class0_fp[j][k] =
1067 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1069 for (j = 0; j < 3; j++)
1070 if (vp56_rac_get_prob_branchy(&s->c, 252))
1071 s->prob.p.mv_comp[i].fp[j] =
1072 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1075 if (s->s.h.highprecisionmvs) {
1076 for (i = 0; i < 2; i++) {
1077 if (vp56_rac_get_prob_branchy(&s->c, 252))
1078 s->prob.p.mv_comp[i].class0_hp =
1079 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1081 if (vp56_rac_get_prob_branchy(&s->c, 252))
1082 s->prob.p.mv_comp[i].hp =
1083 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1088 return (data2 - data) + size2;
1091 static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1092 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
// Recursively decode one block at level bl (BL_64X64 .. BL_8X8): read the
// partition symbol for this level from the range coder (or a reduced choice
// at frame edges) and either decode the block directly or recurse into the
// four sub-blocks at the next level.
// NOTE(review): this listing is missing some physical lines (braces, case
// labels, breaks) lost in extraction; comments annotate the visible code only.
1094 const VP9Context *s = td->s;
// Partition context: one bit from the above-context of this column, one bit
// from the left-context of this row (row & 7 = position inside the sb64).
1095 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1096 (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
// Keyframes / intra-only frames use fixed default partition probabilities;
// inter frames use the per-frame adapted set.
1097 const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
1098 s->prob.p.partition[bl][c];
1099 enum BlockPartition bp;
// Half the block size at this level, in units of 8x8 blocks.
1100 ptrdiff_t hbs = 4 >> bl;
1101 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1102 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1103 int bytesperpixel = s->bytesperpixel;
// (elided condition: smallest level) -- decode the 8x8 block directly.
1106 bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1107 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1108 } else if (col + hbs < s->cols) { // FIXME why not <=?
1109 if (row + hbs < s->rows) { // FIXME why not <=?
// Block fully inside the frame: read the full partition symbol.
1110 bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1112 case PARTITION_NONE:
1113 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
// PARTITION_H: top half, then step the offsets one half-block down.
1116 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1117 yoff += hbs * 8 * y_stride;
1118 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1119 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
// PARTITION_V: left half, then step the offsets one half-block right.
1122 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1123 yoff += hbs * 8 * bytesperpixel;
1124 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1125 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
1127 case PARTITION_SPLIT:
// Recurse into all four quadrants at the next (smaller) level.
1128 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1129 decode_sb(td, row, col + hbs, lflvl,
1130 yoff + 8 * hbs * bytesperpixel,
1131 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1132 yoff += hbs * 8 * y_stride;
1133 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1134 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1135 decode_sb(td, row + hbs, col + hbs, lflvl,
1136 yoff + 8 * hbs * bytesperpixel,
1137 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// Bottom half off-frame: one probability branch decides between a split
// (decode left and right halves at the next level) or no further split.
1142 } else if (vp56_rac_get_prob_branchy(td->c, p[1])) {
1143 bp = PARTITION_SPLIT;
1144 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1145 decode_sb(td, row, col + hbs, lflvl,
1146 yoff + 8 * hbs * bytesperpixel,
1147 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1150 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
// Right half off-frame: analogous reduced choice, splitting top/bottom.
1152 } else if (row + hbs < s->rows) { // FIXME why not <=?
1153 if (vp56_rac_get_prob_branchy(td->c, p[2])) {
1154 bp = PARTITION_SPLIT;
1155 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1156 yoff += hbs * 8 * y_stride;
1157 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1158 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1161 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
// Both halves off-frame: split is implied, only the top-left quadrant
// lies inside the frame.
1164 bp = PARTITION_SPLIT;
1165 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
// Record the chosen partition for backward probability adaptation.
1167 td->counts.partition[bl][c][bp]++;
1170 static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1171 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
// Second-pass variant of decode_sb(): replays the block/partition structure
// recorded during the first pass (td->b) instead of reading the bitstream
// again, used by the two-pass frame-threading mode.
// NOTE(review): some physical lines (braces, blank lines) were lost in
// extraction of this listing; comments annotate the visible code only.
1173 const VP9Context *s = td->s;
1174 VP9Block *b = td->b;
// Half the block size at this level, in units of 8x8 blocks.
1175 ptrdiff_t hbs = 4 >> bl;
1176 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1177 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1178 int bytesperpixel = s->bytesperpixel;
// (elided condition: smallest level) -- the stored block must be 8x8 here.
1181 av_assert2(b->bl == BL_8X8);
1182 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
// Stored structure says this level was not split further: decode the block,
// plus the second half for H/V partitions whose other half is in-frame.
1183 } else if (td->b->bl == bl) {
1184 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1185 if (b->bp == PARTITION_H && row + hbs < s->rows) {
1186 yoff += hbs * 8 * y_stride;
1187 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1188 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
1189 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
1190 yoff += hbs * 8 * bytesperpixel;
1191 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1192 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// Otherwise the block was split: recurse into the quadrants that lie
// inside the frame, top-left quadrant first.
1195 decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
1196 if (col + hbs < s->cols) { // FIXME why not <=?
1197 if (row + hbs < s->rows) {
1198 decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1199 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1200 yoff += hbs * 8 * y_stride;
1201 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1202 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1203 decode_sb_mem(td, row + hbs, col + hbs, lflvl,
1204 yoff + 8 * hbs * bytesperpixel,
1205 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// Bottom half off-frame: only the top-right quadrant remains.
1207 yoff += hbs * 8 * bytesperpixel;
1208 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1209 decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
// Right half off-frame: only the bottom-left quadrant remains.
1211 } else if (row + hbs < s->rows) {
1212 yoff += hbs * 8 * y_stride;
1213 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1214 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
/**
 * Compute the first and one-past-last 8x8-block coordinate of tile `idx`,
 * with 2^log2_n tiles evenly covering n superblocks (64x64 units).
 *
 * @param start  receives the tile's first coordinate, in 8x8-block units
 * @param end    receives the tile's end coordinate (exclusive), same units
 * @param idx    tile index, 0 .. 2^log2_n - 1
 * @param log2_n log2 of the tile count along this dimension
 * @param n      total number of superblocks along this dimension
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int first_sb = ( idx      * n) >> log2_n;
    int last_sb  = ((idx + 1) * n) >> log2_n;

    /* clamp to the frame's superblock count */
    if (first_sb > n)
        first_sb = n;
    if (last_sb > n)
        last_sb = n;

    /* one superblock is 8 8x8 blocks wide/tall */
    *start = first_sb << 3;
    *end   = last_sb  << 3;
}
1227 static void free_buffers(VP9Context *s)
1231 av_freep(&s->intra_pred_data[0]);
1232 for (i = 0; i < s->active_tile_cols; i++)
1233 vp9_tile_data_free(&s->td[i]);
1236 static av_cold int vp9_decode_free(AVCodecContext *avctx)
// Decoder teardown: frees the internal frames, the 8 reference slots
// (published and pending sets) and the slice-threading progress state.
// NOTE(review): some physical lines (braces, declarations, the return)
// were lost in extraction of this listing.
1238 VP9Context *s = avctx->priv_data;
// Three internal frames: current, segmentation-map ref, mv-pair ref.
1241 for (i = 0; i < 3; i++) {
1242 vp9_frame_unref(avctx, &s->s.frames[i]);
1243 av_frame_free(&s->s.frames[i].tf.f);
1245 av_buffer_pool_uninit(&s->frame_extradata_pool);
// Release both the currently published refs and the next-frame refs.
1246 for (i = 0; i < 8; i++) {
1247 ff_thread_release_buffer(avctx, &s->s.refs[i]);
1248 av_frame_free(&s->s.refs[i].f);
1249 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1250 av_frame_free(&s->next_refs[i].f);
// Destroy the slice-thread progress entries/mutex/cond.
1254 vp9_free_entries(avctx);
1259 static int decode_tiles(AVCodecContext *avctx,
1260 const uint8_t *data, int size)
// Single-threaded tile decode path: per tile row, parse each tile's size
// prefix and set up one range decoder per tile column, then decode
// superblock rows across all tile columns, loop-filtering and reporting
// progress one superblock row at a time.
// NOTE(review): some physical lines (braces, declarations) were lost in
// extraction of this listing; comments annotate the visible code only.
1262 VP9Context *s = avctx->priv_data;
1263 VP9TileData *td = &s->td[0];
1264 int row, col, tile_row, tile_col, ret;
1266 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1268 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1270 f = s->s.frames[CUR_FRAME].tf.f;
1271 ls_y = f->linesize[0];
1272 ls_uv =f->linesize[1];
1273 bytesperpixel = s->bytesperpixel;
// Initialize a range decoder per tile column in this tile row. The very
// last tile of the frame has no 32-bit size prefix: it uses all remaining
// packet data.
1276 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1277 set_tile_offset(&tile_row_start, &tile_row_end,
1278 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1280 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1283 if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1284 tile_row == s->s.h.tiling.tile_rows - 1) {
1287 tile_size = AV_RB32(data);
// A tile size pointing past the packet is corrupt input; unblock any
// frame-threading consumers waiting on this frame before bailing out.
1291 if (tile_size > size) {
1292 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1293 return AVERROR_INVALIDDATA;
1295 ret = ff_vp56_init_range_decoder(&td->c_b[tile_col], data, tile_size);
// The first coded bit of each tile is a marker that must be zero.
1298 if (vp56_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker bit
1299 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1300 return AVERROR_INVALIDDATA;
// Walk superblock rows (8 8x8-blocks, i.e. 64 pixels tall).
1306 for (row = tile_row_start; row < tile_row_end;
1307 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1308 VP9Filter *lflvl_ptr = s->lflvl;
1309 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1311 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1312 set_tile_offset(&tile_col_start, &tile_col_end,
1313 tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1314 td->tile_col_start = tile_col_start;
// Reset the per-row "left" prediction contexts at each tile-column start
// (tiles are independently decodable horizontally).
1316 memset(td->left_partition_ctx, 0, 8);
1317 memset(td->left_skip_ctx, 0, 8);
1318 if (s->s.h.keyframe || s->s.h.intraonly) {
1319 memset(td->left_mode_ctx, DC_PRED, 16);
1321 memset(td->left_mode_ctx, NEARESTMV, 8);
1323 memset(td->left_y_nnz_ctx, 0, 16);
1324 memset(td->left_uv_nnz_ctx, 0, 32);
1325 memset(td->left_segpred_ctx, 0, 8);
1327 td->c = &td->c_b[tile_col];
// Decode all 64x64 superblocks of this row within the tile column.
1330 for (col = tile_col_start;
1332 col += 8, yoff2 += 64 * bytesperpixel,
1333 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1334 // FIXME integrate with lf code (i.e. zero after each
1335 // use, similar to invtxfm coefficients, or similar)
1337 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
// Second pass replays stored block structure; first pass reads the
// bitstream and must detect range-coder exhaustion (truncated input).
1341 decode_sb_mem(td, row, col, lflvl_ptr,
1342 yoff2, uvoff2, BL_64X64);
1344 if (vpX_rac_is_end(td->c)) {
1345 return AVERROR_INVALIDDATA;
1347 decode_sb(td, row, col, lflvl_ptr,
1348 yoff2, uvoff2, BL_64X64);
1356 // backup pre-loopfilter reconstruction data for intra
1357 // prediction of next row of sb64s
1358 if (row + 8 < s->rows) {
1359 memcpy(s->intra_pred_data[0],
1360 f->data[0] + yoff + 63 * ls_y,
1361 8 * s->cols * bytesperpixel);
1362 memcpy(s->intra_pred_data[1],
1363 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1364 8 * s->cols * bytesperpixel >> s->ss_h);
1365 memcpy(s->intra_pred_data[2],
1366 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1367 8 * s->cols * bytesperpixel >> s->ss_h);
1370 // loopfilter one row
1371 if (s->s.h.filter.level) {
1374 lflvl_ptr = s->lflvl;
1375 for (col = 0; col < s->cols;
1376 col += 8, yoff2 += 64 * bytesperpixel,
1377 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1378 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
1383 // FIXME maybe we can make this more finegrained by running the
1384 // loopfilter per-block instead of after each sbrow
1385 // In fact that would also make intra pred left preparation easier?
// Publish this superblock row to frame-threading consumers.
1386 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
1393 static av_always_inline
1394 int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
// Slice-threaded tile worker: job `jobnr` decodes one tile column across
// all tile rows; the loop filter runs separately in loopfilter_proc(),
// synchronized per superblock row via vp9_report_tile_progress().
// NOTE(review): some physical lines (braces, declarations, the return)
// were lost in extraction of this listing; comments annotate visible code.
1397 VP9Context *s = avctx->priv_data;
1398 VP9TileData *td = &s->td[jobnr];
1399 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1400 int bytesperpixel = s->bytesperpixel, row, col, tile_row;
1401 unsigned tile_cols_len;
1402 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1403 VP9Filter *lflvl_ptr_base;
1406 f = s->s.frames[CUR_FRAME].tf.f;
1407 ls_y = f->linesize[0];
1408 ls_uv =f->linesize[1];
// This worker's tile column is determined by its job number.
1410 set_tile_offset(&tile_col_start, &tile_col_end,
1411 jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1412 td->tile_col_start = tile_col_start;
// Initial pixel offsets of the tile column (tile_col_start is in
// 8x8-block units; >> 3 converts to superblock units of 64 pixels).
1413 uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3);
1414 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1415 lflvl_ptr_base = s->lflvl+(tile_col_start >> 3);
1417 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1418 set_tile_offset(&tile_row_start, &tile_row_end,
1419 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
// Range decoders were pre-initialized per (column, row) by the caller.
1421 td->c = &td->c_b[tile_row];
1422 for (row = tile_row_start; row < tile_row_end;
1423 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1424 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1425 VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3);
// Reset per-row "left" prediction contexts at the tile-column start.
1427 memset(td->left_partition_ctx, 0, 8);
1428 memset(td->left_skip_ctx, 0, 8);
1429 if (s->s.h.keyframe || s->s.h.intraonly) {
1430 memset(td->left_mode_ctx, DC_PRED, 16);
1432 memset(td->left_mode_ctx, NEARESTMV, 8);
1434 memset(td->left_y_nnz_ctx, 0, 16);
1435 memset(td->left_uv_nnz_ctx, 0, 32);
1436 memset(td->left_segpred_ctx, 0, 8);
// Decode every 64x64 superblock of this row inside the tile column.
1438 for (col = tile_col_start;
1440 col += 8, yoff2 += 64 * bytesperpixel,
1441 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1442 // FIXME integrate with lf code (i.e. zero after each
1443 // use, similar to invtxfm coefficients, or similar)
1444 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1445 decode_sb(td, row, col, lflvl_ptr,
1446 yoff2, uvoff2, BL_64X64);
1449 // backup pre-loopfilter reconstruction data for intra
1450 // prediction of next row of sb64s
// Unlike decode_tiles(), only this worker's tile-column span is copied.
1451 tile_cols_len = tile_col_end - tile_col_start;
1452 if (row + 8 < s->rows) {
1453 memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1454 f->data[0] + yoff + 63 * ls_y,
1455 8 * tile_cols_len * bytesperpixel);
1456 memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1457 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1458 8 * tile_cols_len * bytesperpixel >> s->ss_h);
1459 memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1460 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1461 8 * tile_cols_len * bytesperpixel >> s->ss_h);
// Signal this superblock row done so the loop-filter thread can proceed
// once all tile columns have reported it.
1464 vp9_report_tile_progress(s, row >> 3, 1);
1470 static av_always_inline
1471 int loopfilter_proc(AVCodecContext *avctx)
// Slice-threading main function: waits until every tile-column worker has
// finished a superblock row, then loop-filters that whole row.
// NOTE(review): some physical lines (braces, declarations, the return)
// were lost in extraction of this listing.
1473 VP9Context *s = avctx->priv_data;
1474 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1475 VP9Filter *lflvl_ptr;
1476 int bytesperpixel = s->bytesperpixel, col, i;
1479 f = s->s.frames[CUR_FRAME].tf.f;
1480 ls_y = f->linesize[0];
1481 ls_uv =f->linesize[1];
1483 for (i = 0; i < s->sb_rows; i++) {
// Block until all tile columns have reported superblock row i.
1484 vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);
1486 if (s->s.h.filter.level) {
// Pixel offsets of superblock row i (64 luma lines per row).
1487 yoff = (ls_y * 64)*i;
1488 uvoff = (ls_uv * 64 >> s->ss_v)*i;
1489 lflvl_ptr = s->lflvl+s->sb_cols*i;
1490 for (col = 0; col < s->cols;
1491 col += 8, yoff += 64 * bytesperpixel,
1492 uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1493 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col,
1502 static int vp9_export_enc_params(VP9Context *s, VP9Frame *frame)
// Attach AV_FRAME_DATA_VIDEO_ENC_PARAMS side data to the output frame:
// frame-level quantizer/deltas, plus one block entry per coded block when
// segmentation is active (block structure recorded by the tile workers).
// NOTE(review): some physical lines (braces, returns) were lost in
// extraction of this listing; comments annotate the visible code only.
1504 AVVideoEncParams *par;
1505 unsigned int tile, nb_blocks = 0;
// Per-block entries are only exported when segmentation can vary QP.
1507 if (s->s.h.segmentation.enabled) {
1508 for (tile = 0; tile < s->active_tile_cols; tile++)
1509 nb_blocks += s->td[tile].nb_block_structure;
1512 par = av_video_enc_params_create_side_data(frame->tf.f,
1513 AV_VIDEO_ENC_PARAMS_VP9, nb_blocks);
1515 return AVERROR(ENOMEM);
// Frame-level base quantizer and the four plane/DC-AC delta slots.
1517 par->qp = s->s.h.yac_qi;
1518 par->delta_qp[0][0] = s->s.h.ydc_qdelta;
1519 par->delta_qp[1][0] = s->s.h.uvdc_qdelta;
1520 par->delta_qp[2][0] = s->s.h.uvdc_qdelta;
1521 par->delta_qp[1][1] = s->s.h.uvac_qdelta;
1522 par->delta_qp[2][1] = s->s.h.uvac_qdelta;
// (elided condition) fill per-block entries across all tile columns.
1525 unsigned int block = 0;
1526 unsigned int tile, block_tile;
1528 for (tile = 0; tile < s->active_tile_cols; tile++) {
1529 VP9TileData *td = &s->td[tile];
1531 for (block_tile = 0; block_tile < td->nb_block_structure; block_tile++) {
1532 AVVideoBlockParams *b = av_video_enc_params_block(par, block++);
1533 unsigned int row = td->block_structure[block_tile].row;
1534 unsigned int col = td->block_structure[block_tile].col;
// Segmentation map is indexed in 8x8-block units, 8*sb_cols per row.
1535 uint8_t seg_id = frame->segmentation_map[row * 8 * s->sb_cols + col];
// Block dimensions: size index is log2 of the size in 8-pixel units.
1539 b->w = 1 << (3 + td->block_structure[block_tile].block_size_idx_x);
1540 b->h = 1 << (3 + td->block_structure[block_tile].block_size_idx_y);
// Export the segment's QP feature; absolute values are converted to a
// delta against the frame-level base quantizer.
1542 if (s->s.h.segmentation.feat[seg_id].q_enabled) {
1543 b->delta_qp = s->s.h.segmentation.feat[seg_id].q_val;
1544 if (s->s.h.segmentation.absolute_vals)
1545 b->delta_qp -= par->qp;
1554 static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
1555 int *got_frame, AVPacket *pkt)
// Top-level per-packet decode entry point: parses the frame header,
// rotates internal frames, decodes tiles (single-threaded, slice-threaded
// or via hwaccel), adapts probabilities and publishes the reference set.
// NOTE(review): many physical lines (braces, declarations, gotos, labels)
// were lost in extraction of this listing; comments annotate visible code.
1557 const uint8_t *data = pkt->data;
1558 int size = pkt->size;
1559 VP9Context *s = avctx->priv_data;
// The previous segmentation map may be reused if this frame does not
// update it.
1561 int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
1562 (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
1565 if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
// ret == 0 from the header parser: "show existing frame" -- output the
// requested reference directly without decoding anything.
1567 } else if (ret == 0) {
1568 if (!s->s.refs[ref].f->buf[0]) {
1569 av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
1570 return AVERROR_INVALIDDATA;
1572 if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
1574 ((AVFrame *)frame)->pts = pkt->pts;
1576 FF_DISABLE_DEPRECATION_WARNINGS
1577 ((AVFrame *)frame)->pkt_pts = pkt->pts;
1578 FF_ENABLE_DEPRECATION_WARNINGS
1580 ((AVFrame *)frame)->pkt_dts = pkt->dts;
// References are carried over unchanged for a show-existing frame.
1581 for (i = 0; i < 8; i++) {
1582 if (s->next_refs[i].f->buf[0])
1583 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1584 if (s->s.refs[i].f->buf[0] &&
1585 (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
// Rotate internal frames: the previous CUR_FRAME becomes the segmentation
// map / mv-pair back-reference for this frame (unless retained or reset).
1594 if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
1595 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
1596 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
1597 if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1598 (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
1601 if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
1602 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR])
1603 if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1604 (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
1606 if (s->s.frames[CUR_FRAME].tf.f->buf[0])
1607 vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME])
1608 if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
1610 f = s->s.frames[CUR_FRAME].tf.f;
1611 f->key_frame = s->s.h.keyframe;
1612 f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
// A retained segmap of mismatched dimensions (after a size change) is
// unusable -- drop it.
1614 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
1615 (s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width ||
1616 s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
1617 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
// Build the next reference set: slots flagged by refreshrefmask take the
// new frame, the others keep their current contents.
1621 for (i = 0; i < 8; i++) {
1622 if (s->next_refs[i].f->buf[0])
1623 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1624 if (s->s.h.refreshrefmask & (1 << i)) {
1625 ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
1626 } else if (s->s.refs[i].f->buf[0]) {
1627 ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
// Hardware-accelerated path: hand the whole packet to the hwaccel.
1633 if (avctx->hwaccel) {
1634 ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
1637 ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
1640 ret = avctx->hwaccel->end_frame(avctx);
1646 // main tile decode loop
// Reset the "above" prediction contexts for the whole frame width.
1647 memset(s->above_partition_ctx, 0, s->cols);
1648 memset(s->above_skip_ctx, 0, s->cols);
1649 if (s->s.h.keyframe || s->s.h.intraonly) {
1650 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
1652 memset(s->above_mode_ctx, NEARESTMV, s->cols);
1654 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
1655 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
1656 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
1657 memset(s->above_segpred_ctx, 0, s->cols);
// Two-pass decoding is used with frame threading when this frame adapts
// the probability context non-parallel (pass 1 parses, pass 2 renders).
1658 s->pass = s->s.frames[CUR_FRAME].uses_2pass =
1659 avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
1660 if ((ret = update_block_buffers(avctx)) < 0) {
1661 av_log(avctx, AV_LOG_ERROR,
1662 "Failed to allocate block buffers\n");
// In parallel mode the context is saved up front (coef probs are copied
// only up to the enabled txfm mode), letting dependent frames start early.
1665 if (s->s.h.refreshctx && s->s.h.parallelmode) {
1668 for (i = 0; i < 4; i++) {
1669 for (j = 0; j < 2; j++)
1670 for (k = 0; k < 2; k++)
1671 for (l = 0; l < 6; l++)
1672 for (m = 0; m < 6; m++)
1673 memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
1674 s->prob.coef[i][j][k][l][m], 3);
1675 if (s->s.h.txfmmode == i)
1678 s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
1679 ff_thread_finish_setup(avctx);
1680 } else if (!s->s.h.refreshctx) {
1681 ff_thread_finish_setup(avctx);
// Reset slice-threading per-row progress counters.
1685 if (avctx->active_thread_type & FF_THREAD_SLICE) {
1686 for (i = 0; i < s->sb_rows; i++)
1687 atomic_store(&s->entries[i], 0);
// Rewind every tile worker's scratch pointers for this pass.
1692 for (i = 0; i < s->active_tile_cols; i++) {
1693 s->td[i].b = s->td[i].b_base;
1694 s->td[i].block = s->td[i].block_base;
1695 s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
1696 s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
1697 s->td[i].eob = s->td[i].eob_base;
1698 s->td[i].uveob[0] = s->td[i].uveob_base[0];
1699 s->td[i].uveob[1] = s->td[i].uveob_base[1];
1700 s->td[i].error_info = 0;
// Slice-threaded path: pre-parse all tile sizes here (workers must not
// touch the shared packet pointer), then run workers + loop-filter thread.
1704 if (avctx->active_thread_type == FF_THREAD_SLICE) {
1705 int tile_row, tile_col;
1707 av_assert1(!s->pass);
1709 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1710 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1713 if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1714 tile_row == s->s.h.tiling.tile_rows - 1) {
1717 tile_size = AV_RB32(data);
1721 if (tile_size > size)
1722 return AVERROR_INVALIDDATA;
1723 ret = ff_vp56_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
1726 if (vp56_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
1727 return AVERROR_INVALIDDATA;
1733 ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
// Single-threaded / frame-threaded fallback path.
1737 ret = decode_tiles(avctx, data, size);
1739 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1744 // Sum all counts fields into td[0].counts for tile threading
1745 if (avctx->active_thread_type == FF_THREAD_SLICE)
1746 for (i = 1; i < s->s.h.tiling.tile_cols; i++)
1747 for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
1748 ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];
// Backward probability adaptation from the accumulated symbol counts
// (skipped for parallel mode, which saved the context before decoding).
1750 if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
1751 ff_vp9_adapt_probs(s);
1752 ff_thread_finish_setup(avctx);
1754 } while (s->pass++ == 1);
1755 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1757 if (s->td->error_info < 0) {
1758 av_log(avctx, AV_LOG_ERROR, "Failed to decode tile data\n");
1759 s->td->error_info = 0;
1760 return AVERROR_INVALIDDATA;
1762 if (avctx->export_side_data & AV_CODEC_EXPORT_DATA_VIDEO_ENC_PARAMS) {
1763 ret = vp9_export_enc_params(s, &s->s.frames[CUR_FRAME]);
// Promote the prepared next_refs set to the published reference slots.
1770 for (i = 0; i < 8; i++) {
1771 if (s->s.refs[i].f->buf[0])
1772 ff_thread_release_buffer(avctx, &s->s.refs[i]);
1773 if (s->next_refs[i].f->buf[0] &&
1774 (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
// Only visible frames are returned to the caller.
1778 if (!s->s.h.invisible) {
1779 if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
1787 static void vp9_decode_flush(AVCodecContext *avctx)
1789 VP9Context *s = avctx->priv_data;
1792 for (i = 0; i < 3; i++)
1793 vp9_frame_unref(avctx, &s->s.frames[i]);
1794 for (i = 0; i < 8; i++)
1795 ff_thread_release_buffer(avctx, &s->s.refs[i]);
1798 static int init_frames(AVCodecContext *avctx)
// Allocate the AVFrame shells for the three internal frames and the eight
// reference slots (both published and next sets). On any failure the
// already-allocated frames are released via vp9_decode_free().
// NOTE(review): some physical lines (braces, the final return) were lost
// in extraction of this listing.
1800 VP9Context *s = avctx->priv_data;
1803 for (i = 0; i < 3; i++) {
1804 s->s.frames[i].tf.f = av_frame_alloc();
1805 if (!s->s.frames[i].tf.f) {
// vp9_decode_free() tolerates partially-initialized state.
1806 vp9_decode_free(avctx);
1807 av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1808 return AVERROR(ENOMEM);
1811 for (i = 0; i < 8; i++) {
1812 s->s.refs[i].f = av_frame_alloc();
1813 s->next_refs[i].f = av_frame_alloc();
1814 if (!s->s.refs[i].f || !s->next_refs[i].f) {
1815 vp9_decode_free(avctx);
1816 av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1817 return AVERROR(ENOMEM);
1824 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1826 VP9Context *s = avctx->priv_data;
// Sharpness -1 marks the loop-filter limit tables as not yet computed, so
// the first frame header always rebuilds them.
1829 s->s.h.filter.sharpness = -1;
// Allocate internal and reference AVFrames; frees itself on failure.
1831 return init_frames(avctx);
1835 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
// Frame-threading sync: copy all inter-frame decoder state from the source
// thread's context to the destination thread (frames, refs, header fields,
// probability contexts). Called between frames by the frame-thread manager.
// NOTE(review): some physical lines (braces, declarations, the return)
// were lost in extraction of this listing.
1838 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
// Re-reference the three internal frames from the source thread.
1840 for (i = 0; i < 3; i++) {
1841 if (s->s.frames[i].tf.f->buf[0])
1842 vp9_frame_unref(dst, &s->s.frames[i]);
1843 if (ssrc->s.frames[i].tf.f->buf[0]) {
1844 if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
// The destination picks up the source's *next* reference set, i.e. the
// state after the source frame finished updating its references.
1848 for (i = 0; i < 8; i++) {
1849 if (s->s.refs[i].f->buf[0])
1850 ff_thread_release_buffer(dst, &s->s.refs[i]);
1851 if (ssrc->next_refs[i].f->buf[0]) {
1852 if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
// Scalar header/state fields needed to decode the next frame.
1857 s->s.h.invisible = ssrc->s.h.invisible;
1858 s->s.h.keyframe = ssrc->s.h.keyframe;
1859 s->s.h.intraonly = ssrc->s.h.intraonly;
1860 s->ss_v = ssrc->ss_v;
1861 s->ss_h = ssrc->ss_h;
1862 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1863 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1864 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1865 s->bytesperpixel = ssrc->bytesperpixel;
1866 s->gf_fmt = ssrc->gf_fmt;
1869 s->s.h.bpp = ssrc->s.h.bpp;
1870 s->bpp_index = ssrc->bpp_index;
1871 s->pix_fmt = ssrc->pix_fmt;
// Bulk-copy the adapted probability contexts and per-segment features.
1872 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
1873 memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
1874 memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1875 sizeof(s->s.h.segmentation.feat));
1881 AVCodec ff_vp9_decoder = {
1883 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
1884 .type = AVMEDIA_TYPE_VIDEO,
1885 .id = AV_CODEC_ID_VP9,
1886 .priv_data_size = sizeof(VP9Context),
1887 .init = vp9_decode_init,
1888 .close = vp9_decode_free,
1889 .decode = vp9_decode_frame,
1890 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
1891 .caps_internal = FF_CODEC_CAP_SLICE_THREAD_HAS_MF |
1892 FF_CODEC_CAP_ALLOCATE_PROGRESS,
1893 .flush = vp9_decode_flush,
1894 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
1895 .profiles = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
1896 .bsfs = "vp9_superframe_split",
1897 .hw_configs = (const AVCodecHWConfigInternal *const []) {
1898 #if CONFIG_VP9_DXVA2_HWACCEL
1901 #if CONFIG_VP9_D3D11VA_HWACCEL
1902 HWACCEL_D3D11VA(vp9),
1904 #if CONFIG_VP9_D3D11VA2_HWACCEL
1905 HWACCEL_D3D11VA2(vp9),
1907 #if CONFIG_VP9_NVDEC_HWACCEL
1910 #if CONFIG_VP9_VAAPI_HWACCEL
1913 #if CONFIG_VP9_VDPAU_HWACCEL