/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/* Assumed set of libavcodec headers needed by the code below (bit reader,
 * VP56 range coder, frame threading, profiles and hwaccel glue). */
#include "avcodec.h"
#include "get_bits.h"
#include "hwaccel.h"
#include "internal.h"
#include "profiles.h"
#include "thread.h"
#include "videodsp.h"
#include "vp56.h"
#include "vp9.h"
#include "vp9data.h"
#include "vp9dec.h"
#include "libavutil/avassert.h"
#include "libavutil/pixdesc.h"

#define VP9_SYNCCODE 0x498342
41 static void vp9_free_entries(AVCodecContext *avctx) {
42 VP9Context *s = avctx->priv_data;
44 if (avctx->active_thread_type & FF_THREAD_SLICE) {
45 pthread_mutex_destroy(&s->progress_mutex);
46 pthread_cond_destroy(&s->progress_cond);
47 av_freep(&s->entries);
51 static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
52 VP9Context *s = avctx->priv_data;
55 if (avctx->active_thread_type & FF_THREAD_SLICE) {
57 av_freep(&s->entries);
59 s->entries = av_malloc_array(n, sizeof(atomic_int));
62 av_freep(&s->entries);
63 return AVERROR(ENOMEM);
66 for (i = 0; i < n; i++)
67 atomic_init(&s->entries[i], 0);
69 pthread_mutex_init(&s->progress_mutex, NULL);
70 pthread_cond_init(&s->progress_cond, NULL);
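/* Per-superblock-row progress tracking used with slice threading: each
 * tile-column decoding job calls vp9_report_tile_progress() after finishing
 * a superblock row, and the loop filter job calls vp9_await_tile_progress()
 * to sleep until every tile column has reported that row. */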
75 static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
76 pthread_mutex_lock(&s->progress_mutex);
77 atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
78 pthread_cond_signal(&s->progress_cond);
79 pthread_mutex_unlock(&s->progress_mutex);
82 static void vp9_await_tile_progress(VP9Context *s, int field, int n) {
83 if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
86 pthread_mutex_lock(&s->progress_mutex);
87 while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) != n)
88 pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
89 pthread_mutex_unlock(&s->progress_mutex);
92 static void vp9_free_entries(AVCodecContext *avctx) {}
93 static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
96 static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
98 ff_thread_release_buffer(avctx, &f->tf);
99 av_buffer_unref(&f->extradata);
100 av_buffer_unref(&f->hwaccel_priv_buf);
101 f->segmentation_map = NULL;
102 f->hwaccel_picture_private = NULL;
105 static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
107 VP9Context *s = avctx->priv_data;
110 ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
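    /* Per-frame side data: for each 8x8 block (64 per 64x64 superblock) one
     * byte of segmentation map followed by one VP9mvrefPair, carved out of a
     * buffer pool so that same-sized frames can reuse allocations. */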
114 sz = 64 * s->sb_cols * s->sb_rows;
115 if (sz != s->frame_extradata_pool_size) {
116 av_buffer_pool_uninit(&s->frame_extradata_pool);
117 s->frame_extradata_pool = av_buffer_pool_init(sz * (1 + sizeof(VP9mvrefPair)), NULL);
118 if (!s->frame_extradata_pool) {
119 s->frame_extradata_pool_size = 0;
122 s->frame_extradata_pool_size = sz;
124 f->extradata = av_buffer_pool_get(s->frame_extradata_pool);
128 memset(f->extradata->data, 0, f->extradata->size);
130 f->segmentation_map = f->extradata->data;
131 f->mv = (VP9mvrefPair *) (f->extradata->data + sz);
133 if (avctx->hwaccel) {
134 const AVHWAccel *hwaccel = avctx->hwaccel;
135 av_assert0(!f->hwaccel_picture_private);
136 if (hwaccel->frame_priv_data_size) {
137 f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
138 if (!f->hwaccel_priv_buf)
140 f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
147 vp9_frame_unref(avctx, f);
148 return AVERROR(ENOMEM);
151 static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
155 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
159 dst->extradata = av_buffer_ref(src->extradata);
163 dst->segmentation_map = src->segmentation_map;
165 dst->uses_2pass = src->uses_2pass;
167 if (src->hwaccel_picture_private) {
168 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
169 if (!dst->hwaccel_priv_buf)
171 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
177 vp9_frame_unref(avctx, dst);
178 return AVERROR(ENOMEM);
181 static int update_size(AVCodecContext *avctx, int w, int h)
183 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
184 CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
185 CONFIG_VP9_NVDEC_HWACCEL + \
186 CONFIG_VP9_VAAPI_HWACCEL + \
187 CONFIG_VP9_VDPAU_HWACCEL)
188 enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
189 VP9Context *s = avctx->priv_data;
191 int bytesperpixel = s->bytesperpixel, ret, cols, rows;
194 av_assert0(w > 0 && h > 0);
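    /* Negotiate the output format: offer every compiled-in hwaccel format
     * that can handle the coded pixel format, then the native software
     * format, and let ff_thread_get_format() pick one. */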
196 if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
197 if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
200 switch (s->pix_fmt) {
201 case AV_PIX_FMT_YUV420P:
202 #if CONFIG_VP9_VDPAU_HWACCEL
203 *fmtp++ = AV_PIX_FMT_VDPAU;
205 case AV_PIX_FMT_YUV420P10:
206 #if CONFIG_VP9_DXVA2_HWACCEL
207 *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
209 #if CONFIG_VP9_D3D11VA_HWACCEL
210 *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
211 *fmtp++ = AV_PIX_FMT_D3D11;
213 #if CONFIG_VP9_NVDEC_HWACCEL
214 *fmtp++ = AV_PIX_FMT_CUDA;
216 #if CONFIG_VP9_VAAPI_HWACCEL
217 *fmtp++ = AV_PIX_FMT_VAAPI;
220 case AV_PIX_FMT_YUV420P12:
221 #if CONFIG_VP9_NVDEC_HWACCEL
222 *fmtp++ = AV_PIX_FMT_CUDA;
224 #if CONFIG_VP9_VAAPI_HWACCEL
225 *fmtp++ = AV_PIX_FMT_VAAPI;
230 *fmtp++ = s->pix_fmt;
231 *fmtp = AV_PIX_FMT_NONE;
233 ret = ff_thread_get_format(avctx, pix_fmts);
237 avctx->pix_fmt = ret;
238 s->gf_fmt = s->pix_fmt;
246 if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
249 s->last_fmt = s->pix_fmt;
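    /* Frame geometry: sb_cols/sb_rows count 64x64 superblocks, cols/rows
     * count 8x8 block units; the "above" context arrays allocated below are
     * sized from these. */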
250 s->sb_cols = (w + 63) >> 6;
251 s->sb_rows = (h + 63) >> 6;
252 s->cols = (w + 7) >> 3;
253 s->rows = (h + 7) >> 3;
254 lflvl_len = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;
256 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
257 av_freep(&s->intra_pred_data[0]);
258 // FIXME we slightly over-allocate here for subsampled chroma, but a little
259 // bit of padding shouldn't affect performance...
260 p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
261 lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
263 return AVERROR(ENOMEM);
264 assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
265 assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
266 assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
267 assign(s->above_y_nnz_ctx, uint8_t *, 16);
268 assign(s->above_mode_ctx, uint8_t *, 16);
269 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
270 assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
271 assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
272 assign(s->above_partition_ctx, uint8_t *, 8);
273 assign(s->above_skip_ctx, uint8_t *, 8);
274 assign(s->above_txfm_ctx, uint8_t *, 8);
275 assign(s->above_segpred_ctx, uint8_t *, 8);
276 assign(s->above_intra_ctx, uint8_t *, 8);
277 assign(s->above_comp_ctx, uint8_t *, 8);
278 assign(s->above_ref_ctx, uint8_t *, 8);
279 assign(s->above_filter_ctx, uint8_t *, 8);
280 assign(s->lflvl, VP9Filter *, lflvl_len);
284 for (i = 0; i < s->active_tile_cols; i++) {
285 av_freep(&s->td[i].b_base);
286 av_freep(&s->td[i].block_base);
290 if (s->s.h.bpp != s->last_bpp) {
291 ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
292 ff_videodsp_init(&s->vdsp, s->s.h.bpp);
293 s->last_bpp = s->s.h.bpp;
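/* (Re)allocate per-tile block and coefficient buffers. In frame-threaded
 * two-pass mode, td[0] keeps a whole frame's worth of blocks and
 * coefficients (pass 1 stores them, pass 2 reconstructs); otherwise each
 * active tile column only needs storage for a single 64x64 superblock. */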
299 static int update_block_buffers(AVCodecContext *avctx)
302 VP9Context *s = avctx->priv_data;
303 int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
304 VP9TileData *td = &s->td[0];
306 if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
310 av_free(td->block_base);
311 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
312 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
313 if (s->s.frames[CUR_FRAME].uses_2pass) {
314 int sbs = s->sb_cols * s->sb_rows;
316 td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
317 td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
318 16 * 16 + 2 * chroma_eobs) * sbs);
319 if (!td->b_base || !td->block_base)
320 return AVERROR(ENOMEM);
321 td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
322 td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
323 td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
324 td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
325 td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;
327 for (i = 1; i < s->active_tile_cols; i++) {
328 if (s->td[i].b_base && s->td[i].block_base) {
329 av_free(s->td[i].b_base);
330 av_free(s->td[i].block_base);
333 for (i = 0; i < s->active_tile_cols; i++) {
334 s->td[i].b_base = av_malloc(sizeof(VP9Block));
335 s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
336 16 * 16 + 2 * chroma_eobs);
337 if (!s->td[i].b_base || !s->td[i].block_base)
338 return AVERROR(ENOMEM);
339 s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
340 s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
341 s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
342 s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
343 s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;
346 s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;
351 // The sign bit is at the end, not the start, of a bit sequence
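// e.g. for n == 4, magnitude bits 0101 (= 5) followed by sign bit 1 yield -5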
352 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
354 int v = get_bits(gb, n);
355 return get_bits1(gb) ? -v : v;
358 static av_always_inline int inv_recenter_nonneg(int v, int m)
363 return m - ((v + 1) >> 1);
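// inv_recenter_nonneg() is used by update_prob() below to fold the coded
// delta magnitude back around the current probability value.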
367 // differential forward probability updates
368 static int update_prob(VP56RangeCoder *c, int p)
370 static const uint8_t inv_map_table[255] = {
371 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
372 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
373 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
374 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
375 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
376 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
377 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
378 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
379 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
380 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
381 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
382 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
383 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
384 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
385 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
386 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
387 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
388 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
    /* This code performs a differential probability update. For a current
     * probability A in the range [1, 255], the difference to any new
     * probability lies in the range [1-A, 255-A]; part of that (absolute)
     * range exists on both the positive and the negative side, while the
     * rest exists on only one side. The shared part is coded differentially,
     * i.e. times two with the lowest bit giving the sign, and the one-sided
     * part is then coded on top of that. The resulting absolute difference
     * again lies in [0, 254], and a bigger value means we are further away
     * from the original value A, so it can be coded as a VLC, since larger
     * values are increasingly unlikely. The first 20 values in
     * inv_map_table[] allow 'cheap, rough' updates vs. the 'fine, exact'
     * updates further down the range, which adds one extra dimension to
     * this differential update model. */
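    /* Concretely, the VLC below codes the remapped delta d in four ranges:
     * a 4-bit value for d in [0, 16), prefix bit plus 4 bits for [16, 32),
     * two prefix bits plus 5 bits for [32, 64), and an escape code for
     * everything above. For example, d = 18 is coded as the prefix bits 1, 0
     * followed by the 4-bit value 2. */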
408 if (!vp8_rac_get(c)) {
409 d = vp8_rac_get_uint(c, 4) + 0;
410 } else if (!vp8_rac_get(c)) {
411 d = vp8_rac_get_uint(c, 4) + 16;
412 } else if (!vp8_rac_get(c)) {
413 d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65)
            d = (d << 1) - 65 + vp8_rac_get(c);
        d += 64;
        av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
    }
422 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
423 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
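/* Parse the color config: profiles 0 and 1 are always 8-bit, profiles 2 and
 * 3 signal 10- or 12-bit; RGB (4:4:4 with identity matrix) and explicit
 * chroma subsampling bits are only valid in the odd profiles (1 and 3),
 * everything else is 4:2:0. */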
426 static int read_colorspace_details(AVCodecContext *avctx)
428 static const enum AVColorSpace colorspaces[8] = {
429 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
430 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
432 VP9Context *s = avctx->priv_data;
433 int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
436 s->s.h.bpp = 8 + bits * 2;
437 s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
438 avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
439 if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
440 static const enum AVPixelFormat pix_fmt_rgb[3] = {
441 AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
443 s->ss_h = s->ss_v = 0;
444 avctx->color_range = AVCOL_RANGE_JPEG;
445 s->pix_fmt = pix_fmt_rgb[bits];
446 if (avctx->profile & 1) {
447 if (get_bits1(&s->gb)) {
448 av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
449 return AVERROR_INVALIDDATA;
452 av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
454 return AVERROR_INVALIDDATA;
457 static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
458 { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
459 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
460 { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
461 { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
462 { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
463 { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
465 avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
466 if (avctx->profile & 1) {
467 s->ss_h = get_bits1(&s->gb);
468 s->ss_v = get_bits1(&s->gb);
469 s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
470 if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
471 av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
473 return AVERROR_INVALIDDATA;
474 } else if (get_bits1(&s->gb)) {
475 av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
477 return AVERROR_INVALIDDATA;
480 s->ss_h = s->ss_v = 1;
481 s->pix_fmt = pix_fmt_for_ss[bits][1][1];
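/* Parse the uncompressed frame header. On success returns the number of
 * header bytes consumed (uncompressed part plus the size of the compressed,
 * range-coded part), or 0 with *ref set when the frame only asks for an
 * already decoded reference to be displayed again. */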
488 static int decode_frame_header(AVCodecContext *avctx,
489 const uint8_t *data, int size, int *ref)
491 VP9Context *s = avctx->priv_data;
492 int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
494 const uint8_t *data2;
497 if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
498 av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
501 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
502 av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
503 return AVERROR_INVALIDDATA;
505 avctx->profile = get_bits1(&s->gb);
506 avctx->profile |= get_bits1(&s->gb) << 1;
507 if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
508 if (avctx->profile > 3) {
509 av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
510 return AVERROR_INVALIDDATA;
512 s->s.h.profile = avctx->profile;
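    /* show_existing_frame: the next three bits select which of the eight
     * reference slots should simply be output again; no further frame data
     * follows. */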
513 if (get_bits1(&s->gb)) {
514 *ref = get_bits(&s->gb, 3);
518 s->last_keyframe = s->s.h.keyframe;
519 s->s.h.keyframe = !get_bits1(&s->gb);
521 last_invisible = s->s.h.invisible;
522 s->s.h.invisible = !get_bits1(&s->gb);
523 s->s.h.errorres = get_bits1(&s->gb);
524 s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
526 if (s->s.h.keyframe) {
527 if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
528 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
529 return AVERROR_INVALIDDATA;
531 if ((ret = read_colorspace_details(avctx)) < 0)
533 // for profile 1, here follows the subsampling bits
534 s->s.h.refreshrefmask = 0xff;
535 w = get_bits(&s->gb, 16) + 1;
536 h = get_bits(&s->gb, 16) + 1;
537 if (get_bits1(&s->gb)) // display size
538 skip_bits(&s->gb, 32);
540 s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
541 s->s.h.resetctx = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
542 if (s->s.h.intraonly) {
543 if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
544 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
545 return AVERROR_INVALIDDATA;
547 if (avctx->profile >= 1) {
548 if ((ret = read_colorspace_details(avctx)) < 0)
551 s->ss_h = s->ss_v = 1;
554 s->bytesperpixel = 1;
555 s->pix_fmt = AV_PIX_FMT_YUV420P;
556 avctx->colorspace = AVCOL_SPC_BT470BG;
557 avctx->color_range = AVCOL_RANGE_MPEG;
559 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
560 w = get_bits(&s->gb, 16) + 1;
561 h = get_bits(&s->gb, 16) + 1;
562 if (get_bits1(&s->gb)) // display size
563 skip_bits(&s->gb, 32);
565 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
566 s->s.h.refidx[0] = get_bits(&s->gb, 3);
567 s->s.h.signbias[0] = get_bits1(&s->gb) && !s->s.h.errorres;
568 s->s.h.refidx[1] = get_bits(&s->gb, 3);
569 s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres;
570 s->s.h.refidx[2] = get_bits(&s->gb, 3);
571 s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres;
572 if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
573 !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
574 !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
575 av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
576 return AVERROR_INVALIDDATA;
578 if (get_bits1(&s->gb)) {
579 w = s->s.refs[s->s.h.refidx[0]].f->width;
580 h = s->s.refs[s->s.h.refidx[0]].f->height;
581 } else if (get_bits1(&s->gb)) {
582 w = s->s.refs[s->s.h.refidx[1]].f->width;
583 h = s->s.refs[s->s.h.refidx[1]].f->height;
584 } else if (get_bits1(&s->gb)) {
585 w = s->s.refs[s->s.h.refidx[2]].f->width;
586 h = s->s.refs[s->s.h.refidx[2]].f->height;
588 w = get_bits(&s->gb, 16) + 1;
589 h = get_bits(&s->gb, 16) + 1;
    // Note that in this code, "CUR_FRAME" is actually before we
    // have formally allocated a frame, and thus actually represents
    // the _last_ frame
594 s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
595 s->s.frames[CUR_FRAME].tf.f->height == h;
596 if (get_bits1(&s->gb)) // display size
597 skip_bits(&s->gb, 32);
598 s->s.h.highprecisionmvs = get_bits1(&s->gb);
599 s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
601 s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
602 s->s.h.signbias[0] != s->s.h.signbias[2];
603 if (s->s.h.allowcompinter) {
604 if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
605 s->s.h.fixcompref = 2;
606 s->s.h.varcompref[0] = 0;
607 s->s.h.varcompref[1] = 1;
608 } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
609 s->s.h.fixcompref = 1;
610 s->s.h.varcompref[0] = 0;
611 s->s.h.varcompref[1] = 2;
613 s->s.h.fixcompref = 0;
614 s->s.h.varcompref[0] = 1;
615 s->s.h.varcompref[1] = 2;
620 s->s.h.refreshctx = s->s.h.errorres ? 0 : get_bits1(&s->gb);
621 s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
622 s->s.h.framectxid = c = get_bits(&s->gb, 2);
623 if (s->s.h.keyframe || s->s.h.intraonly)
624 s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
626 /* loopfilter header data */
627 if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
628 // reset loopfilter defaults
629 s->s.h.lf_delta.ref[0] = 1;
630 s->s.h.lf_delta.ref[1] = 0;
631 s->s.h.lf_delta.ref[2] = -1;
632 s->s.h.lf_delta.ref[3] = -1;
633 s->s.h.lf_delta.mode[0] = 0;
634 s->s.h.lf_delta.mode[1] = 0;
635 memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
637 s->s.h.filter.level = get_bits(&s->gb, 6);
638 sharp = get_bits(&s->gb, 3);
639 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
640 // the old cache values since they are still valid
641 if (s->s.h.filter.sharpness != sharp) {
        for (i = 1; i <= 63; i++) {
            int limit = i;

            if (sharp > 0) {
                limit >>= (sharp + 3) >> 2;
                limit = FFMIN(limit, 9 - sharp);
            }
            limit = FFMAX(limit, 1);

            s->filter_lut.lim_lut[i] = limit;
            s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
        }
655 s->s.h.filter.sharpness = sharp;
656 if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
657 if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
658 for (i = 0; i < 4; i++)
659 if (get_bits1(&s->gb))
660 s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
661 for (i = 0; i < 2; i++)
662 if (get_bits1(&s->gb))
663 s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
667 /* quantization header data */
668 s->s.h.yac_qi = get_bits(&s->gb, 8);
669 s->s.h.ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
670 s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
671 s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
672 s->s.h.lossless = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
673 s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
675 avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
677 /* segmentation header info */
678 if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
679 if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
680 for (i = 0; i < 7; i++)
681 s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
682 get_bits(&s->gb, 8) : 255;
683 if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
684 for (i = 0; i < 3; i++)
685 s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
686 get_bits(&s->gb, 8) : 255;
689 if (get_bits1(&s->gb)) {
690 s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
691 for (i = 0; i < 8; i++) {
692 if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
693 s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
694 if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
695 s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
696 if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
697 s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
698 s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
703 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
704 for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
705 int qyac, qydc, quvac, quvdc, lflvl, sh;
707 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
708 if (s->s.h.segmentation.absolute_vals)
709 qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
711 qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
713 qyac = s->s.h.yac_qi;
715 qydc = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
716 quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
717 quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
718 qyac = av_clip_uintp2(qyac, 8);
720 s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
721 s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
722 s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
723 s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];
725 sh = s->s.h.filter.level >= 32;
726 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
727 if (s->s.h.segmentation.absolute_vals)
728 lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
730 lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
732 lflvl = s->s.h.filter.level;
734 if (s->s.h.lf_delta.enabled) {
735 s->s.h.segmentation.feat[i].lflvl[0][0] =
736 s->s.h.segmentation.feat[i].lflvl[0][1] =
737 av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
738 for (j = 1; j < 4; j++) {
739 s->s.h.segmentation.feat[i].lflvl[j][0] =
740 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
741 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
742 s->s.h.segmentation.feat[i].lflvl[j][1] =
743 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
744 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
747 memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
748 sizeof(s->s.h.segmentation.feat[i].lflvl));
753 if ((ret = update_size(avctx, w, h)) < 0) {
754 av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
758 for (s->s.h.tiling.log2_tile_cols = 0;
759 s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
760 s->s.h.tiling.log2_tile_cols++) ;
761 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
762 max = FFMAX(0, max - 1);
763 while (max > s->s.h.tiling.log2_tile_cols) {
764 if (get_bits1(&s->gb))
765 s->s.h.tiling.log2_tile_cols++;
769 s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
770 s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
771 if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
776 for (i = 0; i < s->active_tile_cols; i++) {
777 av_free(s->td[i].b_base);
778 av_free(s->td[i].block_base);
783 s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
784 vp9_free_entries(avctx);
785 s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
786 s->s.h.tiling.tile_cols : 1;
787 vp9_alloc_entries(avctx, s->sb_rows);
788 if (avctx->active_thread_type == FF_THREAD_SLICE) {
789 n_range_coders = 4; // max_tile_rows
791 n_range_coders = s->s.h.tiling.tile_cols;
793 s->td = av_mallocz_array(s->active_tile_cols, sizeof(VP9TileData) +
794 n_range_coders * sizeof(VP56RangeCoder));
796 return AVERROR(ENOMEM);
797 rc = (VP56RangeCoder *) &s->td[s->active_tile_cols];
798 for (i = 0; i < s->active_tile_cols; i++) {
801 rc += n_range_coders;
805 /* check reference frames */
806 if (!s->s.h.keyframe && !s->s.h.intraonly) {
807 int valid_ref_frame = 0;
808 for (i = 0; i < 3; i++) {
809 AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
810 int refw = ref->width, refh = ref->height;
812 if (ref->format != avctx->pix_fmt) {
813 av_log(avctx, AV_LOG_ERROR,
814 "Ref pixfmt (%s) did not match current frame (%s)",
815 av_get_pix_fmt_name(ref->format),
816 av_get_pix_fmt_name(avctx->pix_fmt));
817 return AVERROR_INVALIDDATA;
818 } else if (refw == w && refh == h) {
819 s->mvscale[i][0] = s->mvscale[i][1] = 0;
                /* Check to make sure at least one of the frames that */
                /* this frame references has valid dimensions          */
823 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
824 av_log(avctx, AV_LOG_WARNING,
825 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
827 s->mvscale[i][0] = s->mvscale[i][1] = REF_INVALID_SCALE;
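                /* Scaled reference: mvscale[] is the size ratio between the
                 * reference and the current frame in 14-bit fixed point, and
                 * mvstep[] is how many reference pixels correspond to a
                 * 16-pixel step in the current frame. */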
830 s->mvscale[i][0] = (refw << 14) / w;
831 s->mvscale[i][1] = (refh << 14) / h;
832 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
833 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
837 if (!valid_ref_frame) {
838 av_log(avctx, AV_LOG_ERROR, "No valid reference frame is found, bitstream not supported\n");
839 return AVERROR_INVALIDDATA;
843 if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
844 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
845 s->prob_ctx[3].p = ff_vp9_default_probs;
846 memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
847 sizeof(ff_vp9_default_coef_probs));
848 memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
849 sizeof(ff_vp9_default_coef_probs));
850 memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
851 sizeof(ff_vp9_default_coef_probs));
852 memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
853 sizeof(ff_vp9_default_coef_probs));
854 } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
855 s->prob_ctx[c].p = ff_vp9_default_probs;
856 memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
857 sizeof(ff_vp9_default_coef_probs));
    // the next 16 bits give the size of the rest of the header (arith-coded)
861 s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
862 s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
864 data2 = align_get_bits(&s->gb);
865 if (size2 > size - (data2 - data)) {
866 av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
867 return AVERROR_INVALIDDATA;
869 ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
873 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
874 av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
875 return AVERROR_INVALIDDATA;
878 for (i = 0; i < s->active_tile_cols; i++) {
879 if (s->s.h.keyframe || s->s.h.intraonly) {
880 memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
881 memset(s->td[i].counts.eob, 0, sizeof(s->td[0].counts.eob));
883 memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));
    /* FIXME is it faster to not copy here, but do it down in the fw updates
     * as explicit copies if the fw update is missing (and skip the copy upon
     * eventual adoption)? */
890 s->prob.p = s->prob_ctx[c].p;
893 if (s->s.h.lossless) {
894 s->s.h.txfmmode = TX_4X4;
896 s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
897 if (s->s.h.txfmmode == 3)
898 s->s.h.txfmmode += vp8_rac_get(&s->c);
900 if (s->s.h.txfmmode == TX_SWITCHABLE) {
901 for (i = 0; i < 2; i++)
902 if (vp56_rac_get_prob_branchy(&s->c, 252))
903 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
904 for (i = 0; i < 2; i++)
905 for (j = 0; j < 2; j++)
906 if (vp56_rac_get_prob_branchy(&s->c, 252))
907 s->prob.p.tx16p[i][j] =
908 update_prob(&s->c, s->prob.p.tx16p[i][j]);
909 for (i = 0; i < 2; i++)
910 for (j = 0; j < 3; j++)
911 if (vp56_rac_get_prob_branchy(&s->c, 252))
912 s->prob.p.tx32p[i][j] =
913 update_prob(&s->c, s->prob.p.tx32p[i][j]);
918 for (i = 0; i < 4; i++) {
919 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
920 if (vp8_rac_get(&s->c)) {
921 for (j = 0; j < 2; j++)
922 for (k = 0; k < 2; k++)
923 for (l = 0; l < 6; l++)
924 for (m = 0; m < 6; m++) {
925 uint8_t *p = s->prob.coef[i][j][k][l][m];
926 uint8_t *r = ref[j][k][l][m];
927 if (m >= 3 && l == 0) // dc only has 3 pt
929 for (n = 0; n < 3; n++) {
930 if (vp56_rac_get_prob_branchy(&s->c, 252))
931 p[n] = update_prob(&s->c, r[n]);
935 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
938 for (j = 0; j < 2; j++)
939 for (k = 0; k < 2; k++)
940 for (l = 0; l < 6; l++)
941 for (m = 0; m < 6; m++) {
942 uint8_t *p = s->prob.coef[i][j][k][l][m];
943 uint8_t *r = ref[j][k][l][m];
944 if (m > 3 && l == 0) // dc only has 3 pt
947 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
950 if (s->s.h.txfmmode == i)
955 for (i = 0; i < 3; i++)
956 if (vp56_rac_get_prob_branchy(&s->c, 252))
957 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
958 if (!s->s.h.keyframe && !s->s.h.intraonly) {
959 for (i = 0; i < 7; i++)
960 for (j = 0; j < 3; j++)
961 if (vp56_rac_get_prob_branchy(&s->c, 252))
962 s->prob.p.mv_mode[i][j] =
963 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
965 if (s->s.h.filtermode == FILTER_SWITCHABLE)
966 for (i = 0; i < 4; i++)
967 for (j = 0; j < 2; j++)
968 if (vp56_rac_get_prob_branchy(&s->c, 252))
969 s->prob.p.filter[i][j] =
970 update_prob(&s->c, s->prob.p.filter[i][j]);
972 for (i = 0; i < 4; i++)
973 if (vp56_rac_get_prob_branchy(&s->c, 252))
974 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
976 if (s->s.h.allowcompinter) {
977 s->s.h.comppredmode = vp8_rac_get(&s->c);
978 if (s->s.h.comppredmode)
979 s->s.h.comppredmode += vp8_rac_get(&s->c);
980 if (s->s.h.comppredmode == PRED_SWITCHABLE)
981 for (i = 0; i < 5; i++)
982 if (vp56_rac_get_prob_branchy(&s->c, 252))
984 update_prob(&s->c, s->prob.p.comp[i]);
986 s->s.h.comppredmode = PRED_SINGLEREF;
989 if (s->s.h.comppredmode != PRED_COMPREF) {
990 for (i = 0; i < 5; i++) {
991 if (vp56_rac_get_prob_branchy(&s->c, 252))
992 s->prob.p.single_ref[i][0] =
993 update_prob(&s->c, s->prob.p.single_ref[i][0]);
994 if (vp56_rac_get_prob_branchy(&s->c, 252))
995 s->prob.p.single_ref[i][1] =
996 update_prob(&s->c, s->prob.p.single_ref[i][1]);
1000 if (s->s.h.comppredmode != PRED_SINGLEREF) {
1001 for (i = 0; i < 5; i++)
1002 if (vp56_rac_get_prob_branchy(&s->c, 252))
1003 s->prob.p.comp_ref[i] =
1004 update_prob(&s->c, s->prob.p.comp_ref[i]);
1007 for (i = 0; i < 4; i++)
1008 for (j = 0; j < 9; j++)
1009 if (vp56_rac_get_prob_branchy(&s->c, 252))
1010 s->prob.p.y_mode[i][j] =
1011 update_prob(&s->c, s->prob.p.y_mode[i][j]);
1013 for (i = 0; i < 4; i++)
1014 for (j = 0; j < 4; j++)
1015 for (k = 0; k < 3; k++)
1016 if (vp56_rac_get_prob_branchy(&s->c, 252))
1017 s->prob.p.partition[3 - i][j][k] =
1019 s->prob.p.partition[3 - i][j][k]);
1021 // mv fields don't use the update_prob subexp model for some reason
1022 for (i = 0; i < 3; i++)
1023 if (vp56_rac_get_prob_branchy(&s->c, 252))
1024 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1026 for (i = 0; i < 2; i++) {
1027 if (vp56_rac_get_prob_branchy(&s->c, 252))
1028 s->prob.p.mv_comp[i].sign =
1029 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1031 for (j = 0; j < 10; j++)
1032 if (vp56_rac_get_prob_branchy(&s->c, 252))
1033 s->prob.p.mv_comp[i].classes[j] =
1034 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1036 if (vp56_rac_get_prob_branchy(&s->c, 252))
1037 s->prob.p.mv_comp[i].class0 =
1038 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1040 for (j = 0; j < 10; j++)
1041 if (vp56_rac_get_prob_branchy(&s->c, 252))
1042 s->prob.p.mv_comp[i].bits[j] =
1043 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1046 for (i = 0; i < 2; i++) {
1047 for (j = 0; j < 2; j++)
1048 for (k = 0; k < 3; k++)
1049 if (vp56_rac_get_prob_branchy(&s->c, 252))
1050 s->prob.p.mv_comp[i].class0_fp[j][k] =
1051 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1053 for (j = 0; j < 3; j++)
1054 if (vp56_rac_get_prob_branchy(&s->c, 252))
1055 s->prob.p.mv_comp[i].fp[j] =
1056 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1059 if (s->s.h.highprecisionmvs) {
1060 for (i = 0; i < 2; i++) {
1061 if (vp56_rac_get_prob_branchy(&s->c, 252))
1062 s->prob.p.mv_comp[i].class0_hp =
1063 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1065 if (vp56_rac_get_prob_branchy(&s->c, 252))
1066 s->prob.p.mv_comp[i].hp =
1067 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1072 return (data2 - data) + size2;
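/* Recursively decode one superblock partition: read a partition symbol
 * (none/horizontal/vertical/split) using the above/left context, then either
 * decode a block at this level or recurse into the quadrants. Superblocks
 * that extend past the right or bottom frame edge only consider the
 * partitions that actually fit. */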
1075 static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1076 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1078 const VP9Context *s = td->s;
1079 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1080 (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1081 const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
1082 s->prob.p.partition[bl][c];
1083 enum BlockPartition bp;
1084 ptrdiff_t hbs = 4 >> bl;
1085 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1086 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1087 int bytesperpixel = s->bytesperpixel;
1090 bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1091 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1092 } else if (col + hbs < s->cols) { // FIXME why not <=?
1093 if (row + hbs < s->rows) { // FIXME why not <=?
1094 bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1096 case PARTITION_NONE:
1097 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1100 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1101 yoff += hbs * 8 * y_stride;
1102 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1103 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
1106 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1107 yoff += hbs * 8 * bytesperpixel;
1108 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1109 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
1111 case PARTITION_SPLIT:
1112 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1113 decode_sb(td, row, col + hbs, lflvl,
1114 yoff + 8 * hbs * bytesperpixel,
1115 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1116 yoff += hbs * 8 * y_stride;
1117 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1118 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1119 decode_sb(td, row + hbs, col + hbs, lflvl,
1120 yoff + 8 * hbs * bytesperpixel,
1121 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1126 } else if (vp56_rac_get_prob_branchy(td->c, p[1])) {
1127 bp = PARTITION_SPLIT;
1128 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1129 decode_sb(td, row, col + hbs, lflvl,
1130 yoff + 8 * hbs * bytesperpixel,
1131 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1134 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1136 } else if (row + hbs < s->rows) { // FIXME why not <=?
1137 if (vp56_rac_get_prob_branchy(td->c, p[2])) {
1138 bp = PARTITION_SPLIT;
1139 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1140 yoff += hbs * 8 * y_stride;
1141 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1142 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1145 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1148 bp = PARTITION_SPLIT;
1149 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1151 td->counts.partition[bl][c][bp]++;
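/* Second-pass counterpart of decode_sb(): replays the block layout stored
 * during pass one (b->bl, b->bp) instead of reading partition symbols from
 * the bitstream again. */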
1154 static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1155 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1157 const VP9Context *s = td->s;
1158 VP9Block *b = td->b;
1159 ptrdiff_t hbs = 4 >> bl;
1160 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1161 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1162 int bytesperpixel = s->bytesperpixel;
1165 av_assert2(b->bl == BL_8X8);
1166 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1167 } else if (td->b->bl == bl) {
1168 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1169 if (b->bp == PARTITION_H && row + hbs < s->rows) {
1170 yoff += hbs * 8 * y_stride;
1171 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1172 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
1173 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
1174 yoff += hbs * 8 * bytesperpixel;
1175 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1176 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
1179 decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
1180 if (col + hbs < s->cols) { // FIXME why not <=?
1181 if (row + hbs < s->rows) {
1182 decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1183 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1184 yoff += hbs * 8 * y_stride;
1185 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1186 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1187 decode_sb_mem(td, row + hbs, col + hbs, lflvl,
1188 yoff + 8 * hbs * bytesperpixel,
1189 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1191 yoff += hbs * 8 * bytesperpixel;
1192 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1193 decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
1195 } else if (row + hbs < s->rows) {
1196 yoff += hbs * 8 * y_stride;
1197 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1198 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1203 static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
1205 int sb_start = ( idx * n) >> log2_n;
1206 int sb_end = ((idx + 1) * n) >> log2_n;
1207 *start = FFMIN(sb_start, n) << 3;
1208 *end = FFMIN(sb_end, n) << 3;
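    /* results are in 8x8 block units; e.g. (made-up numbers) with n = 10
     * superblocks and log2_n = 1 (two tiles), tile 0 covers block columns
     * [0, 40) and tile 1 covers [40, 80) */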
1211 static void free_buffers(VP9Context *s)
1215 av_freep(&s->intra_pred_data[0]);
1216 for (i = 0; i < s->active_tile_cols; i++) {
1217 av_freep(&s->td[i].b_base);
1218 av_freep(&s->td[i].block_base);
1222 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1224 VP9Context *s = avctx->priv_data;
1227 for (i = 0; i < 3; i++) {
1228 vp9_frame_unref(avctx, &s->s.frames[i]);
1229 av_frame_free(&s->s.frames[i].tf.f);
1231 av_buffer_pool_uninit(&s->frame_extradata_pool);
1232 for (i = 0; i < 8; i++) {
1233 ff_thread_release_buffer(avctx, &s->s.refs[i]);
1234 av_frame_free(&s->s.refs[i].f);
1235 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1236 av_frame_free(&s->next_refs[i].f);
1240 vp9_free_entries(avctx);
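/* Single-threaded tile decoding: for each tile, initialize a range decoder
 * from the tile-size prefix, decode its superblock rows, save the last pixel
 * rows of each sbrow for intra prediction of the next one, then loop-filter
 * the row and report progress to any consuming frame threads. */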
1245 static int decode_tiles(AVCodecContext *avctx,
1246 const uint8_t *data, int size)
1248 VP9Context *s = avctx->priv_data;
1249 VP9TileData *td = &s->td[0];
1250 int row, col, tile_row, tile_col, ret;
1252 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1254 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1256 f = s->s.frames[CUR_FRAME].tf.f;
1257 ls_y = f->linesize[0];
    ls_uv = f->linesize[1];
1259 bytesperpixel = s->bytesperpixel;
1262 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1263 set_tile_offset(&tile_row_start, &tile_row_end,
1264 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1266 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1269 if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1270 tile_row == s->s.h.tiling.tile_rows - 1) {
1273 tile_size = AV_RB32(data);
1277 if (tile_size > size) {
1278 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1279 return AVERROR_INVALIDDATA;
1281 ret = ff_vp56_init_range_decoder(&td->c_b[tile_col], data, tile_size);
1284 if (vp56_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker bit
1285 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1286 return AVERROR_INVALIDDATA;
1292 for (row = tile_row_start; row < tile_row_end;
1293 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1294 VP9Filter *lflvl_ptr = s->lflvl;
1295 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1297 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1298 set_tile_offset(&tile_col_start, &tile_col_end,
1299 tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1300 td->tile_col_start = tile_col_start;
1302 memset(td->left_partition_ctx, 0, 8);
1303 memset(td->left_skip_ctx, 0, 8);
1304 if (s->s.h.keyframe || s->s.h.intraonly) {
1305 memset(td->left_mode_ctx, DC_PRED, 16);
1307 memset(td->left_mode_ctx, NEARESTMV, 8);
1309 memset(td->left_y_nnz_ctx, 0, 16);
1310 memset(td->left_uv_nnz_ctx, 0, 32);
1311 memset(td->left_segpred_ctx, 0, 8);
1313 td->c = &td->c_b[tile_col];
1316 for (col = tile_col_start;
1318 col += 8, yoff2 += 64 * bytesperpixel,
1319 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1320 // FIXME integrate with lf code (i.e. zero after each
1321 // use, similar to invtxfm coefficients, or similar)
1323 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1327 decode_sb_mem(td, row, col, lflvl_ptr,
1328 yoff2, uvoff2, BL_64X64);
1330 if (vpX_rac_is_end(td->c)) {
1331 return AVERROR_INVALIDDATA;
1333 decode_sb(td, row, col, lflvl_ptr,
1334 yoff2, uvoff2, BL_64X64);
            // back up pre-loopfilter reconstruction data for intra
1343 // prediction of next row of sb64s
1344 if (row + 8 < s->rows) {
1345 memcpy(s->intra_pred_data[0],
1346 f->data[0] + yoff + 63 * ls_y,
1347 8 * s->cols * bytesperpixel);
1348 memcpy(s->intra_pred_data[1],
1349 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1350 8 * s->cols * bytesperpixel >> s->ss_h);
1351 memcpy(s->intra_pred_data[2],
1352 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1353 8 * s->cols * bytesperpixel >> s->ss_h);
1356 // loopfilter one row
1357 if (s->s.h.filter.level) {
1360 lflvl_ptr = s->lflvl;
1361 for (col = 0; col < s->cols;
1362 col += 8, yoff2 += 64 * bytesperpixel,
1363 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1364 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
            // FIXME maybe we can make this more fine-grained by running the
1370 // loopfilter per-block instead of after each sbrow
1371 // In fact that would also make intra pred left preparation easier?
1372 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
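/* Slice-threaded tile decoding: job number jobnr decodes one tile column
 * across all tile rows, reporting per-sbrow progress so that
 * loopfilter_proc() below can filter rows as soon as every column has
 * finished them. */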
1379 static av_always_inline
1380 int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
1383 VP9Context *s = avctx->priv_data;
1384 VP9TileData *td = &s->td[jobnr];
1385 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1386 int bytesperpixel = s->bytesperpixel, row, col, tile_row;
1387 unsigned tile_cols_len;
1388 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1389 VP9Filter *lflvl_ptr_base;
1392 f = s->s.frames[CUR_FRAME].tf.f;
1393 ls_y = f->linesize[0];
    ls_uv = f->linesize[1];
1396 set_tile_offset(&tile_col_start, &tile_col_end,
1397 jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1398 td->tile_col_start = tile_col_start;
1399 uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3);
1400 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1401 lflvl_ptr_base = s->lflvl+(tile_col_start >> 3);
1403 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1404 set_tile_offset(&tile_row_start, &tile_row_end,
1405 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1407 td->c = &td->c_b[tile_row];
1408 for (row = tile_row_start; row < tile_row_end;
1409 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1410 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1411 VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3);
1413 memset(td->left_partition_ctx, 0, 8);
1414 memset(td->left_skip_ctx, 0, 8);
1415 if (s->s.h.keyframe || s->s.h.intraonly) {
1416 memset(td->left_mode_ctx, DC_PRED, 16);
1418 memset(td->left_mode_ctx, NEARESTMV, 8);
1420 memset(td->left_y_nnz_ctx, 0, 16);
1421 memset(td->left_uv_nnz_ctx, 0, 32);
1422 memset(td->left_segpred_ctx, 0, 8);
1424 for (col = tile_col_start;
1426 col += 8, yoff2 += 64 * bytesperpixel,
1427 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1428 // FIXME integrate with lf code (i.e. zero after each
1429 // use, similar to invtxfm coefficients, or similar)
1430 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1431 decode_sb(td, row, col, lflvl_ptr,
1432 yoff2, uvoff2, BL_64X64);
            // back up pre-loopfilter reconstruction data for intra
1436 // prediction of next row of sb64s
1437 tile_cols_len = tile_col_end - tile_col_start;
1438 if (row + 8 < s->rows) {
1439 memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1440 f->data[0] + yoff + 63 * ls_y,
1441 8 * tile_cols_len * bytesperpixel);
1442 memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1443 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1444 8 * tile_cols_len * bytesperpixel >> s->ss_h);
1445 memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1446 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1447 8 * tile_cols_len * bytesperpixel >> s->ss_h);
1450 vp9_report_tile_progress(s, row >> 3, 1);
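/* Loop filter job for slice threading: waits until all tile columns have
 * decoded a superblock row, then runs the loop filter over that row. */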
1456 static av_always_inline
1457 int loopfilter_proc(AVCodecContext *avctx)
1459 VP9Context *s = avctx->priv_data;
1460 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1461 VP9Filter *lflvl_ptr;
1462 int bytesperpixel = s->bytesperpixel, col, i;
1465 f = s->s.frames[CUR_FRAME].tf.f;
1466 ls_y = f->linesize[0];
    ls_uv = f->linesize[1];
1469 for (i = 0; i < s->sb_rows; i++) {
1470 vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);
1472 if (s->s.h.filter.level) {
1473 yoff = (ls_y * 64)*i;
1474 uvoff = (ls_uv * 64 >> s->ss_v)*i;
1475 lflvl_ptr = s->lflvl+s->sb_cols*i;
1476 for (col = 0; col < s->cols;
1477 col += 8, yoff += 64 * bytesperpixel,
1478 uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1479 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col,
1488 static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
1489 int *got_frame, AVPacket *pkt)
1491 const uint8_t *data = pkt->data;
1492 int size = pkt->size;
1493 VP9Context *s = avctx->priv_data;
1495 int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
1496 (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
1499 if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
1501 } else if (ret == 0) {
1502 if (!s->s.refs[ref].f->buf[0]) {
1503 av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
1504 return AVERROR_INVALIDDATA;
1506 if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
1508 ((AVFrame *)frame)->pts = pkt->pts;
1510 FF_DISABLE_DEPRECATION_WARNINGS
1511 ((AVFrame *)frame)->pkt_pts = pkt->pts;
1512 FF_ENABLE_DEPRECATION_WARNINGS
1514 ((AVFrame *)frame)->pkt_dts = pkt->dts;
1515 for (i = 0; i < 8; i++) {
1516 if (s->next_refs[i].f->buf[0])
1517 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1518 if (s->s.refs[i].f->buf[0] &&
1519 (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
1528 if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
1529 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
1530 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
1531 if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1532 (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
1535 if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
1536 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
1537 if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1538 (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
1540 if (s->s.frames[CUR_FRAME].tf.f->buf[0])
1541 vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME]);
1542 if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
1544 f = s->s.frames[CUR_FRAME].tf.f;
1545 f->key_frame = s->s.h.keyframe;
1546 f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1548 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
1549 (s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width ||
1550 s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
1551 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
1555 for (i = 0; i < 8; i++) {
1556 if (s->next_refs[i].f->buf[0])
1557 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1558 if (s->s.h.refreshrefmask & (1 << i)) {
1559 ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
1560 } else if (s->s.refs[i].f->buf[0]) {
1561 ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
1567 if (avctx->hwaccel) {
1568 ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
1571 ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
1574 ret = avctx->hwaccel->end_frame(avctx);
1580 // main tile decode loop
1581 memset(s->above_partition_ctx, 0, s->cols);
1582 memset(s->above_skip_ctx, 0, s->cols);
1583 if (s->s.h.keyframe || s->s.h.intraonly) {
1584 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
1586 memset(s->above_mode_ctx, NEARESTMV, s->cols);
1588 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
1589 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
1590 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
1591 memset(s->above_segpred_ctx, 0, s->cols);
1592 s->pass = s->s.frames[CUR_FRAME].uses_2pass =
1593 avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
1594 if ((ret = update_block_buffers(avctx)) < 0) {
1595 av_log(avctx, AV_LOG_ERROR,
1596 "Failed to allocate block buffers\n");
1599 if (s->s.h.refreshctx && s->s.h.parallelmode) {
1602 for (i = 0; i < 4; i++) {
1603 for (j = 0; j < 2; j++)
1604 for (k = 0; k < 2; k++)
1605 for (l = 0; l < 6; l++)
1606 for (m = 0; m < 6; m++)
1607 memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
1608 s->prob.coef[i][j][k][l][m], 3);
1609 if (s->s.h.txfmmode == i)
1612 s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
1613 ff_thread_finish_setup(avctx);
1614 } else if (!s->s.h.refreshctx) {
1615 ff_thread_finish_setup(avctx);
1619 if (avctx->active_thread_type & FF_THREAD_SLICE) {
1620 for (i = 0; i < s->sb_rows; i++)
1621 atomic_store(&s->entries[i], 0);
1626 for (i = 0; i < s->active_tile_cols; i++) {
1627 s->td[i].b = s->td[i].b_base;
1628 s->td[i].block = s->td[i].block_base;
1629 s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
1630 s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
1631 s->td[i].eob = s->td[i].eob_base;
1632 s->td[i].uveob[0] = s->td[i].uveob_base[0];
1633 s->td[i].uveob[1] = s->td[i].uveob_base[1];
1634 s->td[i].error_info = 0;
1638 if (avctx->active_thread_type == FF_THREAD_SLICE) {
1639 int tile_row, tile_col;
1641 av_assert1(!s->pass);
1643 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1644 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1647 if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1648 tile_row == s->s.h.tiling.tile_rows - 1) {
1651 tile_size = AV_RB32(data);
1655 if (tile_size > size)
1656 return AVERROR_INVALIDDATA;
1657 ret = ff_vp56_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
1660 if (vp56_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
1661 return AVERROR_INVALIDDATA;
1667 ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
1671 ret = decode_tiles(avctx, data, size);
1673 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1678 // Sum all counts fields into td[0].counts for tile threading
1679 if (avctx->active_thread_type == FF_THREAD_SLICE)
1680 for (i = 1; i < s->s.h.tiling.tile_cols; i++)
1681 for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
1682 ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];
1684 if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
1685 ff_vp9_adapt_probs(s);
1686 ff_thread_finish_setup(avctx);
1688 } while (s->pass++ == 1);
1689 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1691 if (s->td->error_info < 0) {
1692 av_log(avctx, AV_LOG_ERROR, "Failed to decode tile data\n");
1693 s->td->error_info = 0;
1694 return AVERROR_INVALIDDATA;
1699 for (i = 0; i < 8; i++) {
1700 if (s->s.refs[i].f->buf[0])
1701 ff_thread_release_buffer(avctx, &s->s.refs[i]);
1702 if (s->next_refs[i].f->buf[0] &&
1703 (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
1707 if (!s->s.h.invisible) {
1708 if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
1716 static void vp9_decode_flush(AVCodecContext *avctx)
1718 VP9Context *s = avctx->priv_data;
1721 for (i = 0; i < 3; i++)
1722 vp9_frame_unref(avctx, &s->s.frames[i]);
1723 for (i = 0; i < 8; i++)
1724 ff_thread_release_buffer(avctx, &s->s.refs[i]);
1727 static int init_frames(AVCodecContext *avctx)
1729 VP9Context *s = avctx->priv_data;
1732 for (i = 0; i < 3; i++) {
1733 s->s.frames[i].tf.f = av_frame_alloc();
1734 if (!s->s.frames[i].tf.f) {
1735 vp9_decode_free(avctx);
1736 av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1737 return AVERROR(ENOMEM);
1740 for (i = 0; i < 8; i++) {
1741 s->s.refs[i].f = av_frame_alloc();
1742 s->next_refs[i].f = av_frame_alloc();
1743 if (!s->s.refs[i].f || !s->next_refs[i].f) {
1744 vp9_decode_free(avctx);
1745 av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1746 return AVERROR(ENOMEM);
1753 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1755 VP9Context *s = avctx->priv_data;
1758 s->s.h.filter.sharpness = -1;
1760 return init_frames(avctx);
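/* Frame-threading context transfer: hand the next decoding thread references
 * to the internal frames and reference slots plus the header fields and
 * probability state it needs before this thread continues. */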
1764 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1767 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
1769 for (i = 0; i < 3; i++) {
1770 if (s->s.frames[i].tf.f->buf[0])
1771 vp9_frame_unref(dst, &s->s.frames[i]);
1772 if (ssrc->s.frames[i].tf.f->buf[0]) {
1773 if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
1777 for (i = 0; i < 8; i++) {
1778 if (s->s.refs[i].f->buf[0])
1779 ff_thread_release_buffer(dst, &s->s.refs[i]);
1780 if (ssrc->next_refs[i].f->buf[0]) {
1781 if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
1786 s->s.h.invisible = ssrc->s.h.invisible;
1787 s->s.h.keyframe = ssrc->s.h.keyframe;
1788 s->s.h.intraonly = ssrc->s.h.intraonly;
1789 s->ss_v = ssrc->ss_v;
1790 s->ss_h = ssrc->ss_h;
1791 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1792 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1793 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1794 s->bytesperpixel = ssrc->bytesperpixel;
1795 s->gf_fmt = ssrc->gf_fmt;
1798 s->s.h.bpp = ssrc->s.h.bpp;
1799 s->bpp_index = ssrc->bpp_index;
1800 s->pix_fmt = ssrc->pix_fmt;
1801 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
1802 memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
1803 memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1804 sizeof(s->s.h.segmentation.feat));
1810 AVCodec ff_vp9_decoder = {
1812 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
1813 .type = AVMEDIA_TYPE_VIDEO,
1814 .id = AV_CODEC_ID_VP9,
1815 .priv_data_size = sizeof(VP9Context),
1816 .init = vp9_decode_init,
1817 .close = vp9_decode_free,
1818 .decode = vp9_decode_frame,
1819 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
1820 .caps_internal = FF_CODEC_CAP_SLICE_THREAD_HAS_MF |
1821 FF_CODEC_CAP_ALLOCATE_PROGRESS,
1822 .flush = vp9_decode_flush,
1823 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
1824 .profiles = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
1825 .bsfs = "vp9_superframe_split",
1826 .hw_configs = (const AVCodecHWConfigInternal*[]) {
1827 #if CONFIG_VP9_DXVA2_HWACCEL
1830 #if CONFIG_VP9_D3D11VA_HWACCEL
1831 HWACCEL_D3D11VA(vp9),
1833 #if CONFIG_VP9_D3D11VA2_HWACCEL
1834 HWACCEL_D3D11VA2(vp9),
1836 #if CONFIG_VP9_NVDEC_HWACCEL
1839 #if CONFIG_VP9_VAAPI_HWACCEL
1842 #if CONFIG_VP9_VDPAU_HWACCEL