2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
35 #include "libavutil/avassert.h"
36 #include "libavutil/pixdesc.h"
38 #define VP9_SYNCCODE 0x498342
/* Tear down the slice-threading tile-progress state: destroy the progress
 * mutex/cond pair and free the per-tile-row atomic counters. No-op unless
 * FF_THREAD_SLICE is active.
 * NOTE(review): closing braces are not visible in this elided view. */
41 static void vp9_free_entries(AVCodecContext *avctx) {
42 VP9Context *s = avctx->priv_data;
44 if (avctx->active_thread_type & FF_THREAD_SLICE) {
45 pthread_mutex_destroy(&s->progress_mutex);
46 pthread_cond_destroy(&s->progress_cond);
47 av_freep(&s->entries);
/* (Re)allocate n atomic tile-progress counters (one per superblock row) and
 * (re)initialize the progress mutex/cond for slice threading.
 * Returns 0 on success or AVERROR(ENOMEM); no-op without FF_THREAD_SLICE.
 * NOTE(review): the allocation-failure check around the second av_freep()
 * is elided from this view — presumably `if (!s->entries)`. */
51 static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
52 VP9Context *s = avctx->priv_data;
55 if (avctx->active_thread_type & FF_THREAD_SLICE) {
57 av_freep(&s->entries); // drop any previous allocation first
59 s->entries = av_malloc_array(n, sizeof(atomic_int));
62 av_freep(&s->entries); // failure path: ensure pointer is NULL
63 return AVERROR(ENOMEM);
66 for (i = 0; i < n; i++)
67 atomic_init(&s->entries[i], 0);
69 pthread_mutex_init(&s->progress_mutex, NULL);
70 pthread_cond_init(&s->progress_cond, NULL);
/* Publish decoding progress: add n to the counter for 'field'. The release
 * ordering pairs with the acquire load in vp9_await_tile_progress(); the
 * update and signal happen under the mutex so a waiter cannot miss it. */
75 static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
76 pthread_mutex_lock(&s->progress_mutex);
77 atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
78 pthread_cond_signal(&s->progress_cond);
79 pthread_mutex_unlock(&s->progress_mutex);
/* Block until the progress counter for 'field' reaches n.
 * Fast path: a lock-free acquire load when progress is already sufficient
 * (the early-return statement itself is elided from this view).
 * Slow path: condvar wait under the mutex; the relaxed reload is safe
 * because the mutex orders it against the producer's update. */
82 static void vp9_await_tile_progress(VP9Context *s, int field, int n) {
83 if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
86 pthread_mutex_lock(&s->progress_mutex);
87 while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) != n)
88 pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
89 pthread_mutex_unlock(&s->progress_mutex);
/* Stubs used when threading support is compiled out — presumably the
 * #else branch of a HAVE_THREADS guard that is elided from this view. */
92 static void vp9_free_entries(AVCodecContext *avctx) {}
93 static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
/* Release all per-frame references: the ThreadFrame buffer, the shared
 * extradata buffer (segmentation map + mv pairs), and the hwaccel private
 * buffer. The raw pointers are cleared because they alias freed storage. */
96 static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
98 ff_thread_release_buffer(avctx, &f->tf);
99 av_buffer_unref(&f->extradata);
100 av_buffer_unref(&f->hwaccel_priv_buf);
101 f->segmentation_map = NULL;
102 f->hwaccel_picture_private = NULL;
/* Allocate the picture buffer plus per-frame side data for f.
 * The extradata pool packs, per superblock: 64 segmentation-map bytes
 * followed by 64 VP9mvrefPair entries (hence sz * (1 + sizeof(VP9mvrefPair))).
 * The pool is rebuilt whenever the frame geometry (sz) changes.
 * Also allocates hwaccel private data when a hwaccel is in use.
 * Returns 0 on success, a negative AVERROR on failure (the elided tail
 * is the shared failure path: vp9_frame_unref + AVERROR(ENOMEM)). */
105 static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
107 VP9Context *s = avctx->priv_data;
110 ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
114 sz = 64 * s->sb_cols * s->sb_rows; // one byte per 8x8 block, per SB
115 if (sz != s->frame_extradata_pool_size) {
116 av_buffer_pool_uninit(&s->frame_extradata_pool);
117 s->frame_extradata_pool = av_buffer_pool_init(sz * (1 + sizeof(VP9mvrefPair)), NULL);
118 if (!s->frame_extradata_pool) {
119 s->frame_extradata_pool_size = 0;
122 s->frame_extradata_pool_size = sz;
124 f->extradata = av_buffer_pool_get(s->frame_extradata_pool);
128 memset(f->extradata->data, 0, f->extradata->size);
130 f->segmentation_map = f->extradata->data;
131 f->mv = (VP9mvrefPair *) (f->extradata->data + sz); // mv array follows the seg map
133 if (avctx->hwaccel) {
134 const AVHWAccel *hwaccel = avctx->hwaccel;
135 av_assert0(!f->hwaccel_picture_private);
136 if (hwaccel->frame_priv_data_size) {
137 f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
138 if (!f->hwaccel_priv_buf)
140 f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
/* failure path: undo partial allocations and report out-of-memory */
147 vp9_frame_unref(avctx, f);
148 return AVERROR(ENOMEM);
/* Create a new reference of src in dst: ref the ThreadFrame and extradata
 * buffers, then copy the raw pointers/flags that alias them. The elided
 * tail is the shared failure path (vp9_frame_unref + AVERROR(ENOMEM)). */
151 static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
155 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
159 dst->extradata = av_buffer_ref(src->extradata);
163 dst->segmentation_map = src->segmentation_map; // aliases dst->extradata->data
165 dst->uses_2pass = src->uses_2pass;
167 if (src->hwaccel_picture_private) {
168 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
169 if (!dst->hwaccel_priv_buf)
171 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
/* failure path: drop whatever was referenced so far */
177 vp9_frame_unref(avctx, dst);
178 return AVERROR(ENOMEM);
/* Handle a (possible) change of coded frame size w x h:
 *  - negotiate the output pixel format, offering every compiled-in hwaccel
 *    format applicable to the current sw pix_fmt before the sw format itself;
 *  - recompute superblock/block grid dimensions;
 *  - reallocate the single packed "above" context/intra-prediction buffer;
 *  - reinit the DSP tables when the bit depth changed.
 * Returns 0 or a negative AVERROR. Early-outs when neither size nor format
 * changed. NOTE(review): many #endif / return lines are elided here. */
181 static int update_size(AVCodecContext *avctx, int w, int h)
183 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
184 CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
185 CONFIG_VP9_NVDEC_HWACCEL + \
186 CONFIG_VP9_VAAPI_HWACCEL + \
187 CONFIG_VP9_VDPAU_HWACCEL)
188 enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
189 VP9Context *s = avctx->priv_data;
191 int bytesperpixel = s->bytesperpixel, ret, cols, rows;
194 av_assert0(w > 0 && h > 0);
196 if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
197 if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
/* build the hwaccel candidate list for the current sw format */
200 switch (s->pix_fmt) {
201 case AV_PIX_FMT_YUV420P:
202 #if CONFIG_VP9_VDPAU_HWACCEL
203 *fmtp++ = AV_PIX_FMT_VDPAU;
205 case AV_PIX_FMT_YUV420P10:
206 #if CONFIG_VP9_DXVA2_HWACCEL
207 *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
209 #if CONFIG_VP9_D3D11VA_HWACCEL
210 *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
211 *fmtp++ = AV_PIX_FMT_D3D11;
213 #if CONFIG_VP9_NVDEC_HWACCEL
214 *fmtp++ = AV_PIX_FMT_CUDA;
216 #if CONFIG_VP9_VAAPI_HWACCEL
217 *fmtp++ = AV_PIX_FMT_VAAPI;
220 case AV_PIX_FMT_YUV420P12:
221 #if CONFIG_VP9_NVDEC_HWACCEL
222 *fmtp++ = AV_PIX_FMT_CUDA;
224 #if CONFIG_VP9_VAAPI_HWACCEL
225 *fmtp++ = AV_PIX_FMT_VAAPI;
/* software format is the final fallback, then the terminator */
230 *fmtp++ = s->pix_fmt;
231 *fmtp = AV_PIX_FMT_NONE;
233 ret = ff_thread_get_format(avctx, pix_fmts);
237 avctx->pix_fmt = ret;
238 s->gf_fmt = s->pix_fmt;
/* nothing to reallocate if the geometry and format are unchanged */
246 if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
249 s->last_fmt = s->pix_fmt;
250 s->sb_cols = (w + 63) >> 6; // 64x64 superblock grid
251 s->sb_rows = (h + 63) >> 6;
252 s->cols = (w + 7) >> 3; // 8x8 block grid
253 s->rows = (h + 7) >> 3;
254 lflvl_len = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;
256 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
257 av_freep(&s->intra_pred_data[0]);
258 // FIXME we slightly over-allocate here for subsampled chroma, but a little
259 // bit of padding shouldn't affect performance...
260 p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
261 lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
263 return AVERROR(ENOMEM);
/* carve the single allocation into the per-column "above" context arrays */
264 assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
265 assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
266 assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
267 assign(s->above_y_nnz_ctx, uint8_t *, 16);
268 assign(s->above_mode_ctx, uint8_t *, 16);
269 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
270 assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
271 assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
272 assign(s->above_partition_ctx, uint8_t *, 8);
273 assign(s->above_skip_ctx, uint8_t *, 8);
274 assign(s->above_txfm_ctx, uint8_t *, 8);
275 assign(s->above_segpred_ctx, uint8_t *, 8);
276 assign(s->above_intra_ctx, uint8_t *, 8);
277 assign(s->above_comp_ctx, uint8_t *, 8);
278 assign(s->above_ref_ctx, uint8_t *, 8);
279 assign(s->above_filter_ctx, uint8_t *, 8);
280 assign(s->lflvl, VP9Filter *, lflvl_len);
/* geometry changed: per-tile block buffers must be rebuilt lazily */
284 for (i = 0; i < s->active_tile_cols; i++) {
285 av_freep(&s->td[i].b_base);
286 av_freep(&s->td[i].block_base);
/* bit depth changed: reinit the (bpp-dependent) DSP function tables */
290 if (s->s.h.bpp != s->last_bpp) {
291 ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
292 ff_videodsp_init(&s->vdsp, s->s.h.bpp);
293 s->last_bpp = s->s.h.bpp;
/* (Re)allocate the per-tile block/coefficient scratch buffers.
 * In 2-pass mode a single allocation sized for the whole frame (sbs
 * superblocks) is used on td[0]; otherwise each active tile column gets a
 * one-superblock-sized buffer. Layout in block_base: luma coeffs, two
 * chroma coeff planes, then luma EOB bytes and two chroma EOB planes.
 * Early-outs when buffers already exist and the 2-pass mode is unchanged.
 * Returns 0 or AVERROR(ENOMEM). */
299 static int update_block_buffers(AVCodecContext *avctx)
302 VP9Context *s = avctx->priv_data;
303 int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
304 VP9TileData *td = &s->td[0];
306 if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
310 av_free(td->block_base);
311 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v); // chroma samples per SB
312 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
313 if (s->s.frames[CUR_FRAME].uses_2pass) {
314 int sbs = s->sb_cols * s->sb_rows;
316 td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
317 td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
318 16 * 16 + 2 * chroma_eobs) * sbs);
319 if (!td->b_base || !td->block_base)
320 return AVERROR(ENOMEM);
321 td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
322 td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
323 td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
324 td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
325 td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;
/* non-2-pass: free any extra tile buffers, then allocate one SB's worth
 * of scratch per active tile column with the same internal layout */
327 for (i = 1; i < s->active_tile_cols; i++) {
328 if (s->td[i].b_base && s->td[i].block_base) {
329 av_free(s->td[i].b_base);
330 av_free(s->td[i].block_base);
333 for (i = 0; i < s->active_tile_cols; i++) {
334 s->td[i].b_base = av_malloc(sizeof(VP9Block));
335 s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
336 16 * 16 + 2 * chroma_eobs);
337 if (!s->td[i].b_base || !s->td[i].block_base)
338 return AVERROR(ENOMEM);
339 s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
340 s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
341 s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
342 s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
343 s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;
346 s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;
351 // The sign bit is at the end, not the start, of a bit sequence
/* Read an n-bit magnitude followed by a sign bit (1 = negative). */
352 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
354 int v = get_bits(gb, n);
355 return get_bits1(gb) ? -v : v;
/* Inverse of the "recenter nonneg" mapping used by the subexponential
 * probability-update coding. NOTE(review): only one return branch is
 * visible here — the cases for v relative to 2*m are elided. */
358 static av_always_inline int inv_recenter_nonneg(int v, int m)
363 return m - ((v + 1) >> 1);
367 // differential forward probability updates
/* Decode a subexponentially-coded delta d from the range coder and apply
 * it to the current probability p (1..255), returning the new probability.
 * inv_map_table[] maps the decoded VLC index back to the absolute delta;
 * its first 20 entries are the "cheap, rough" update values (see comment
 * below for the full model). */
368 static int update_prob(VP56RangeCoder *c, int p)
370 static const uint8_t inv_map_table[255] = {
371 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
372 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
373 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
374 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
375 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
376 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
377 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
378 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
379 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
380 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
381 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
382 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
383 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
384 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
385 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
386 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
387 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
388 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
393 /* This code is trying to do a differential probability update. For a
394 * current probability A in the range [1, 255], the difference to a new
395 * probability of any value can be expressed differentially as 1-A, 255-A
396 * where some part of this (absolute range) exists both in positive as
397 * well as the negative part, whereas another part only exists in one
398 * half. We're trying to code this shared part differentially, i.e.
399 * times two where the value of the lowest bit specifies the sign, and
400 * the single part is then coded on top of this. This absolute difference
401 * then again has a value of [0, 254], but a bigger value in this range
402 * indicates that we're further away from the original value A, so we
403 * can code this as a VLC code, since higher values are increasingly
404 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
405 * updates vs. the 'fine, exact' updates further down the range, which
406 * adds one extra dimension to this differential update model. */
408 if (!vp8_rac_get(c)) {
409 d = vp8_rac_get_uint(c, 4) + 0;
410 } else if (!vp8_rac_get(c)) {
411 d = vp8_rac_get_uint(c, 4) + 16;
412 } else if (!vp8_rac_get(c)) {
413 d = vp8_rac_get_uint(c, 5) + 32;
415 d = vp8_rac_get_uint(c, 7);
417 d = (d << 1) - 65 + vp8_rac_get(c);
419 av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
/* re-center around p, staying within [1, 255] */
422 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
423 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
/* Parse the color config from the uncompressed header: bit depth (profiles
 * 2/3 only), colorspace, color range and chroma subsampling, filling in
 * s->pix_fmt / s->ss_h / s->ss_v / s->bytesperpixel and the avctx color
 * properties. Returns 0 or AVERROR_INVALIDDATA on reserved/unsupported
 * combinations. NOTE(review): several closing braces / else lines are
 * elided from this view. */
428 static const enum AVColorSpace colorspaces[8] = {
429 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
430 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
432 VP9Context *s = avctx->priv_data;
433 int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
436 s->s.h.bpp = 8 + bits * 2;
437 s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
438 avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
439 if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
440 static const enum AVPixelFormat pix_fmt_rgb[3] = {
441 AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
443 s->ss_h = s->ss_v = 0; // RGB is never subsampled
444 avctx->color_range = AVCOL_RANGE_JPEG;
445 s->pix_fmt = pix_fmt_rgb[bits];
446 if (avctx->profile & 1) {
447 if (get_bits1(&s->gb)) {
448 av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
449 return AVERROR_INVALIDDATA;
452 av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
454 return AVERROR_INVALIDDATA;
/* YUV path: pick the pixel format from bit depth and subsampling */
457 static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
458 { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
459 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
460 { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
461 { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
462 { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
463 { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
465 avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
466 if (avctx->profile & 1) {
467 s->ss_h = get_bits1(&s->gb);
468 s->ss_v = get_bits1(&s->gb);
469 s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
470 if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
471 av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
473 return AVERROR_INVALIDDATA;
474 } else if (get_bits1(&s->gb)) {
475 av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
477 return AVERROR_INVALIDDATA;
/* even profiles (0/2) are always 4:2:0 */
480 s->ss_h = s->ss_v = 1;
481 s->pix_fmt = pix_fmt_for_ss[bits][1][1];
/* Parse a complete VP9 frame header: the uncompressed part (via GetBit)
 * followed by the arith-coded compressed part (via the VP56 range coder),
 * filling in s->s.h and the forward-updated probability set s->prob.
 * On a show-existing-frame header, *ref is set and the function returns
 * early (return statement elided from this view). Returns the total
 * header size in bytes, or a negative AVERROR.
 * NOTE(review): this view is heavily elided — many closing braces, else
 * branches and error returns are missing; comments below are hedged
 * accordingly. */
488 static int decode_frame_header(AVCodecContext *avctx,
489 const uint8_t *data, int size, int *ref)
491 VP9Context *s = avctx->priv_data;
492 int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
494 const uint8_t *data2;
/* general frame header: marker, profile, frame type and flags */
497 if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
498 av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
501 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
502 av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
503 return AVERROR_INVALIDDATA;
505 avctx->profile = get_bits1(&s->gb);
506 avctx->profile |= get_bits1(&s->gb) << 1;
507 if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
508 if (avctx->profile > 3) {
509 av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
510 return AVERROR_INVALIDDATA;
512 s->s.h.profile = avctx->profile;
513 if (get_bits1(&s->gb)) { // show-existing-frame: just output ref slot *ref
514 *ref = get_bits(&s->gb, 3);
518 s->last_keyframe = s->s.h.keyframe;
519 s->s.h.keyframe = !get_bits1(&s->gb);
521 last_invisible = s->s.h.invisible;
522 s->s.h.invisible = !get_bits1(&s->gb);
523 s->s.h.errorres = get_bits1(&s->gb);
524 s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
526 if (s->s.h.keyframe) {
527 if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
528 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
529 return AVERROR_INVALIDDATA;
531 if ((ret = read_colorspace_details(avctx)) < 0)
533 // for profile 1, here follows the subsampling bits
534 s->s.h.refreshrefmask = 0xff; // keyframes refresh all 8 reference slots
535 w = get_bits(&s->gb, 16) + 1;
536 h = get_bits(&s->gb, 16) + 1;
537 if (get_bits1(&s->gb)) // display size
538 skip_bits(&s->gb, 32);
/* non-keyframe: intra-only or inter frame */
540 s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
541 s->s.h.resetctx = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
542 if (s->s.h.intraonly) {
543 if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
544 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
545 return AVERROR_INVALIDDATA;
547 if (avctx->profile >= 1) {
548 if ((ret = read_colorspace_details(avctx)) < 0)
551 s->ss_h = s->ss_v = 1; // profile-0 intra-only: implicit 8-bit 4:2:0
554 s->bytesperpixel = 1;
555 s->pix_fmt = AV_PIX_FMT_YUV420P;
556 avctx->colorspace = AVCOL_SPC_BT470BG;
557 avctx->color_range = AVCOL_RANGE_MPEG;
559 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
560 w = get_bits(&s->gb, 16) + 1;
561 h = get_bits(&s->gb, 16) + 1;
562 if (get_bits1(&s->gb)) // display size
563 skip_bits(&s->gb, 32);
/* inter frame: reference indices, sign biases, size from refs or coded */
565 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
566 s->s.h.refidx[0] = get_bits(&s->gb, 3);
567 s->s.h.signbias[0] = get_bits1(&s->gb) && !s->s.h.errorres;
568 s->s.h.refidx[1] = get_bits(&s->gb, 3);
569 s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres;
570 s->s.h.refidx[2] = get_bits(&s->gb, 3);
571 s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres;
572 if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
573 !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
574 !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
575 av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
576 return AVERROR_INVALIDDATA;
578 if (get_bits1(&s->gb)) {
579 w = s->s.refs[s->s.h.refidx[0]].f->width;
580 h = s->s.refs[s->s.h.refidx[0]].f->height;
581 } else if (get_bits1(&s->gb)) {
582 w = s->s.refs[s->s.h.refidx[1]].f->width;
583 h = s->s.refs[s->s.h.refidx[1]].f->height;
584 } else if (get_bits1(&s->gb)) {
585 w = s->s.refs[s->s.h.refidx[2]].f->width;
586 h = s->s.refs[s->s.h.refidx[2]].f->height;
588 w = get_bits(&s->gb, 16) + 1;
589 h = get_bits(&s->gb, 16) + 1;
591 // Note that in this code, "CUR_FRAME" is actually before we
592 // have formally allocated a frame, and thus actually represents
594 s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
595 s->s.frames[CUR_FRAME].tf.f->height == h;
596 if (get_bits1(&s->gb)) // display size
597 skip_bits(&s->gb, 32);
598 s->s.h.highprecisionmvs = get_bits1(&s->gb);
599 s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
/* compound prediction is only possible if the refs disagree in sign bias;
 * the ref with the odd-one-out bias becomes the fixed compound ref */
601 s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
602 s->s.h.signbias[0] != s->s.h.signbias[2];
603 if (s->s.h.allowcompinter) {
604 if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
605 s->s.h.fixcompref = 2;
606 s->s.h.varcompref[0] = 0;
607 s->s.h.varcompref[1] = 1;
608 } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
609 s->s.h.fixcompref = 1;
610 s->s.h.varcompref[0] = 0;
611 s->s.h.varcompref[1] = 2;
613 s->s.h.fixcompref = 0;
614 s->s.h.varcompref[0] = 1;
615 s->s.h.varcompref[1] = 2;
620 s->s.h.refreshctx = s->s.h.errorres ? 0 : get_bits1(&s->gb);
621 s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
622 s->s.h.framectxid = c = get_bits(&s->gb, 2);
623 if (s->s.h.keyframe || s->s.h.intraonly)
624 s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
626 /* loopfilter header data */
627 if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
628 // reset loopfilter defaults
629 s->s.h.lf_delta.ref[0] = 1;
630 s->s.h.lf_delta.ref[1] = 0;
631 s->s.h.lf_delta.ref[2] = -1;
632 s->s.h.lf_delta.ref[3] = -1;
633 s->s.h.lf_delta.mode[0] = 0;
634 s->s.h.lf_delta.mode[1] = 0;
635 memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
637 s->s.h.filter.level = get_bits(&s->gb, 6);
638 sharp = get_bits(&s->gb, 3);
639 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
640 // the old cache values since they are still valid
641 if (s->s.h.filter.sharpness != sharp) {
642 for (i = 1; i <= 63; i++) {
646 limit >>= (sharp + 3) >> 2;
647 limit = FFMIN(limit, 9 - sharp);
649 limit = FFMAX(limit, 1);
651 s->filter_lut.lim_lut[i] = limit;
652 s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
655 s->s.h.filter.sharpness = sharp;
656 if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
657 if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
658 for (i = 0; i < 4; i++)
659 if (get_bits1(&s->gb))
660 s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
661 for (i = 0; i < 2; i++)
662 if (get_bits1(&s->gb))
663 s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
667 /* quantization header data */
668 s->s.h.yac_qi = get_bits(&s->gb, 8);
669 s->s.h.ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
670 s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
671 s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
672 s->s.h.lossless = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
673 s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
675 avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
677 /* segmentation header info */
678 if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
679 if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
680 for (i = 0; i < 7; i++)
681 s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
682 get_bits(&s->gb, 8) : 255;
683 if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
684 for (i = 0; i < 3; i++)
685 s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
686 get_bits(&s->gb, 8) : 255;
689 if (get_bits1(&s->gb)) { // update segmentation feature data
690 s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
691 for (i = 0; i < 8; i++) {
692 if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
693 s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
694 if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
695 s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
696 if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
697 s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
698 s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
703 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
704 for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
705 int qyac, qydc, quvac, quvdc, lflvl, sh;
707 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
708 if (s->s.h.segmentation.absolute_vals)
709 qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
711 qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
713 qyac = s->s.h.yac_qi;
715 qydc = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
716 quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
717 quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
718 qyac = av_clip_uintp2(qyac, 8);
720 s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
721 s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
722 s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
723 s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];
/* per-segment loopfilter levels, combined with ref/mode deltas */
725 sh = s->s.h.filter.level >= 32;
726 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
727 if (s->s.h.segmentation.absolute_vals)
728 lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
730 lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
732 lflvl = s->s.h.filter.level;
734 if (s->s.h.lf_delta.enabled) {
735 s->s.h.segmentation.feat[i].lflvl[0][0] =
736 s->s.h.segmentation.feat[i].lflvl[0][1] =
737 av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
738 for (j = 1; j < 4; j++) {
739 s->s.h.segmentation.feat[i].lflvl[j][0] =
740 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
741 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
742 s->s.h.segmentation.feat[i].lflvl[j][1] =
743 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
744 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
747 memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
748 sizeof(s->s.h.segmentation.feat[i].lflvl));
/* resolution change / first use: (re)allocate geometry-dependent state */
753 if ((ret = update_size(avctx, w, h)) < 0) {
754 av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
/* tiling info: log2_tile_cols is coded as increments above the minimum */
758 for (s->s.h.tiling.log2_tile_cols = 0;
759 s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
760 s->s.h.tiling.log2_tile_cols++) ;
761 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
762 max = FFMAX(0, max - 1);
763 while (max > s->s.h.tiling.log2_tile_cols) {
764 if (get_bits1(&s->gb))
765 s->s.h.tiling.log2_tile_cols++;
769 s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
770 s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
771 if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
/* tile-column count changed: rebuild per-tile data and range coders */
776 for (i = 0; i < s->active_tile_cols; i++) {
777 av_free(s->td[i].b_base);
778 av_free(s->td[i].block_base);
783 s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
784 vp9_free_entries(avctx);
785 s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
786 s->s.h.tiling.tile_cols : 1;
787 vp9_alloc_entries(avctx, s->sb_rows);
788 if (avctx->active_thread_type == FF_THREAD_SLICE) {
789 n_range_coders = 4; // max_tile_rows
791 n_range_coders = s->s.h.tiling.tile_cols;
/* td[] and its trailing range coders live in one allocation */
793 s->td = av_mallocz_array(s->active_tile_cols, sizeof(VP9TileData) +
794 n_range_coders * sizeof(VP56RangeCoder));
796 return AVERROR(ENOMEM);
797 rc = (VP56RangeCoder *) &s->td[s->active_tile_cols];
798 for (i = 0; i < s->active_tile_cols; i++) {
801 rc += n_range_coders;
805 /* check reference frames */
806 if (!s->s.h.keyframe && !s->s.h.intraonly) {
807 for (i = 0; i < 3; i++) {
808 AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
809 int refw = ref->width, refh = ref->height;
811 if (ref->format != avctx->pix_fmt) {
812 av_log(avctx, AV_LOG_ERROR,
813 "Ref pixfmt (%s) did not match current frame (%s)",
814 av_get_pix_fmt_name(ref->format),
815 av_get_pix_fmt_name(avctx->pix_fmt));
816 return AVERROR_INVALIDDATA;
817 } else if (refw == w && refh == h) {
818 s->mvscale[i][0] = s->mvscale[i][1] = 0; // same size: no scaling
820 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
821 av_log(avctx, AV_LOG_ERROR,
822 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
824 return AVERROR_INVALIDDATA;
/* 14-bit fixed-point mv scaling factors for scaled references */
826 s->mvscale[i][0] = (refw << 14) / w;
827 s->mvscale[i][1] = (refh << 14) / h;
828 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
829 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
/* reset the probability contexts as dictated by frame type / resetctx */
834 if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
835 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
836 s->prob_ctx[3].p = ff_vp9_default_probs;
837 memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
838 sizeof(ff_vp9_default_coef_probs));
839 memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
840 sizeof(ff_vp9_default_coef_probs));
841 memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
842 sizeof(ff_vp9_default_coef_probs));
843 memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
844 sizeof(ff_vp9_default_coef_probs));
845 } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
846 s->prob_ctx[c].p = ff_vp9_default_probs;
847 memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
848 sizeof(ff_vp9_default_coef_probs));
851 // next 16 bits is size of the rest of the header (arith-coded)
852 s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
853 s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
855 data2 = align_get_bits(&s->gb);
856 if (size2 > size - (data2 - data)) {
857 av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
858 return AVERROR_INVALIDDATA;
860 ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
864 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
865 av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
866 return AVERROR_INVALIDDATA;
/* reset per-tile symbol counts (coef/eob only for intra frames) */
869 for (i = 0; i < s->active_tile_cols; i++) {
870 if (s->s.h.keyframe || s->s.h.intraonly) {
871 memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
872 memset(s->td[i].counts.eob, 0, sizeof(s->td[0].counts.eob));
874 memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));
878 /* FIXME is it faster to not copy here, but do it down in the fw updates
879 * as explicit copies if the fw update is missing (and skip the copy upon
881 s->prob.p = s->prob_ctx[c].p;
/* txfm updates */
884 if (s->s.h.lossless) {
885 s->s.h.txfmmode = TX_4X4;
887 s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
888 if (s->s.h.txfmmode == 3)
889 s->s.h.txfmmode += vp8_rac_get(&s->c);
891 if (s->s.h.txfmmode == TX_SWITCHABLE) {
892 for (i = 0; i < 2; i++)
893 if (vp56_rac_get_prob_branchy(&s->c, 252))
894 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
895 for (i = 0; i < 2; i++)
896 for (j = 0; j < 2; j++)
897 if (vp56_rac_get_prob_branchy(&s->c, 252))
898 s->prob.p.tx16p[i][j] =
899 update_prob(&s->c, s->prob.p.tx16p[i][j]);
900 for (i = 0; i < 2; i++)
901 for (j = 0; j < 3; j++)
902 if (vp56_rac_get_prob_branchy(&s->c, 252))
903 s->prob.p.tx32p[i][j] =
904 update_prob(&s->c, s->prob.p.tx32p[i][j]);
/* coef updates, per txfm size; remaining probs come from the pareto model */
909 for (i = 0; i < 4; i++) {
910 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
911 if (vp8_rac_get(&s->c)) {
912 for (j = 0; j < 2; j++)
913 for (k = 0; k < 2; k++)
914 for (l = 0; l < 6; l++)
915 for (m = 0; m < 6; m++) {
916 uint8_t *p = s->prob.coef[i][j][k][l][m];
917 uint8_t *r = ref[j][k][l][m];
918 if (m >= 3 && l == 0) // dc only has 3 pt
920 for (n = 0; n < 3; n++) {
921 if (vp56_rac_get_prob_branchy(&s->c, 252))
922 p[n] = update_prob(&s->c, r[n]);
926 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
/* no update for this txfm size: copy the reference probs wholesale */
929 for (j = 0; j < 2; j++)
930 for (k = 0; k < 2; k++)
931 for (l = 0; l < 6; l++)
932 for (m = 0; m < 6; m++) {
933 uint8_t *p = s->prob.coef[i][j][k][l][m];
934 uint8_t *r = ref[j][k][l][m];
935 if (m > 3 && l == 0) // dc only has 3 pt
938 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
941 if (s->s.h.txfmmode == i)
/* mode and inter-prediction probability updates */
946 for (i = 0; i < 3; i++)
947 if (vp56_rac_get_prob_branchy(&s->c, 252))
948 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
949 if (!s->s.h.keyframe && !s->s.h.intraonly) {
950 for (i = 0; i < 7; i++)
951 for (j = 0; j < 3; j++)
952 if (vp56_rac_get_prob_branchy(&s->c, 252))
953 s->prob.p.mv_mode[i][j] =
954 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
956 if (s->s.h.filtermode == FILTER_SWITCHABLE)
957 for (i = 0; i < 4; i++)
958 for (j = 0; j < 2; j++)
959 if (vp56_rac_get_prob_branchy(&s->c, 252))
960 s->prob.p.filter[i][j] =
961 update_prob(&s->c, s->prob.p.filter[i][j]);
963 for (i = 0; i < 4; i++)
964 if (vp56_rac_get_prob_branchy(&s->c, 252))
965 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
967 if (s->s.h.allowcompinter) {
968 s->s.h.comppredmode = vp8_rac_get(&s->c);
969 if (s->s.h.comppredmode)
970 s->s.h.comppredmode += vp8_rac_get(&s->c);
971 if (s->s.h.comppredmode == PRED_SWITCHABLE)
972 for (i = 0; i < 5; i++)
973 if (vp56_rac_get_prob_branchy(&s->c, 252))
975 update_prob(&s->c, s->prob.p.comp[i]);
977 s->s.h.comppredmode = PRED_SINGLEREF;
980 if (s->s.h.comppredmode != PRED_COMPREF) {
981 for (i = 0; i < 5; i++) {
982 if (vp56_rac_get_prob_branchy(&s->c, 252))
983 s->prob.p.single_ref[i][0] =
984 update_prob(&s->c, s->prob.p.single_ref[i][0]);
985 if (vp56_rac_get_prob_branchy(&s->c, 252))
986 s->prob.p.single_ref[i][1] =
987 update_prob(&s->c, s->prob.p.single_ref[i][1]);
991 if (s->s.h.comppredmode != PRED_SINGLEREF) {
992 for (i = 0; i < 5; i++)
993 if (vp56_rac_get_prob_branchy(&s->c, 252))
994 s->prob.p.comp_ref[i] =
995 update_prob(&s->c, s->prob.p.comp_ref[i]);
998 for (i = 0; i < 4; i++)
999 for (j = 0; j < 9; j++)
1000 if (vp56_rac_get_prob_branchy(&s->c, 252))
1001 s->prob.p.y_mode[i][j] =
1002 update_prob(&s->c, s->prob.p.y_mode[i][j]);
1004 for (i = 0; i < 4; i++)
1005 for (j = 0; j < 4; j++)
1006 for (k = 0; k < 3; k++)
1007 if (vp56_rac_get_prob_branchy(&s->c, 252))
1008 s->prob.p.partition[3 - i][j][k] =
1010 s->prob.p.partition[3 - i][j][k]);
1012 // mv fields don't use the update_prob subexp model for some reason
1013 for (i = 0; i < 3; i++)
1014 if (vp56_rac_get_prob_branchy(&s->c, 252))
1015 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1017 for (i = 0; i < 2; i++) {
1018 if (vp56_rac_get_prob_branchy(&s->c, 252))
1019 s->prob.p.mv_comp[i].sign =
1020 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1022 for (j = 0; j < 10; j++)
1023 if (vp56_rac_get_prob_branchy(&s->c, 252))
1024 s->prob.p.mv_comp[i].classes[j] =
1025 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1027 if (vp56_rac_get_prob_branchy(&s->c, 252))
1028 s->prob.p.mv_comp[i].class0 =
1029 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1031 for (j = 0; j < 10; j++)
1032 if (vp56_rac_get_prob_branchy(&s->c, 252))
1033 s->prob.p.mv_comp[i].bits[j] =
1034 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1037 for (i = 0; i < 2; i++) {
1038 for (j = 0; j < 2; j++)
1039 for (k = 0; k < 3; k++)
1040 if (vp56_rac_get_prob_branchy(&s->c, 252))
1041 s->prob.p.mv_comp[i].class0_fp[j][k] =
1042 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1044 for (j = 0; j < 3; j++)
1045 if (vp56_rac_get_prob_branchy(&s->c, 252))
1046 s->prob.p.mv_comp[i].fp[j] =
1047 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1050 if (s->s.h.highprecisionmvs) {
1051 for (i = 0; i < 2; i++) {
1052 if (vp56_rac_get_prob_branchy(&s->c, 252))
1053 s->prob.p.mv_comp[i].class0_hp =
1054 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1056 if (vp56_rac_get_prob_branchy(&s->c, 252))
1057 s->prob.p.mv_comp[i].hp =
1058 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
/* total header size = uncompressed part + compressed part */
1063 return (data2 - data) + size2;
// Recursively parse one superblock partition subtree rooted at (row, col)
// and decode its leaf blocks. bl is the current block level (64x64 down to
// 8x8); hbs = 4 >> bl is half the block size in 8-pixel units. yoff/uvoff
// are byte offsets into the luma/chroma planes of the current frame.
// NOTE(review): some original lines of this function are elided in this excerpt.
1066 static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1067 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1069 const VP9Context *s = td->s;
// Partition probability context: bit 0 comes from the above-row context,
// bit 1 from the left-column context, both sampled at the current level.
1070 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1071 (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
// Keyframe/intra-only frames use static default partition probabilities;
// inter frames use the per-frame adapted ones.
1072 const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
1073 s->prob.p.partition[bl][c];
1074 enum BlockPartition bp;
1075 ptrdiff_t hbs = 4 >> bl;
1076 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1077 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1078 int bytesperpixel = s->bytesperpixel;
// Read the partition symbol from the range coder and decode the leaf.
1081 bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1082 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1083 } else if (col + hbs < s->cols) { // FIXME why not <=?
1084 if (row + hbs < s->rows) { // FIXME why not <=?
// Block fully inside the frame: full partition tree symbol is coded.
1085 bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1087 case PARTITION_NONE:
1088 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
// Horizontal split: top half, then bottom half hbs*8 pixel rows below.
1091 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1092 yoff += hbs * 8 * y_stride;
1093 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1094 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
// Vertical split: left half, then right half hbs*8 pixel columns over.
1097 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1098 yoff += hbs * 8 * bytesperpixel;
1099 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1100 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
1102 case PARTITION_SPLIT:
// Recurse into the four quadrants at the next (smaller) block level.
1103 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1104 decode_sb(td, row, col + hbs, lflvl,
1105 yoff + 8 * hbs * bytesperpixel,
1106 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1107 yoff += hbs * 8 * y_stride;
1108 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1109 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1110 decode_sb(td, row + hbs, col + hbs, lflvl,
1111 yoff + 8 * hbs * bytesperpixel,
1112 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// Block hangs over the bottom frame edge: only a single branch bit
// chooses between split and the remaining legal partition.
1117 } else if (vp56_rac_get_prob_branchy(td->c, p[1])) {
1118 bp = PARTITION_SPLIT;
1119 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1120 decode_sb(td, row, col + hbs, lflvl,
1121 yoff + 8 * hbs * bytesperpixel,
1122 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1125 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
// Block hangs over the right frame edge.
1127 } else if (row + hbs < s->rows) { // FIXME why not <=?
1128 if (vp56_rac_get_prob_branchy(td->c, p[2])) {
1129 bp = PARTITION_SPLIT;
1130 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1131 yoff += hbs * 8 * y_stride;
1132 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1133 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1136 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
// Block hangs over both edges: split is the only legal choice.
1139 bp = PARTITION_SPLIT;
1140 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
// Record the decision for backward probability adaptation.
1142 td->counts.partition[bl][c][bp]++;
// Second-pass variant of decode_sb(): instead of reading partition symbols
// from the bitstream, replays the block structure recorded in td->b during
// the first pass and re-runs block decoding (reconstruction) from memory.
// NOTE(review): some original lines of this function are elided in this excerpt.
1145 static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1146 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1148 const VP9Context *s = td->s;
1149 VP9Block *b = td->b;
1150 ptrdiff_t hbs = 4 >> bl;
1151 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1152 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1153 int bytesperpixel = s->bytesperpixel;
// At the smallest level the stored block must be an 8x8 leaf.
1156 av_assert2(b->bl == BL_8X8);
1157 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1158 } else if (td->b->bl == bl) {
// Stored block lives at this level: decode it, then its second half
// for H/V partitions when that half lies inside the frame.
1159 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1160 if (b->bp == PARTITION_H && row + hbs < s->rows) {
1161 yoff += hbs * 8 * y_stride;
1162 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1163 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
1164 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
1165 yoff += hbs * 8 * bytesperpixel;
1166 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1167 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// Stored block is smaller than this level: recurse into the quadrants
// that are (at least partially) inside the frame.
1170 decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
1171 if (col + hbs < s->cols) { // FIXME why not <=?
1172 if (row + hbs < s->rows) {
1173 decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1174 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1175 yoff += hbs * 8 * y_stride;
1176 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1177 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1178 decode_sb_mem(td, row + hbs, col + hbs, lflvl,
1179 yoff + 8 * hbs * bytesperpixel,
1180 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// Only the right neighbor is inside the frame.
1182 yoff += hbs * 8 * bytesperpixel;
1183 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1184 decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
// Only the bottom neighbor is inside the frame.
1186 } else if (row + hbs < s->rows) {
1187 yoff += hbs * 8 * y_stride;
1188 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1189 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
/**
 * Compute the pixel-block range covered by one tile.
 *
 * Tile idx out of 2^log2_n tiles spans superblocks
 * [idx*n >> log2_n, (idx+1)*n >> log2_n) over n total superblocks.
 * The bounds are clamped to n and converted to 8-pixel block units
 * (one superblock = 8 blocks) before being written to *start / *end.
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_lo = (idx * n) >> log2_n;
    int sb_hi = ((idx + 1) * n) >> log2_n;

    /* clamp both bounds to the superblock count */
    if (sb_lo > n)
        sb_lo = n;
    if (sb_hi > n)
        sb_hi = n;

    *start = sb_lo << 3;
    *end   = sb_hi << 3;
}
// Free per-context scratch buffers: the intra-prediction backup row and the
// per-tile-thread block/coefficient base buffers.
// NOTE(review): declaration/brace lines of this function are elided in this excerpt.
1202 static void free_buffers(VP9Context *s)
1206 av_freep(&s->intra_pred_data[0]);
1207 for (i = 0; i < s->active_tile_cols; i++) {
1208 av_freep(&s->td[i].b_base);
1209 av_freep(&s->td[i].block_base);
// Codec close callback: release every internal frame, all 8 VP9 reference
// slots (current and shadow sets), the frame-extradata pool and the
// slice-threading progress entries.
1213 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1215 VP9Context *s = avctx->priv_data;
// The three internal frames (current frame plus the two helper refs).
1218 for (i = 0; i < 3; i++) {
1219 vp9_frame_unref(avctx, &s->s.frames[i]);
1220 av_frame_free(&s->s.frames[i].tf.f);
1222 av_buffer_pool_uninit(&s->frame_extradata_pool);
// The 8 reference slots and their next_refs shadow copies.
1223 for (i = 0; i < 8; i++) {
1224 ff_thread_release_buffer(avctx, &s->s.refs[i]);
1225 av_frame_free(&s->s.refs[i].f);
1226 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1227 av_frame_free(&s->next_refs[i].f);
1231 vp9_free_entries(avctx);
// Single-threaded tile decoding: initializes one range decoder per tile
// column, then walks superblock rows across all tile columns, decoding,
// backing up intra-prediction data and loop-filtering row by row.
// NOTE(review): some original lines of this function are elided in this excerpt.
1236 static int decode_tiles(AVCodecContext *avctx,
1237 const uint8_t *data, int size)
1239 VP9Context *s = avctx->priv_data;
1240 VP9TileData *td = &s->td[0];
1241 int row, col, tile_row, tile_col, ret;
1243 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1245 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1247 f = s->s.frames[CUR_FRAME].tf.f;
1248 ls_y = f->linesize[0];
1249 ls_uv =f->linesize[1];
1250 bytesperpixel = s->bytesperpixel;
// Set up one range decoder per tile from the packet payload; the last
// tile uses the remaining data, earlier tiles carry a 32-bit size prefix.
1253 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1254 set_tile_offset(&tile_row_start, &tile_row_end,
1255 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1257 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1260 if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1261 tile_row == s->s.h.tiling.tile_rows - 1) {
1264 tile_size = AV_RB32(data);
// Corrupt tile size: report full progress so frame-threaded
// consumers are not left waiting, then bail out.
1268 if (tile_size > size) {
1269 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1270 return AVERROR_INVALIDDATA;
1272 ret = ff_vp56_init_range_decoder(&td->c_b[tile_col], data, tile_size);
1275 if (vp56_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker bit
1276 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1277 return AVERROR_INVALIDDATA;
// Decode superblock rows (8 blocks = 64 pixels high each).
1283 for (row = tile_row_start; row < tile_row_end;
1284 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1285 VP9Filter *lflvl_ptr = s->lflvl;
1286 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1288 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1289 set_tile_offset(&tile_col_start, &tile_col_end,
1290 tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1291 td->tile_col_start = tile_col_start;
// Reset the left-edge contexts at each tile column boundary.
1293 memset(td->left_partition_ctx, 0, 8);
1294 memset(td->left_skip_ctx, 0, 8);
1295 if (s->s.h.keyframe || s->s.h.intraonly) {
1296 memset(td->left_mode_ctx, DC_PRED, 16);
1298 memset(td->left_mode_ctx, NEARESTMV, 8);
1300 memset(td->left_y_nnz_ctx, 0, 16);
1301 memset(td->left_uv_nnz_ctx, 0, 32);
1302 memset(td->left_segpred_ctx, 0, 8);
1304 td->c = &td->c_b[tile_col];
1307 for (col = tile_col_start;
1309 col += 8, yoff2 += 64 * bytesperpixel,
1310 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1311 // FIXME integrate with lf code (i.e. zero after each
1312 // use, similar to invtxfm coefficients, or similar)
1314 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
// Second pass replays stored block data; first pass parses the
// bitstream (and checks for premature end of the range coder).
1318 decode_sb_mem(td, row, col, lflvl_ptr,
1319 yoff2, uvoff2, BL_64X64);
1321 if (vpX_rac_is_end(td->c)) {
1322 return AVERROR_INVALIDDATA;
1324 decode_sb(td, row, col, lflvl_ptr,
1325 yoff2, uvoff2, BL_64X64);
1333 // backup pre-loopfilter reconstruction data for intra
1334 // prediction of next row of sb64s
1335 if (row + 8 < s->rows) {
1336 memcpy(s->intra_pred_data[0],
1337 f->data[0] + yoff + 63 * ls_y,
1338 8 * s->cols * bytesperpixel);
1339 memcpy(s->intra_pred_data[1],
1340 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1341 8 * s->cols * bytesperpixel >> s->ss_h);
1342 memcpy(s->intra_pred_data[2],
1343 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1344 8 * s->cols * bytesperpixel >> s->ss_h);
1347 // loopfilter one row
1348 if (s->s.h.filter.level) {
1351 lflvl_ptr = s->lflvl;
1352 for (col = 0; col < s->cols;
1353 col += 8, yoff2 += 64 * bytesperpixel,
1354 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1355 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
1360 // FIXME maybe we can make this more finegrained by running the
1361 // loopfilter per-block instead of after each sbrow
1362 // In fact that would also make intra pred left preparation easier?
// Publish per-sb-row progress for frame-threaded consumers.
1363 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
// Slice-threading worker: each job (jobnr) decodes one tile column for all
// tile rows of the frame, reporting per-sb-row progress so the main-thread
// loopfilter_proc() can follow behind.
// NOTE(review): some original lines of this function are elided in this excerpt.
1370 static av_always_inline
1371 int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
1374 VP9Context *s = avctx->priv_data;
1375 VP9TileData *td = &s->td[jobnr];
1376 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1377 int bytesperpixel = s->bytesperpixel, row, col, tile_row;
1378 unsigned tile_cols_len;
1379 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1380 VP9Filter *lflvl_ptr_base;
1383 f = s->s.frames[CUR_FRAME].tf.f;
1384 ls_y = f->linesize[0];
1385 ls_uv =f->linesize[1];
// This job's tile column is selected by jobnr; compute its horizontal
// pixel offsets and its slice of the loop-filter level array.
1387 set_tile_offset(&tile_col_start, &tile_col_end,
1388 jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1389 td->tile_col_start = tile_col_start;
1390 uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3);
1391 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1392 lflvl_ptr_base = s->lflvl+(tile_col_start >> 3);
1394 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1395 set_tile_offset(&tile_row_start, &tile_row_end,
1396 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
// One pre-initialized range decoder per tile row for this column.
1398 td->c = &td->c_b[tile_row];
1399 for (row = tile_row_start; row < tile_row_end;
1400 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1401 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1402 VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3);
// Reset left-edge contexts at the start of each superblock row.
1404 memset(td->left_partition_ctx, 0, 8);
1405 memset(td->left_skip_ctx, 0, 8);
1406 if (s->s.h.keyframe || s->s.h.intraonly) {
1407 memset(td->left_mode_ctx, DC_PRED, 16);
1409 memset(td->left_mode_ctx, NEARESTMV, 8);
1411 memset(td->left_y_nnz_ctx, 0, 16);
1412 memset(td->left_uv_nnz_ctx, 0, 32);
1413 memset(td->left_segpred_ctx, 0, 8);
1415 for (col = tile_col_start;
1417 col += 8, yoff2 += 64 * bytesperpixel,
1418 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1419 // FIXME integrate with lf code (i.e. zero after each
1420 // use, similar to invtxfm coefficients, or similar)
1421 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1422 decode_sb(td, row, col, lflvl_ptr,
1423 yoff2, uvoff2, BL_64X64);
1426 // backup pre-loopfilter reconstruction data for intra
1427 // prediction of next row of sb64s
1428 tile_cols_len = tile_col_end - tile_col_start;
1429 if (row + 8 < s->rows) {
1430 memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1431 f->data[0] + yoff + 63 * ls_y,
1432 8 * tile_cols_len * bytesperpixel);
1433 memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1434 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1435 8 * tile_cols_len * bytesperpixel >> s->ss_h);
1436 memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1437 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1438 8 * tile_cols_len * bytesperpixel >> s->ss_h);
// Tell the loop-filter thread this sb row gained one finished tile.
1441 vp9_report_tile_progress(s, row >> 3, 1);
// Main-function for slice threading: for each superblock row, wait until
// every tile column has reported it finished (vp9_await_tile_progress),
// then run the loop filter across the whole row.
// NOTE(review): some original lines of this function are elided in this excerpt.
1447 static av_always_inline
1448 int loopfilter_proc(AVCodecContext *avctx)
1450 VP9Context *s = avctx->priv_data;
1451 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1452 VP9Filter *lflvl_ptr;
1453 int bytesperpixel = s->bytesperpixel, col, i;
1456 f = s->s.frames[CUR_FRAME].tf.f;
1457 ls_y = f->linesize[0];
1458 ls_uv =f->linesize[1];
1460 for (i = 0; i < s->sb_rows; i++) {
// Block until all tile_cols workers have finished sb row i.
1461 vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);
// Filtering is skipped entirely when the frame filter level is 0.
1463 if (s->s.h.filter.level) {
1464 yoff = (ls_y * 64)*i;
1465 uvoff = (ls_uv * 64 >> s->ss_v)*i;
1466 lflvl_ptr = s->lflvl+s->sb_cols*i;
1467 for (col = 0; col < s->cols;
1468 col += 8, yoff += 64 * bytesperpixel,
1469 uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1470 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col,
// Top-level decode callback: parse the frame header, manage the internal
// frame set and the 8 reference slots, run tile decoding (single-threaded,
// slice-threaded or via hwaccel), adapt probabilities, and output the frame
// unless it is marked invisible.
// NOTE(review): some original lines of this function are elided in this excerpt.
1479 static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
1480 int *got_frame, AVPacket *pkt)
1482 const uint8_t *data = pkt->data;
1483 int size = pkt->size;
1484 VP9Context *s = avctx->priv_data;
// Keep the previous segmentation-map ref when the new header does not
// update the map (it is still needed for prediction).
1486 int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
1487 (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
1490 if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
// ret == 0: "show existing frame" - output reference `ref` directly
// without decoding anything.
1492 } else if (ret == 0) {
1493 if (!s->s.refs[ref].f->buf[0]) {
1494 av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
1495 return AVERROR_INVALIDDATA;
1497 if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
1499 ((AVFrame *)frame)->pts = pkt->pts;
1501 FF_DISABLE_DEPRECATION_WARNINGS
1502 ((AVFrame *)frame)->pkt_pts = pkt->pts;
1503 FF_ENABLE_DEPRECATION_WARNINGS
1505 ((AVFrame *)frame)->pkt_dts = pkt->dts;
// Even for show-existing, refresh the next_refs shadow set so state
// stays consistent for the following frame.
1506 for (i = 0; i < 8; i++) {
1507 if (s->next_refs[i].f->buf[0])
1508 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1509 if (s->s.refs[i].f->buf[0] &&
1510 (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
// Rotate the helper frames: the previous CUR_FRAME becomes the
// segmentation-map and/or mv-pair reference for this frame.
1519 if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
1520 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
1521 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
1522 if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1523 (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
1526 if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
1527 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
1528 if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1529 (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
1531 if (s->s.frames[CUR_FRAME].tf.f->buf[0])
1532 vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME])
1533 if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
1535 f = s->s.frames[CUR_FRAME].tf.f;
1536 f->key_frame = s->s.h.keyframe;
1537 f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
// Drop a stale segmentation-map ref whose dimensions no longer match.
1539 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
1540 (s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width ||
1541 s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
1542 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
// Build next_refs: slots flagged in refreshrefmask point at the new
// frame, the rest carry over the existing reference.
1546 for (i = 0; i < 8; i++) {
1547 if (s->next_refs[i].f->buf[0])
1548 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1549 if (s->s.h.refreshrefmask & (1 << i)) {
1550 ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
1551 } else if (s->s.refs[i].f->buf[0]) {
1552 ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
// Hardware acceleration path: hand the whole packet to the hwaccel.
1558 if (avctx->hwaccel) {
1559 ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
1562 ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
1565 ret = avctx->hwaccel->end_frame(avctx);
1571 // main tile decode loop
// Reset the above-row contexts for the whole frame width.
1572 memset(s->above_partition_ctx, 0, s->cols);
1573 memset(s->above_skip_ctx, 0, s->cols);
1574 if (s->s.h.keyframe || s->s.h.intraonly) {
1575 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
1577 memset(s->above_mode_ctx, NEARESTMV, s->cols);
1579 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
1580 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
1581 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
1582 memset(s->above_segpred_ctx, 0, s->cols);
// Two-pass decoding is used with frame threads when the context will
// be refreshed non-parallel (bitstream parse, then reconstruction).
1583 s->pass = s->s.frames[CUR_FRAME].uses_2pass =
1584 avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
1585 if ((ret = update_block_buffers(avctx)) < 0) {
1586 av_log(avctx, AV_LOG_ERROR,
1587 "Failed to allocate block buffers\n");
// In parallel mode the context is saved up-front (no backward
// adaptation), which also lets frame threads start earlier.
1590 if (s->s.h.refreshctx && s->s.h.parallelmode) {
1593 for (i = 0; i < 4; i++) {
1594 for (j = 0; j < 2; j++)
1595 for (k = 0; k < 2; k++)
1596 for (l = 0; l < 6; l++)
1597 for (m = 0; m < 6; m++)
1598 memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
1599 s->prob.coef[i][j][k][l][m], 3);
1600 if (s->s.h.txfmmode == i)
1603 s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
1604 ff_thread_finish_setup(avctx);
1605 } else if (!s->s.h.refreshctx) {
1606 ff_thread_finish_setup(avctx);
// Reset per-sb-row progress counters for slice threading.
1610 if (avctx->active_thread_type & FF_THREAD_SLICE) {
1611 for (i = 0; i < s->sb_rows; i++)
1612 atomic_store(&s->entries[i], 0);
// Rewind per-tile scratch pointers to the start of their buffers.
1617 for (i = 0; i < s->active_tile_cols; i++) {
1618 s->td[i].b = s->td[i].b_base;
1619 s->td[i].block = s->td[i].block_base;
1620 s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
1621 s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
1622 s->td[i].eob = s->td[i].eob_base;
1623 s->td[i].uveob[0] = s->td[i].uveob_base[0];
1624 s->td[i].uveob[1] = s->td[i].uveob_base[1];
// Slice-threaded path: pre-initialize every tile's range decoder here,
// then run decode_tiles_mt workers with loopfilter_proc as main func.
1628 if (avctx->active_thread_type == FF_THREAD_SLICE) {
1629 int tile_row, tile_col;
1631 av_assert1(!s->pass);
1633 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1634 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1637 if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1638 tile_row == s->s.h.tiling.tile_rows - 1) {
1641 tile_size = AV_RB32(data);
1645 if (tile_size > size)
1646 return AVERROR_INVALIDDATA;
1647 ret = ff_vp56_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
1650 if (vp56_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
1651 return AVERROR_INVALIDDATA;
1657 ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
1661 ret = decode_tiles(avctx, data, size);
1663 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1668 // Sum all counts fields into td[0].counts for tile threading
1669 if (avctx->active_thread_type == FF_THREAD_SLICE)
1670 for (i = 1; i < s->s.h.tiling.tile_cols; i++)
1671 for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
1672 ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];
// Backward probability adaptation from the gathered symbol counts.
1674 if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
1675 ff_vp9_adapt_probs(s);
1676 ff_thread_finish_setup(avctx);
1678 } while (s->pass++ == 1);
1679 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
// Commit next_refs into the active reference slots.
1683 for (i = 0; i < 8; i++) {
1684 if (s->s.refs[i].f->buf[0])
1685 ff_thread_release_buffer(avctx, &s->s.refs[i]);
1686 if (s->next_refs[i].f->buf[0] &&
1687 (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
// Invisible frames are decoded but not output.
1691 if (!s->s.h.invisible) {
1692 if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
// Flush callback (e.g. on seek): drop the three internal frames and all 8
// reference slots without freeing the AVFrame shells themselves.
1700 static void vp9_decode_flush(AVCodecContext *avctx)
1702 VP9Context *s = avctx->priv_data;
1705 for (i = 0; i < 3; i++)
1706 vp9_frame_unref(avctx, &s->s.frames[i]);
1707 for (i = 0; i < 8; i++)
1708 ff_thread_release_buffer(avctx, &s->s.refs[i]);
// Allocate the AVFrame shells for the three internal frames and the 8
// reference slots (active + shadow set). On any allocation failure the
// whole decoder state is torn down via vp9_decode_free() before returning.
1711 static int init_frames(AVCodecContext *avctx)
1713 VP9Context *s = avctx->priv_data;
1716 for (i = 0; i < 3; i++) {
1717 s->s.frames[i].tf.f = av_frame_alloc();
1718 if (!s->s.frames[i].tf.f) {
1719 vp9_decode_free(avctx);
1720 av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1721 return AVERROR(ENOMEM);
1724 for (i = 0; i < 8; i++) {
1725 s->s.refs[i].f = av_frame_alloc();
1726 s->next_refs[i].f = av_frame_alloc();
1727 if (!s->s.refs[i].f || !s->next_refs[i].f) {
1728 vp9_decode_free(avctx);
1729 av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1730 return AVERROR(ENOMEM);
// Codec init callback: set header defaults (sharpness = -1 means "not yet
// signalled") and allocate the frame/reference AVFrame shells.
1737 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1739 VP9Context *s = avctx->priv_data;
1742 s->s.h.filter.sharpness = -1;
1744 return init_frames(avctx);
// Frame-threading context update: copy the decoding state a future frame
// needs from the source thread (src) into the destination thread (dst) -
// frame refs, reference slots, header fields and probability contexts.
// NOTE(review): some original lines of this function are elided in this excerpt.
1748 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1751 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
// Re-reference the three internal frames from the source thread.
1753 for (i = 0; i < 3; i++) {
1754 if (s->s.frames[i].tf.f->buf[0])
1755 vp9_frame_unref(dst, &s->s.frames[i]);
1756 if (ssrc->s.frames[i].tf.f->buf[0]) {
1757 if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
// dst's active refs become src's *next* refs (post-frame state).
1761 for (i = 0; i < 8; i++) {
1762 if (s->s.refs[i].f->buf[0])
1763 ff_thread_release_buffer(dst, &s->s.refs[i]);
1764 if (ssrc->next_refs[i].f->buf[0]) {
1765 if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
// Scalar header/state fields the next frame's header parse depends on.
1770 s->s.h.invisible = ssrc->s.h.invisible;
1771 s->s.h.keyframe = ssrc->s.h.keyframe;
1772 s->s.h.intraonly = ssrc->s.h.intraonly;
1773 s->ss_v = ssrc->ss_v;
1774 s->ss_h = ssrc->ss_h;
1775 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1776 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1777 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1778 s->bytesperpixel = ssrc->bytesperpixel;
1779 s->gf_fmt = ssrc->gf_fmt;
1782 s->s.h.bpp = ssrc->s.h.bpp;
1783 s->bpp_index = ssrc->bpp_index;
1784 s->pix_fmt = ssrc->pix_fmt;
// Bulk-copy the adapted probability contexts and per-frame deltas.
1785 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
1786 memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
1787 memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1788 sizeof(s->s.h.segmentation.feat));
1794 AVCodec ff_vp9_decoder = {
1796 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
1797 .type = AVMEDIA_TYPE_VIDEO,
1798 .id = AV_CODEC_ID_VP9,
1799 .priv_data_size = sizeof(VP9Context),
1800 .init = vp9_decode_init,
1801 .close = vp9_decode_free,
1802 .decode = vp9_decode_frame,
1803 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
1804 .caps_internal = FF_CODEC_CAP_SLICE_THREAD_HAS_MF |
1805 FF_CODEC_CAP_ALLOCATE_PROGRESS,
1806 .flush = vp9_decode_flush,
1807 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
1808 .profiles = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
1809 .bsfs = "vp9_superframe_split",
1810 .hw_configs = (const AVCodecHWConfigInternal*[]) {
1811 #if CONFIG_VP9_DXVA2_HWACCEL
1814 #if CONFIG_VP9_D3D11VA_HWACCEL
1815 HWACCEL_D3D11VA(vp9),
1817 #if CONFIG_VP9_D3D11VA2_HWACCEL
1818 HWACCEL_D3D11VA2(vp9),
1820 #if CONFIG_VP9_NVDEC_HWACCEL
1823 #if CONFIG_VP9_VAAPI_HWACCEL
1826 #if CONFIG_VP9_VDPAU_HWACCEL