2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
35 #include "libavutil/avassert.h"
36 #include "libavutil/pixdesc.h"
38 #define VP9_SYNCCODE 0x498342
/* Tear down the slice-threading progress state: destroys the mutex/cond
 * pair used by vp9_report_tile_progress()/vp9_await_tile_progress() and
 * frees the per-superblock-row atomic counters allocated in
 * vp9_alloc_entries(). No-op unless slice threading is active.
 * NOTE(review): closing braces are elided in this excerpt. */
41 static void vp9_free_entries(AVCodecContext *avctx) {
42 VP9Context *s = avctx->priv_data;
44 if (avctx->active_thread_type & FF_THREAD_SLICE) {
45 pthread_mutex_destroy(&s->progress_mutex);
46 pthread_cond_destroy(&s->progress_cond);
47 av_freep(&s->entries);
/* (Re)allocate n atomic progress counters (one per superblock row) for
 * slice-threaded decoding and (re)initialize the progress mutex/cond.
 * Returns 0 on success, AVERROR(ENOMEM) if the array allocation fails.
 * NOTE(review): the allocation-failure check and closing braces are
 * elided in this excerpt. */
51 static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
52 VP9Context *s = avctx->priv_data;
55 if (avctx->active_thread_type & FF_THREAD_SLICE) {
// Drop any counters from a previous frame size before reallocating.
57 av_freep(&s->entries);
59 s->entries = av_malloc_array(n, sizeof(atomic_int));
62 av_freep(&s->entries);
63 return AVERROR(ENOMEM);
// Start every row at 0: nothing decoded yet.
66 for (i = 0; i < n; i++)
67 atomic_init(&s->entries[i], 0);
69 pthread_mutex_init(&s->progress_mutex, NULL);
70 pthread_cond_init(&s->progress_cond, NULL);
/* Publish decode progress for one tile column: add n rows to the
 * counter for 'field' (release order, so a waiter that observes the new
 * count also observes the decoded pixel data) and wake any thread
 * blocked in vp9_await_tile_progress(). The mutex is held around the
 * update so the signal cannot race with a waiter's re-check. */
75 static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
76 pthread_mutex_lock(&s->progress_mutex);
77 atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
78 pthread_cond_signal(&s->progress_cond);
79 pthread_mutex_unlock(&s->progress_mutex);
/* Block until at least n rows of tile column 'field' are decoded.
 * Fast path: a lock-free acquire load that returns immediately when the
 * counter is already far enough (the early 'return' after L83 is elided
 * in this excerpt). Slow path: wait on the condvar; the relaxed load is
 * safe there because the acquire in the fast-path load / mutex acquire
 * orders the data. */
82 static void vp9_await_tile_progress(VP9Context *s, int field, int n) {
83 if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
86 pthread_mutex_lock(&s->progress_mutex);
87 while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) != n)
88 pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
89 pthread_mutex_unlock(&s->progress_mutex);
/* Stub for builds without slice threading: nothing to free. */
92 static void vp9_free_entries(AVCodecContext *avctx) {}
/* Stub for builds without slice threading: nothing to allocate, always succeeds. */
93 static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
/* Release all references held by a VP9Frame: the (possibly
 * threaded) frame buffer, the extradata buffer, and the hwaccel private
 * buffer. The raw pointers that aliased into those buffers
 * (segmentation_map points into extradata, hwaccel_picture_private into
 * hwaccel_priv_buf) are cleared so no dangling pointer survives. */
96 static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
98 ff_thread_release_buffer(avctx, &f->tf);
99 av_buffer_unref(&f->extradata);
100 av_buffer_unref(&f->hwaccel_priv_buf);
101 f->segmentation_map = NULL;
102 f->hwaccel_picture_private = NULL;
/* Allocate a frame buffer plus per-frame side data for a VP9Frame.
 * The extradata buffer holds the segmentation map (64 bytes per
 * superblock, i.e. one byte per 8x8 block) followed by the mv reference
 * array; both are sized from sb_cols * sb_rows and pooled so repeated
 * allocations of the same size are cheap. Also allocates the hwaccel
 * private buffer when a hwaccel with per-frame data is in use.
 * Returns 0 on success, negative AVERROR on failure (error paths and
 * the 'fail:' label are partially elided in this excerpt; the tail
 * unrefs the frame and reports ENOMEM). */
105 static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
107 VP9Context *s = avctx->priv_data;
110 ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
// One byte of segmentation map per 8x8 block: 64 per 64x64 superblock.
114 sz = 64 * s->sb_cols * s->sb_rows;
// Recreate the pool only when the frame size (and thus sz) changed.
115 if (sz != s->frame_extradata_pool_size) {
116 av_buffer_pool_uninit(&s->frame_extradata_pool);
117 s->frame_extradata_pool = av_buffer_pool_init(sz * (1 + sizeof(VP9mvrefPair)), NULL);
118 if (!s->frame_extradata_pool) {
119 s->frame_extradata_pool_size = 0;
122 s->frame_extradata_pool_size = sz;
124 f->extradata = av_buffer_pool_get(s->frame_extradata_pool);
// Pooled buffers are recycled, so zero them before use.
128 memset(f->extradata->data, 0, f->extradata->size);
// Layout: [segmentation map: sz bytes][mv ref pairs].
130 f->segmentation_map = f->extradata->data;
131 f->mv = (VP9mvrefPair *) (f->extradata->data + sz);
133 if (avctx->hwaccel) {
134 const AVHWAccel *hwaccel = avctx->hwaccel;
135 av_assert0(!f->hwaccel_picture_private);
136 if (hwaccel->frame_priv_data_size) {
137 f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
138 if (!f->hwaccel_priv_buf)
140 f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
// fail path: release everything acquired so far.
147 vp9_frame_unref(avctx, f);
148 return AVERROR(ENOMEM);
/* Create a new reference to src in dst: ref the frame buffer, the
 * extradata buffer and (if present) the hwaccel private buffer, and
 * copy the borrowed pointers/flags. On any failure the partially
 * constructed dst is unreffed and ENOMEM returned (error paths and the
 * 'fail:' label are partially elided in this excerpt). */
151 static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
155 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
159 dst->extradata = av_buffer_ref(src->extradata);
// segmentation_map points into the shared extradata buffer; copying the
// raw pointer is safe because dst now holds its own reference.
163 dst->segmentation_map = src->segmentation_map;
165 dst->uses_2pass = src->uses_2pass;
167 if (src->hwaccel_picture_private) {
168 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
169 if (!dst->hwaccel_priv_buf)
171 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
// fail path: drop whatever was already reffed into dst.
177 vp9_frame_unref(avctx, dst);
178 return AVERROR(ENOMEM);
/* Handle a (possible) frame-size or pixel-format change.
 * 1) If w/h/pix_fmt differ from the negotiated ones, rebuild the hwaccel
 *    candidate pixel-format list (keyed on the sw pix_fmt) and renegotiate
 *    via ff_thread_get_format().
 * 2) If the block grid actually changed, recompute superblock/block
 *    dimensions and reallocate the single slab that backs all the
 *    per-column "above" context arrays, intra prediction edge buffers and
 *    loop-filter level structs.
 * 3) Reinit the DSP contexts if the bit depth changed.
 * Returns 0 or a negative AVERROR.
 * NOTE(review): many lines (#endif markers, error checks, per-tile buffer
 * handling, closing braces) are elided in this excerpt. */
181 static int update_size(AVCodecContext *avctx, int w, int h)
183 #define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
184 CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
185 CONFIG_VP9_NVDEC_HWACCEL + \
186 CONFIG_VP9_VAAPI_HWACCEL + \
187 CONFIG_VP9_VDPAU_HWACCEL)
// +2: the software format itself plus the AV_PIX_FMT_NONE terminator.
188 enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
189 VP9Context *s = avctx->priv_data;
191 int bytesperpixel = s->bytesperpixel, ret, cols, rows;
194 av_assert0(w > 0 && h > 0);
196 if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
197 if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
// Offer every compiled-in hwaccel that can handle the sw format; note
// the YUV420P case is presumably meant to fall through into YUV420P10's
// DXVA2/D3D11/NVDEC/VAAPI entries (fallthrough markers elided here).
200 switch (s->pix_fmt) {
201 case AV_PIX_FMT_YUV420P:
202 #if CONFIG_VP9_VDPAU_HWACCEL
203 *fmtp++ = AV_PIX_FMT_VDPAU;
205 case AV_PIX_FMT_YUV420P10:
206 #if CONFIG_VP9_DXVA2_HWACCEL
207 *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
209 #if CONFIG_VP9_D3D11VA_HWACCEL
210 *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
211 *fmtp++ = AV_PIX_FMT_D3D11;
213 #if CONFIG_VP9_NVDEC_HWACCEL
214 *fmtp++ = AV_PIX_FMT_CUDA;
216 #if CONFIG_VP9_VAAPI_HWACCEL
217 *fmtp++ = AV_PIX_FMT_VAAPI;
220 case AV_PIX_FMT_YUV420P12:
221 #if CONFIG_VP9_NVDEC_HWACCEL
222 *fmtp++ = AV_PIX_FMT_CUDA;
224 #if CONFIG_VP9_VAAPI_HWACCEL
225 *fmtp++ = AV_PIX_FMT_VAAPI;
// Software decode is always the last (lowest-priority) candidate.
230 *fmtp++ = s->pix_fmt;
231 *fmtp = AV_PIX_FMT_NONE;
233 ret = ff_thread_get_format(avctx, pix_fmts);
237 avctx->pix_fmt = ret;
238 s->gf_fmt = s->pix_fmt;
// If the 8x8 block grid and format are unchanged, context buffers from
// the previous frame are still valid — nothing more to do.
246 if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
249 s->last_fmt = s->pix_fmt;
250 s->sb_cols = (w + 63) >> 6;
251 s->sb_rows = (h + 63) >> 6;
252 s->cols = (w + 7) >> 3;
253 s->rows = (h + 7) >> 3;
// Slice threading needs one lflvl struct per superblock row.
254 lflvl_len = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;
// Carve all "above" context arrays out of one slab; p advances through it.
256 #define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
257 av_freep(&s->intra_pred_data[0]);
258 // FIXME we slightly over-allocate here for subsampled chroma, but a little
259 // bit of padding shouldn't affect performance...
260 p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
261 lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
263 return AVERROR(ENOMEM);
264 assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
265 assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
266 assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
267 assign(s->above_y_nnz_ctx, uint8_t *, 16);
268 assign(s->above_mode_ctx, uint8_t *, 16);
269 assign(s->above_mv_ctx, VP56mv(*)[2], 16);
270 assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
271 assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
272 assign(s->above_partition_ctx, uint8_t *, 8);
273 assign(s->above_skip_ctx, uint8_t *, 8);
274 assign(s->above_txfm_ctx, uint8_t *, 8);
275 assign(s->above_segpred_ctx, uint8_t *, 8);
276 assign(s->above_intra_ctx, uint8_t *, 8);
277 assign(s->above_comp_ctx, uint8_t *, 8);
278 assign(s->above_ref_ctx, uint8_t *, 8);
279 assign(s->above_filter_ctx, uint8_t *, 8);
280 assign(s->lflvl, VP9Filter *, lflvl_len);
// Size change invalidates the per-tile block buffers; drop them so
// update_block_buffers() reallocates at the new size.
284 for (i = 0; i < s->active_tile_cols; i++) {
285 av_freep(&s->td[i].b_base);
286 av_freep(&s->td[i].block_base);
// Bit depth changed (e.g. profile switch): reinit DSP function tables.
290 if (s->s.h.bpp != s->last_bpp) {
291 ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
292 ff_videodsp_init(&s->vdsp, s->s.h.bpp);
293 s->last_bpp = s->s.h.bpp;
/* (Re)allocate the per-tile block/coefficient scratch buffers.
 * In 2-pass mode a single VP9TileData (td[0]) gets buffers sized for the
 * whole frame (all superblocks); otherwise every active tile column gets
 * buffers for just one superblock. Each block_base slab is partitioned
 * into luma coeffs, two chroma coeff planes, luma EOBs and two chroma
 * EOB arrays, in that order. Returns 0 or AVERROR(ENOMEM).
 * NOTE(review): the early-return line, else branches and closing braces
 * are elided in this excerpt. */
299 static int update_block_buffers(AVCodecContext *avctx)
302 VP9Context *s = avctx->priv_data;
303 int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
304 VP9TileData *td = &s->td[0];
// Buffers already exist and match the current 2-pass mode: keep them.
306 if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
310 av_free(td->block_base);
// Chroma sizes scale down by the subsampling factors.
311 chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
312 chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
313 if (s->s.frames[CUR_FRAME].uses_2pass) {
314 int sbs = s->sb_cols * s->sb_rows;
316 td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
317 td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
318 16 * 16 + 2 * chroma_eobs) * sbs);
319 if (!td->b_base || !td->block_base)
320 return AVERROR(ENOMEM);
// Slab layout: Y coeffs | U coeffs | V coeffs | Y EOBs | U EOBs | V EOBs.
321 td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
322 td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
323 td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
324 td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
325 td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;
// Non-2-pass: free any leftover per-tile buffers (td[0] freed above),
// then allocate one-superblock-sized buffers per active tile column.
327 for (i = 1; i < s->active_tile_cols; i++) {
328 if (s->td[i].b_base && s->td[i].block_base) {
329 av_free(s->td[i].b_base);
330 av_free(s->td[i].block_base);
333 for (i = 0; i < s->active_tile_cols; i++) {
334 s->td[i].b_base = av_malloc(sizeof(VP9Block));
335 s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
336 16 * 16 + 2 * chroma_eobs);
337 if (!s->td[i].b_base || !s->td[i].block_base)
338 return AVERROR(ENOMEM);
339 s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
340 s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
341 s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
342 s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
343 s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;
346 s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;
351 // The sign bit is at the end, not the start, of a bit sequence
/* Read an n-bit magnitude followed by a sign bit (VP9 bitstream order,
 * the reverse of get_sbits()); returns -v when the sign bit is set. */
352 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
354 int v = get_bits(gb, n);
355 return get_bits1(gb) ? -v : v;
/* Inverse of the "recenter non-negative" mapping used by the subexp
 * probability coder: maps the coded offset v back around the pivot m.
 * NOTE(review): most of the body (the other branches for v relative to
 * 2*m) is elided in this excerpt; only the odd-v branch is visible. */
358 static av_always_inline int inv_recenter_nonneg(int v, int m)
363 return m - ((v + 1) >> 1);
367 // differential forward probability updates
/* Decode a subexponential differential probability update relative to
 * the current probability p (in [1,255]) and return the new probability.
 * The VLC splits the delta into four ranges (0-15, 16-31, 32-63, 64+,
 * the last doubled with a sign-ish low bit); inv_map_table[] then maps
 * the coarse codes onto actual deltas. NOTE(review): some lines (table
 * tail, else branch around L246, closing braces) are elided here. */
368 static int update_prob(VP56RangeCoder *c, int p)
370 static const uint8_t inv_map_table[255] = {
371 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176,
372 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9,
373 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24,
374 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39,
375 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54,
376 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
377 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
378 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100,
379 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
380 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
381 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
382 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
383 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
384 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
385 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
386 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
387 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
388 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
393 /* This code is trying to do a differential probability update. For a
394 * current probability A in the range [1, 255], the difference to a new
395 * probability of any value can be expressed differentially as 1-A, 255-A
396 * where some part of this (absolute range) exists both in positive as
397 * well as the negative part, whereas another part only exists in one
398 * half. We're trying to code this shared part differentially, i.e.
399 * times two where the value of the lowest bit specifies the sign, and
400 * the single part is then coded on top of this. This absolute difference
401 * then again has a value of [0, 254], but a bigger value in this range
402 * indicates that we're further away from the original value A, so we
403 * can code this as a VLC code, since higher values are increasingly
404 * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
405 * updates vs. the 'fine, exact' updates further down the range, which
406 * adds one extra dimension to this differential update model. */
408 if (!vp8_rac_get(c)) {
409 d = vp8_rac_get_uint(c, 4) + 0;
410 } else if (!vp8_rac_get(c)) {
411 d = vp8_rac_get_uint(c, 4) + 16;
412 } else if (!vp8_rac_get(c)) {
413 d = vp8_rac_get_uint(c, 5) + 32;
415 d = vp8_rac_get_uint(c, 7);
// Top range codes are doubled, with one extra bit selecting the half.
417 d = (d << 1) - 65 + vp8_rac_get(c);
419 av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
// Recenter around p, mirroring for p > 128 to stay within [1,255].
422 return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
423 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
/* Parse the bit-depth / colorspace / subsampling fields of the VP9
 * uncompressed header and fill in s->s.h.bpp, s->bytesperpixel,
 * s->ss_h/ss_v, s->pix_fmt and the avctx color properties.
 * Returns 0 or AVERROR_INVALIDDATA for reserved/unsupported combos.
 * NOTE(review): several else branches and closing braces are elided in
 * this excerpt. */
426 static int read_colorspace_details(AVCodecContext *avctx)
428 static const enum AVColorSpace colorspaces[8] = {
429 AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
430 AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
432 VP9Context *s = avctx->priv_data;
// Profiles 0/1 are always 8-bit; profiles 2/3 signal 10- vs 12-bit.
433 int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12
436 s->s.h.bpp = 8 + bits * 2;
437 s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
438 avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
439 if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
440 static const enum AVPixelFormat pix_fmt_rgb[3] = {
441 AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
// RGB is never subsampled and always full range.
443 s->ss_h = s->ss_v = 0;
444 avctx->color_range = AVCOL_RANGE_JPEG;
445 s->pix_fmt = pix_fmt_rgb[bits];
446 if (avctx->profile & 1) {
447 if (get_bits1(&s->gb)) {
448 av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
449 return AVERROR_INVALIDDATA;
// RGB requires an odd profile (1 or 3).
452 av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
454 return AVERROR_INVALIDDATA;
// YUV path: table indexed by [bit-depth][vertical ss][horizontal ss].
457 static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
458 { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
459 { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
460 { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
461 { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
462 { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
463 { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
465 avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
// Odd profiles carry explicit subsampling bits; even profiles are 4:2:0.
466 if (avctx->profile & 1) {
467 s->ss_h = get_bits1(&s->gb);
468 s->ss_v = get_bits1(&s->gb);
469 s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
470 if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
471 av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
473 return AVERROR_INVALIDDATA;
474 } else if (get_bits1(&s->gb)) {
475 av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
477 return AVERROR_INVALIDDATA;
480 s->ss_h = s->ss_v = 1;
481 s->pix_fmt = pix_fmt_for_ss[bits][1][1];
/* Parse the complete VP9 frame header: the uncompressed header (frame
 * marker, profile, show-existing-frame, frame type, size, references,
 * loop-filter, quantizer, segmentation, tiling) followed by the
 * arith-coded compressed header (probability updates for txfm modes,
 * coefficients, inter modes and motion vectors). Fills s->s.h and the
 * per-frame probability context, (re)allocates tile data as needed and
 * returns the number of header bytes consumed, or a negative AVERROR.
 * *ref is set when a show-existing-frame header is found.
 * NOTE(review): this is a heavily elided excerpt — many error checks,
 * else branches, #if/#endif markers and closing braces are missing. */
488 static int decode_frame_header(AVCodecContext *avctx,
489 const uint8_t *data, int size, int *ref)
491 VP9Context *s = avctx->priv_data;
492 int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
494 const uint8_t *data2;
497 if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
498 av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
501 if (get_bits(&s->gb, 2) != 0x2) { // frame marker
502 av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
503 return AVERROR_INVALIDDATA;
// Profile is 2 bits (low bit first); profile 3 needs a third, reserved bit.
505 avctx->profile = get_bits1(&s->gb);
506 avctx->profile |= get_bits1(&s->gb) << 1;
507 if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
508 if (avctx->profile > 3) {
509 av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
510 return AVERROR_INVALIDDATA;
512 s->s.h.profile = avctx->profile;
// show_existing_frame: just re-display a stored reference, no decode.
513 if (get_bits1(&s->gb)) {
514 *ref = get_bits(&s->gb, 3);
518 s->last_keyframe = s->s.h.keyframe;
519 s->s.h.keyframe = !get_bits1(&s->gb);
521 last_invisible = s->s.h.invisible;
522 s->s.h.invisible = !get_bits1(&s->gb);
523 s->s.h.errorres = get_bits1(&s->gb);
524 s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;
526 if (s->s.h.keyframe) {
527 if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
528 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
529 return AVERROR_INVALIDDATA;
531 if ((ret = read_colorspace_details(avctx)) < 0)
533 // for profile 1, here follows the subsampling bits
// Keyframes implicitly refresh every reference slot.
534 s->s.h.refreshrefmask = 0xff;
535 w = get_bits(&s->gb, 16) + 1;
536 h = get_bits(&s->gb, 16) + 1;
537 if (get_bits1(&s->gb)) // display size
538 skip_bits(&s->gb, 32);
// Non-keyframe path: intra-only flag is only coded for invisible frames.
540 s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
541 s->s.h.resetctx = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
542 if (s->s.h.intraonly) {
543 if (get_bits(&s->gb, 24) != VP9_SYNCCODE) { // synccode
544 av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
545 return AVERROR_INVALIDDATA;
547 if (avctx->profile >= 1) {
548 if ((ret = read_colorspace_details(avctx)) < 0)
// Profile-0 intra-only frames are hardcoded 8-bit 4:2:0 BT.470BG.
551 s->ss_h = s->ss_v = 1;
554 s->bytesperpixel = 1;
555 s->pix_fmt = AV_PIX_FMT_YUV420P;
556 avctx->colorspace = AVCOL_SPC_BT470BG;
557 avctx->color_range = AVCOL_RANGE_MPEG;
559 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
560 w = get_bits(&s->gb, 16) + 1;
561 h = get_bits(&s->gb, 16) + 1;
562 if (get_bits1(&s->gb)) // display size
563 skip_bits(&s->gb, 32);
// Regular inter frame: read the three active reference indices + biases.
565 s->s.h.refreshrefmask = get_bits(&s->gb, 8);
566 s->s.h.refidx[0] = get_bits(&s->gb, 3);
567 s->s.h.signbias[0] = get_bits1(&s->gb) && !s->s.h.errorres;
568 s->s.h.refidx[1] = get_bits(&s->gb, 3);
569 s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres;
570 s->s.h.refidx[2] = get_bits(&s->gb, 3);
571 s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres;
572 if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
573 !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
574 !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
575 av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
576 return AVERROR_INVALIDDATA;
// Frame size: either inherited from one of the refs, or coded explicitly.
578 if (get_bits1(&s->gb)) {
579 w = s->s.refs[s->s.h.refidx[0]].f->width;
580 h = s->s.refs[s->s.h.refidx[0]].f->height;
581 } else if (get_bits1(&s->gb)) {
582 w = s->s.refs[s->s.h.refidx[1]].f->width;
583 h = s->s.refs[s->s.h.refidx[1]].f->height;
584 } else if (get_bits1(&s->gb)) {
585 w = s->s.refs[s->s.h.refidx[2]].f->width;
586 h = s->s.refs[s->s.h.refidx[2]].f->height;
588 w = get_bits(&s->gb, 16) + 1;
589 h = get_bits(&s->gb, 16) + 1;
591 // Note that in this code, "CUR_FRAME" is actually before we
592 // have formally allocated a frame, and thus actually represents
// Last-frame MVs are only usable if the size did not change.
594 s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
595 s->s.frames[CUR_FRAME].tf.f->height == h;
596 if (get_bits1(&s->gb)) // display size
597 skip_bits(&s->gb, 32);
598 s->s.h.highprecisionmvs = get_bits1(&s->gb);
599 s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
// Compound prediction is only allowed when the refs disagree in sign bias;
// the ref shared by both biases becomes the fixed compound reference.
601 s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
602 s->s.h.signbias[0] != s->s.h.signbias[2];
603 if (s->s.h.allowcompinter) {
604 if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
605 s->s.h.fixcompref = 2;
606 s->s.h.varcompref[0] = 0;
607 s->s.h.varcompref[1] = 1;
608 } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
609 s->s.h.fixcompref = 1;
610 s->s.h.varcompref[0] = 0;
611 s->s.h.varcompref[1] = 2;
613 s->s.h.fixcompref = 0;
614 s->s.h.varcompref[0] = 1;
615 s->s.h.varcompref[1] = 2;
620 s->s.h.refreshctx = s->s.h.errorres ? 0 : get_bits1(&s->gb);
621 s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
622 s->s.h.framectxid = c = get_bits(&s->gb, 2);
623 if (s->s.h.keyframe || s->s.h.intraonly)
624 s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
626 /* loopfilter header data */
627 if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
628 // reset loopfilter defaults
629 s->s.h.lf_delta.ref[0] = 1;
630 s->s.h.lf_delta.ref[1] = 0;
631 s->s.h.lf_delta.ref[2] = -1;
632 s->s.h.lf_delta.ref[3] = -1;
633 s->s.h.lf_delta.mode[0] = 0;
634 s->s.h.lf_delta.mode[1] = 0;
635 memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
637 s->s.h.filter.level = get_bits(&s->gb, 6);
638 sharp = get_bits(&s->gb, 3);
639 // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
640 // the old cache values since they are still valid
641 if (s->s.h.filter.sharpness != sharp) {
642 for (i = 1; i <= 63; i++) {
646 limit >>= (sharp + 3) >> 2;
647 limit = FFMIN(limit, 9 - sharp);
649 limit = FFMAX(limit, 1);
651 s->filter_lut.lim_lut[i] = limit;
652 s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
655 s->s.h.filter.sharpness = sharp;
656 if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
657 if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
658 for (i = 0; i < 4; i++)
659 if (get_bits1(&s->gb))
660 s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
661 for (i = 0; i < 2; i++)
662 if (get_bits1(&s->gb))
663 s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
667 /* quantization header data */
668 s->s.h.yac_qi = get_bits(&s->gb, 8);
669 s->s.h.ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
670 s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
671 s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
672 s->s.h.lossless = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
673 s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
675 avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
677 /* segmentation header info */
678 if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
679 if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
680 for (i = 0; i < 7; i++)
681 s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
682 get_bits(&s->gb, 8) : 255;
683 if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
684 for (i = 0; i < 3; i++)
685 s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
686 get_bits(&s->gb, 8) : 255;
// Per-segment feature data: quantizer, loopfilter, reference, skip.
689 if (get_bits1(&s->gb)) {
690 s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
691 for (i = 0; i < 8; i++) {
692 if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
693 s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
694 if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
695 s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
696 if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
697 s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
698 s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
703 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
704 for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
705 int qyac, qydc, quvac, quvdc, lflvl, sh;
707 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
708 if (s->s.h.segmentation.absolute_vals)
709 qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
711 qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
713 qyac = s->s.h.yac_qi;
715 qydc = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
716 quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
717 quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
718 qyac = av_clip_uintp2(qyac, 8);
720 s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
721 s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
722 s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
723 s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];
// Loop-filter deltas scale by 2 when the base filter level is >= 32.
725 sh = s->s.h.filter.level >= 32;
726 if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
727 if (s->s.h.segmentation.absolute_vals)
728 lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
730 lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
732 lflvl = s->s.h.filter.level;
734 if (s->s.h.lf_delta.enabled) {
735 s->s.h.segmentation.feat[i].lflvl[0][0] =
736 s->s.h.segmentation.feat[i].lflvl[0][1] =
737 av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
738 for (j = 1; j < 4; j++) {
739 s->s.h.segmentation.feat[i].lflvl[j][0] =
740 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
741 s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
742 s->s.h.segmentation.feat[i].lflvl[j][1] =
743 av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
744 s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
747 memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
748 sizeof(s->s.h.segmentation.feat[i].lflvl));
753 if ((ret = update_size(avctx, w, h)) < 0) {
754 av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
// Tile columns: log2 between the minimum implied by sb_cols and the max
// where each tile still spans >= 4 superblocks, coded one bit at a time.
758 for (s->s.h.tiling.log2_tile_cols = 0;
759 s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
760 s->s.h.tiling.log2_tile_cols++) ;
761 for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
762 max = FFMAX(0, max - 1);
763 while (max > s->s.h.tiling.log2_tile_cols) {
764 if (get_bits1(&s->gb))
765 s->s.h.tiling.log2_tile_cols++;
769 s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
770 s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
// Tile-column count changed: rebuild per-tile contexts and range coders.
771 if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
776 for (i = 0; i < s->active_tile_cols; i++) {
777 av_free(s->td[i].b_base);
778 av_free(s->td[i].block_base);
783 s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
784 vp9_free_entries(avctx);
785 s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
786 s->s.h.tiling.tile_cols : 1;
787 vp9_alloc_entries(avctx, s->sb_rows);
788 if (avctx->active_thread_type == FF_THREAD_SLICE) {
789 n_range_coders = 4; // max_tile_rows
791 n_range_coders = s->s.h.tiling.tile_cols;
// One allocation: the VP9TileData array with range coders appended.
793 s->td = av_mallocz_array(s->active_tile_cols, sizeof(VP9TileData) +
794 n_range_coders * sizeof(VP56RangeCoder));
796 return AVERROR(ENOMEM);
797 rc = (VP56RangeCoder *) &s->td[s->active_tile_cols];
798 for (i = 0; i < s->active_tile_cols; i++) {
801 rc += n_range_coders;
805 /* check reference frames */
806 if (!s->s.h.keyframe && !s->s.h.intraonly) {
807 for (i = 0; i < 3; i++) {
808 AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
809 int refw = ref->width, refh = ref->height;
811 if (ref->format != avctx->pix_fmt) {
812 av_log(avctx, AV_LOG_ERROR,
813 "Ref pixfmt (%s) did not match current frame (%s)",
814 av_get_pix_fmt_name(ref->format),
815 av_get_pix_fmt_name(avctx->pix_fmt));
816 return AVERROR_INVALIDDATA;
817 } else if (refw == w && refh == h) {
818 s->mvscale[i][0] = s->mvscale[i][1] = 0;
// Scaled references allowed within [1/2x .. 16x] per dimension;
// mvscale is a 14-bit fixed-point ratio ref/cur.
820 if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
821 av_log(avctx, AV_LOG_ERROR,
822 "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
824 return AVERROR_INVALIDDATA;
826 s->mvscale[i][0] = (refw << 14) / w;
827 s->mvscale[i][1] = (refh << 14) / h;
828 s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
829 s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
// Probability context reset: all four contexts on keyframe/error-resilient,
// one context on intra-only with resetctx == 2.
834 if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
835 s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
836 s->prob_ctx[3].p = ff_vp9_default_probs;
837 memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
838 sizeof(ff_vp9_default_coef_probs));
839 memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
840 sizeof(ff_vp9_default_coef_probs));
841 memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
842 sizeof(ff_vp9_default_coef_probs));
843 memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
844 sizeof(ff_vp9_default_coef_probs));
845 } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
846 s->prob_ctx[c].p = ff_vp9_default_probs;
847 memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
848 sizeof(ff_vp9_default_coef_probs));
851 // next 16 bits is size of the rest of the header (arith-coded)
852 s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
853 s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;
855 data2 = align_get_bits(&s->gb);
856 if (size2 > size - (data2 - data)) {
857 av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
858 return AVERROR_INVALIDDATA;
860 ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
864 if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
865 av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
866 return AVERROR_INVALIDDATA;
// Reset adaptation counters: coef/eob only for intra frames, all otherwise.
869 for (i = 0; i < s->active_tile_cols; i++) {
870 if (s->s.h.keyframe || s->s.h.intraonly) {
871 memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
872 memset(s->td[i].counts.eob, 0, sizeof(s->td[0].counts.eob));
874 memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));
878 /* FIXME is it faster to not copy here, but do it down in the fw updates
879 * as explicit copies if the fw update is missing (and skip the copy upon
881 s->prob.p = s->prob_ctx[c].p;
884 if (s->s.h.lossless) {
885 s->s.h.txfmmode = TX_4X4;
887 s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
888 if (s->s.h.txfmmode == 3)
889 s->s.h.txfmmode += vp8_rac_get(&s->c);
891 if (s->s.h.txfmmode == TX_SWITCHABLE) {
// Each conditional update below is gated on a probability-252 branch bit.
892 for (i = 0; i < 2; i++)
893 if (vp56_rac_get_prob_branchy(&s->c, 252))
894 s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
895 for (i = 0; i < 2; i++)
896 for (j = 0; j < 2; j++)
897 if (vp56_rac_get_prob_branchy(&s->c, 252))
898 s->prob.p.tx16p[i][j] =
899 update_prob(&s->c, s->prob.p.tx16p[i][j]);
900 for (i = 0; i < 2; i++)
901 for (j = 0; j < 3; j++)
902 if (vp56_rac_get_prob_branchy(&s->c, 252))
903 s->prob.p.tx32p[i][j] =
904 update_prob(&s->c, s->prob.p.tx32p[i][j]);
// Coefficient probabilities, one set per txfm size.
909 for (i = 0; i < 4; i++) {
910 uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
911 if (vp8_rac_get(&s->c)) {
912 for (j = 0; j < 2; j++)
913 for (k = 0; k < 2; k++)
914 for (l = 0; l < 6; l++)
915 for (m = 0; m < 6; m++) {
916 uint8_t *p = s->prob.coef[i][j][k][l][m];
917 uint8_t *r = ref[j][k][l][m];
918 if (m >= 3 && l == 0) // dc only has 3 pt
920 for (n = 0; n < 3; n++) {
921 if (vp56_rac_get_prob_branchy(&s->c, 252))
922 p[n] = update_prob(&s->c, r[n]);
// Remaining 8 probs are derived from the pareto model table.
926 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
929 for (j = 0; j < 2; j++)
930 for (k = 0; k < 2; k++)
931 for (l = 0; l < 6; l++)
932 for (m = 0; m < 6; m++) {
933 uint8_t *p = s->prob.coef[i][j][k][l][m];
934 uint8_t *r = ref[j][k][l][m];
935 if (m > 3 && l == 0) // dc only has 3 pt
938 memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
941 if (s->s.h.txfmmode == i)
946 for (i = 0; i < 3; i++)
947 if (vp56_rac_get_prob_branchy(&s->c, 252))
948 s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
// Inter-specific probability updates only for non-intra frames.
949 if (!s->s.h.keyframe && !s->s.h.intraonly) {
950 for (i = 0; i < 7; i++)
951 for (j = 0; j < 3; j++)
952 if (vp56_rac_get_prob_branchy(&s->c, 252))
953 s->prob.p.mv_mode[i][j] =
954 update_prob(&s->c, s->prob.p.mv_mode[i][j]);
956 if (s->s.h.filtermode == FILTER_SWITCHABLE)
957 for (i = 0; i < 4; i++)
958 for (j = 0; j < 2; j++)
959 if (vp56_rac_get_prob_branchy(&s->c, 252))
960 s->prob.p.filter[i][j] =
961 update_prob(&s->c, s->prob.p.filter[i][j]);
963 for (i = 0; i < 4; i++)
964 if (vp56_rac_get_prob_branchy(&s->c, 252))
965 s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);
967 if (s->s.h.allowcompinter) {
968 s->s.h.comppredmode = vp8_rac_get(&s->c);
969 if (s->s.h.comppredmode)
970 s->s.h.comppredmode += vp8_rac_get(&s->c);
971 if (s->s.h.comppredmode == PRED_SWITCHABLE)
972 for (i = 0; i < 5; i++)
973 if (vp56_rac_get_prob_branchy(&s->c, 252))
975 update_prob(&s->c, s->prob.p.comp[i]);
977 s->s.h.comppredmode = PRED_SINGLEREF;
980 if (s->s.h.comppredmode != PRED_COMPREF) {
981 for (i = 0; i < 5; i++) {
982 if (vp56_rac_get_prob_branchy(&s->c, 252))
983 s->prob.p.single_ref[i][0] =
984 update_prob(&s->c, s->prob.p.single_ref[i][0]);
985 if (vp56_rac_get_prob_branchy(&s->c, 252))
986 s->prob.p.single_ref[i][1] =
987 update_prob(&s->c, s->prob.p.single_ref[i][1]);
991 if (s->s.h.comppredmode != PRED_SINGLEREF) {
992 for (i = 0; i < 5; i++)
993 if (vp56_rac_get_prob_branchy(&s->c, 252))
994 s->prob.p.comp_ref[i] =
995 update_prob(&s->c, s->prob.p.comp_ref[i]);
998 for (i = 0; i < 4; i++)
999 for (j = 0; j < 9; j++)
1000 if (vp56_rac_get_prob_branchy(&s->c, 252))
1001 s->prob.p.y_mode[i][j] =
1002 update_prob(&s->c, s->prob.p.y_mode[i][j]);
1004 for (i = 0; i < 4; i++)
1005 for (j = 0; j < 4; j++)
1006 for (k = 0; k < 3; k++)
1007 if (vp56_rac_get_prob_branchy(&s->c, 252))
1008 s->prob.p.partition[3 - i][j][k] =
1010 s->prob.p.partition[3 - i][j][k]);
1012 // mv fields don't use the update_prob subexp model for some reason
1013 for (i = 0; i < 3; i++)
1014 if (vp56_rac_get_prob_branchy(&s->c, 252))
1015 s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1017 for (i = 0; i < 2; i++) {
1018 if (vp56_rac_get_prob_branchy(&s->c, 252))
1019 s->prob.p.mv_comp[i].sign =
1020 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1022 for (j = 0; j < 10; j++)
1023 if (vp56_rac_get_prob_branchy(&s->c, 252))
1024 s->prob.p.mv_comp[i].classes[j] =
1025 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1027 if (vp56_rac_get_prob_branchy(&s->c, 252))
1028 s->prob.p.mv_comp[i].class0 =
1029 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1031 for (j = 0; j < 10; j++)
1032 if (vp56_rac_get_prob_branchy(&s->c, 252))
1033 s->prob.p.mv_comp[i].bits[j] =
1034 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1037 for (i = 0; i < 2; i++) {
1038 for (j = 0; j < 2; j++)
1039 for (k = 0; k < 3; k++)
1040 if (vp56_rac_get_prob_branchy(&s->c, 252))
1041 s->prob.p.mv_comp[i].class0_fp[j][k] =
1042 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1044 for (j = 0; j < 3; j++)
1045 if (vp56_rac_get_prob_branchy(&s->c, 252))
1046 s->prob.p.mv_comp[i].fp[j] =
1047 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1050 if (s->s.h.highprecisionmvs) {
1051 for (i = 0; i < 2; i++) {
1052 if (vp56_rac_get_prob_branchy(&s->c, 252))
1053 s->prob.p.mv_comp[i].class0_hp =
1054 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
1056 if (vp56_rac_get_prob_branchy(&s->c, 252))
1057 s->prob.p.mv_comp[i].hp =
1058 (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
// Total header bytes consumed: uncompressed part + compressed part.
1063 return (data2 - data) + size2;
/*
 * Recursively parse the partition tree for one block at level bl
 * (BL_64X64 down to BL_8X8) and decode the resulting block(s).
 *
 * c is the 2-bit partition context built from one bit of above-row state
 * and one bit of left-column state; p is the partition probability table
 * (fixed keyframe table for key/intra-only frames, adaptive otherwise).
 * hbs is half the block size in 8-pixel units; yoff/uvoff are byte
 * offsets into the current frame's luma/chroma planes.
 */
1066 static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1067 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1069 const VP9Context *s = td->s;
1070 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1071 (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
1072 const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
1073 s->prob.p.partition[bl][c];
1074 enum BlockPartition bp;
1075 ptrdiff_t hbs = 4 >> bl;
1076 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1077 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1078 int bytesperpixel = s->bytesperpixel;
// smallest level: read the partition symbol and decode the block in place
1081 bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1082 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
// block fully inside the frame: all four partition types are possible
1083 } else if (col + hbs < s->cols) { // FIXME why not <=?
1084 if (row + hbs < s->rows) { // FIXME why not <=?
1085 bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1087 case PARTITION_NONE:
1088 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
// PARTITION_H: top half, then bottom half one half-block-row down
1091 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1092 yoff += hbs * 8 * y_stride;
1093 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1094 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
// PARTITION_V: left half, then right half one half-block-col across
1097 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1098 yoff += hbs * 8 * bytesperpixel;
1099 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1100 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
1102 case PARTITION_SPLIT:
// recurse into the four quadrants at the next-smaller level
1103 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1104 decode_sb(td, row, col + hbs, lflvl,
1105 yoff + 8 * hbs * bytesperpixel,
1106 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1107 yoff += hbs * 8 * y_stride;
1108 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1109 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1110 decode_sb(td, row + hbs, col + hbs, lflvl,
1111 yoff + 8 * hbs * bytesperpixel,
1112 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// bottom edge (row does not fit): one branch bit picks SPLIT vs PARTITION_H
1117 } else if (vp56_rac_get_prob_branchy(td->c, p[1])) {
1118 bp = PARTITION_SPLIT;
1119 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1120 decode_sb(td, row, col + hbs, lflvl,
1121 yoff + 8 * hbs * bytesperpixel,
1122 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1125 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
// right edge (column does not fit): one branch bit picks SPLIT vs PARTITION_V
1127 } else if (row + hbs < s->rows) { // FIXME why not <=?
1128 if (vp56_rac_get_prob_branchy(td->c, p[2])) {
1129 bp = PARTITION_SPLIT;
1130 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1131 yoff += hbs * 8 * y_stride;
1132 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1133 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1136 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
// bottom-right corner: SPLIT is forced; only the top-left quadrant is in-frame
1139 bp = PARTITION_SPLIT;
1140 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
// record the chosen partition for backward probability adaptation
1142 td->counts.partition[bl][c][bp]++;
/*
 * Second-pass variant of decode_sb(): instead of parsing partition symbols
 * from the bitstream, it replays the block structure recorded in td->b
 * (b->bl / b->bp) during the first pass, walking the stored blocks in the
 * same recursive order.  Offsets/strides mirror decode_sb().
 */
1145 static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1146 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1148 const VP9Context *s = td->s;
1149 VP9Block *b = td->b;
1150 ptrdiff_t hbs = 4 >> bl;
1151 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1152 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1153 int bytesperpixel = s->bytesperpixel;
// leaf level: the stored block must itself be an 8x8 leaf
1156 av_assert2(b->bl == BL_8X8);
1157 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
// the stored block was decided at exactly this level: replay it (and its
// second half for H/V partitions when that half lies inside the frame)
1158 } else if (td->b->bl == bl) {
1159 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1160 if (b->bp == PARTITION_H && row + hbs < s->rows) {
1161 yoff += hbs * 8 * y_stride;
1162 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1163 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
1164 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
1165 yoff += hbs * 8 * bytesperpixel;
1166 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1167 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// otherwise the block was split: recurse into whichever quadrants exist
1170 decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
1171 if (col + hbs < s->cols) { // FIXME why not <=?
1172 if (row + hbs < s->rows) {
1173 decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1174 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1175 yoff += hbs * 8 * y_stride;
1176 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1177 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1178 decode_sb_mem(td, row + hbs, col + hbs, lflvl,
1179 yoff + 8 * hbs * bytesperpixel,
1180 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// bottom row of quadrants is outside the frame: only the right one remains
1182 yoff += hbs * 8 * bytesperpixel;
1183 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1184 decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
// right column of quadrants is outside: only the bottom-left one remains
1186 } else if (row + hbs < s->rows) {
1187 yoff += hbs * 8 * y_stride;
1188 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1189 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
/**
 * Compute the start/end offsets of tile number idx when a grid of n
 * superblocks is split into 2^log2_n tiles.
 *
 * The superblock bounds are clamped to n and then converted from
 * 64-pixel superblock units into the 8-pixel block units used by the
 * row/col decode loops (1 superblock == 8 such units).
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = ( idx      * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;

    if (sb_start > n)
        sb_start = n;
    if (sb_end > n)
        sb_end = n;

    *start = sb_start << 3;
    *end   = sb_end   << 3;
}
/*
 * Release per-decoder scratch storage: the intra-prediction backup line
 * and the per-tile-column block/coefficient base buffers (one VP9TileData
 * set per active tile column, used by slice threading).
 */
1202 static void free_buffers(VP9Context *s)
1206 av_freep(&s->intra_pred_data[0]);
1207 for (i = 0; i < s->active_tile_cols; i++) {
1208 av_freep(&s->td[i].b_base);
1209 av_freep(&s->td[i].block_base);
/*
 * Codec close callback: unreference and free the three internal frames
 * (current + segmap/mvpair references), the extradata buffer pool, the
 * eight current and eight pending ("next") reference slots, and the
 * slice-threading progress entries.
 */
1213 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1215 VP9Context *s = avctx->priv_data;
1218 for (i = 0; i < 3; i++) {
1219 if (s->s.frames[i].tf.f->buf[0])
1220 vp9_frame_unref(avctx, &s->s.frames[i]);
1221 av_frame_free(&s->s.frames[i].tf.f);
1223 av_buffer_pool_uninit(&s->frame_extradata_pool);
1224 for (i = 0; i < 8; i++) {
1225 if (s->s.refs[i].f->buf[0])
1226 ff_thread_release_buffer(avctx, &s->s.refs[i]);
1227 av_frame_free(&s->s.refs[i].f);
1228 if (s->next_refs[i].f->buf[0])
1229 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1230 av_frame_free(&s->next_refs[i].f);
// tears down the progress mutex/cond and entries array (slice threading)
1234 vp9_free_entries(avctx);
/*
 * Single-threaded tile decode loop.
 *
 * Phase 1: for every tile in the current tile row, read the tile size
 * (AV_RB32 length prefix; the last tile uses the remaining data), init a
 * range decoder on it, and reject streams whose first (marker) bit is set.
 * Phase 2: walk the tile row one superblock row at a time, resetting the
 * left-edge contexts per tile, decoding each 64x64 superblock, backing up
 * the pre-loopfilter pixels needed for intra prediction of the next row,
 * loopfiltering the finished row, and reporting frame-threading progress.
 */
1239 static int decode_tiles(AVCodecContext *avctx,
1240 const uint8_t *data, int size)
1242 VP9Context *s = avctx->priv_data;
1243 VP9TileData *td = &s->td[0];
1244 int row, col, tile_row, tile_col, ret;
1246 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1248 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1250 f = s->s.frames[CUR_FRAME].tf.f;
1251 ls_y = f->linesize[0];
1252 ls_uv =f->linesize[1];
1253 bytesperpixel = s->bytesperpixel;
1256 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1257 set_tile_offset(&tile_row_start, &tile_row_end,
1258 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
// set up one range decoder per tile column of this tile row
1260 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1263 if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1264 tile_row == s->s.h.tiling.tile_rows - 1) {
1267 tile_size = AV_RB32(data);
// a tile cannot claim more bytes than remain in the packet
1271 if (tile_size > size) {
1272 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1273 return AVERROR_INVALIDDATA;
1275 ret = ff_vp56_init_range_decoder(&td->c_b[tile_col], data, tile_size);
1278 if (vp56_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker bit
1279 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1280 return AVERROR_INVALIDDATA;
// decode the tile row one superblock row (8 blocks / 64 pixels) at a time
1286 for (row = tile_row_start; row < tile_row_end;
1287 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1288 VP9Filter *lflvl_ptr = s->lflvl;
1289 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1291 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1292 set_tile_offset(&tile_col_start, &tile_col_end,
1293 tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1294 td->tile_col_start = tile_col_start;
// reset the left-edge prediction contexts at each tile boundary
1296 memset(td->left_partition_ctx, 0, 8);
1297 memset(td->left_skip_ctx, 0, 8);
1298 if (s->s.h.keyframe || s->s.h.intraonly) {
1299 memset(td->left_mode_ctx, DC_PRED, 16);
1301 memset(td->left_mode_ctx, NEARESTMV, 8);
1303 memset(td->left_y_nnz_ctx, 0, 16);
1304 memset(td->left_uv_nnz_ctx, 0, 32);
1305 memset(td->left_segpred_ctx, 0, 8);
// switch to this tile column's range decoder
1307 td->c = &td->c_b[tile_col];
1310 for (col = tile_col_start;
1312 col += 8, yoff2 += 64 * bytesperpixel,
1313 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1314 // FIXME integrate with lf code (i.e. zero after each
1315 // use, similar to invtxfm coefficients, or similar)
1317 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
// second pass replays the stored block structure ...
1321 decode_sb_mem(td, row, col, lflvl_ptr,
1322 yoff2, uvoff2, BL_64X64);
// ... first pass parses from the bitstream; bail out on overread
1324 if (vpX_rac_is_end(td->c)) {
1325 return AVERROR_INVALIDDATA;
1327 decode_sb(td, row, col, lflvl_ptr,
1328 yoff2, uvoff2, BL_64X64);
1336 // backup pre-loopfilter reconstruction data for intra
1337 // prediction of next row of sb64s
1338 if (row + 8 < s->rows) {
1339 memcpy(s->intra_pred_data[0],
1340 f->data[0] + yoff + 63 * ls_y,
1341 8 * s->cols * bytesperpixel);
1342 memcpy(s->intra_pred_data[1],
1343 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1344 8 * s->cols * bytesperpixel >> s->ss_h);
1345 memcpy(s->intra_pred_data[2],
1346 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1347 8 * s->cols * bytesperpixel >> s->ss_h);
1350 // loopfilter one row
1351 if (s->s.h.filter.level) {
1354 lflvl_ptr = s->lflvl;
1355 for (col = 0; col < s->cols;
1356 col += 8, yoff2 += 64 * bytesperpixel,
1357 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1358 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
1363 // FIXME maybe we can make this more finegrained by running the
1364 // loopfilter per-block instead of after each sbrow
1365 // In fact that would also make intra pred left preparation easier?
1366 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
/*
 * Slice-threading worker: decodes one tile COLUMN per job (jobnr selects
 * the column).  For every tile row it picks the matching pre-initialized
 * range decoder, decodes the column's superblocks row by row, backs up
 * this column's slice of the pre-loopfilter pixels for next-row intra
 * prediction, and signals per-superblock-row completion so the main-thread
 * loopfilter (loopfilter_proc) can proceed.
 */
1373 static av_always_inline
1374 int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
1377 VP9Context *s = avctx->priv_data;
1378 VP9TileData *td = &s->td[jobnr];
1379 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1380 int bytesperpixel = s->bytesperpixel, row, col, tile_row;
1381 unsigned tile_cols_len;
1382 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1383 VP9Filter *lflvl_ptr_base;
1386 f = s->s.frames[CUR_FRAME].tf.f;
1387 ls_y = f->linesize[0];
1388 ls_uv =f->linesize[1];
// this job owns tile column jobnr: derive its horizontal pixel window
1390 set_tile_offset(&tile_col_start, &tile_col_end,
1391 jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1392 td->tile_col_start = tile_col_start;
1393 uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3);
1394 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1395 lflvl_ptr_base = s->lflvl+(tile_col_start >> 3);
1397 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1398 set_tile_offset(&tile_row_start, &tile_row_end,
1399 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
// range decoders were set up by the caller, one per tile row of this column
1401 td->c = &td->c_b[tile_row];
1402 for (row = tile_row_start; row < tile_row_end;
1403 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1404 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1405 VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3);
// reset left-edge contexts at the start of each superblock row
1407 memset(td->left_partition_ctx, 0, 8);
1408 memset(td->left_skip_ctx, 0, 8);
1409 if (s->s.h.keyframe || s->s.h.intraonly) {
1410 memset(td->left_mode_ctx, DC_PRED, 16);
1412 memset(td->left_mode_ctx, NEARESTMV, 8);
1414 memset(td->left_y_nnz_ctx, 0, 16);
1415 memset(td->left_uv_nnz_ctx, 0, 32);
1416 memset(td->left_segpred_ctx, 0, 8);
1418 for (col = tile_col_start;
1420 col += 8, yoff2 += 64 * bytesperpixel,
1421 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1422 // FIXME integrate with lf code (i.e. zero after each
1423 // use, similar to invtxfm coefficients, or similar)
1424 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1425 decode_sb(td, row, col, lflvl_ptr,
1426 yoff2, uvoff2, BL_64X64);
1429 // backup pre-loopfilter reconstruction data for intra
1430 // prediction of next row of sb64s
1431 tile_cols_len = tile_col_end - tile_col_start;
1432 if (row + 8 < s->rows) {
1433 memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1434 f->data[0] + yoff + 63 * ls_y,
1435 8 * tile_cols_len * bytesperpixel);
1436 memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1437 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1438 8 * tile_cols_len * bytesperpixel >> s->ss_h);
1439 memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1440 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1441 8 * tile_cols_len * bytesperpixel >> s->ss_h);
// tell the loopfilter thread this column finished superblock row (row>>3)
1444 vp9_report_tile_progress(s, row >> 3, 1);
/*
 * Main-function counterpart of decode_tiles_mt(): for each superblock row
 * it waits until all tile columns have reported completion of that row,
 * then (if in-loop filtering is enabled) runs the loopfilter across the
 * full row.
 */
1450 static av_always_inline
1451 int loopfilter_proc(AVCodecContext *avctx)
1453 VP9Context *s = avctx->priv_data;
1454 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1455 VP9Filter *lflvl_ptr;
1456 int bytesperpixel = s->bytesperpixel, col, i;
1459 f = s->s.frames[CUR_FRAME].tf.f;
1460 ls_y = f->linesize[0];
1461 ls_uv =f->linesize[1];
1463 for (i = 0; i < s->sb_rows; i++) {
// block until every tile column has finished superblock row i
1464 vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);
1466 if (s->s.h.filter.level) {
1467 yoff = (ls_y * 64)*i;
1468 uvoff = (ls_uv * 64 >> s->ss_v)*i;
1469 lflvl_ptr = s->lflvl+s->sb_cols*i;
1470 for (col = 0; col < s->cols;
1471 col += 8, yoff += 64 * bytesperpixel,
1472 uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1473 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col,
/*
 * Top-level decode callback for one packet.
 *
 * Flow: parse the frame header; if the packet only re-displays an existing
 * reference (show_existing_frame), return a ref to it directly.  Otherwise
 * rotate the internal frame slots (segmap/mvpair backups + current frame),
 * pre-arm the "next" reference slots, then either hand the packet to a
 * hwaccel or run the software tile decode (slice-threaded or single-
 * threaded), adapt probabilities, promote next_refs into refs, and output
 * the frame unless it is marked invisible.
 */
1482 static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
1483 int *got_frame, AVPacket *pkt)
1485 const uint8_t *data = pkt->data;
1486 int size = pkt->size;
1487 VP9Context *s = avctx->priv_data;
// keep the segmentation-map backup when the map is not being rewritten
1489 int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
1490 (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
1493 if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
// ret == 0 with ref set: show_existing_frame — re-emit reference `ref`
1495 } else if (ret == 0) {
1496 if (!s->s.refs[ref].f->buf[0]) {
1497 av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
1498 return AVERROR_INVALIDDATA;
1500 if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
1502 ((AVFrame *)frame)->pts = pkt->pts;
1504 FF_DISABLE_DEPRECATION_WARNINGS
1505 ((AVFrame *)frame)->pkt_pts = pkt->pts;
1506 FF_ENABLE_DEPRECATION_WARNINGS
1508 ((AVFrame *)frame)->pkt_dts = pkt->dts;
// refresh the pending reference set from the current references
1509 for (i = 0; i < 8; i++) {
1510 if (s->next_refs[i].f->buf[0])
1511 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1512 if (s->s.refs[i].f->buf[0] &&
1513 (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
// rotate frame slots: back up segmentation map / mv pairs from the previous
// frame where the header allows, then allocate a fresh current frame
1522 if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
1523 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
1524 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
1525 if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1526 (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
1529 if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
1530 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
1531 if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1532 (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
1534 if (s->s.frames[CUR_FRAME].tf.f->buf[0])
1535 vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME]);
1536 if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
1538 f = s->s.frames[CUR_FRAME].tf.f;
1539 f->key_frame = s->s.h.keyframe;
1540 f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
// drop the segmap backup if the frame size changed (it no longer matches)
1542 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
1543 (s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width ||
1544 s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
1545 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
// build next_refs: refreshed slots point at the new frame, others carry over
1549 for (i = 0; i < 8; i++) {
1550 if (s->next_refs[i].f->buf[0])
1551 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1552 if (s->s.h.refreshrefmask & (1 << i)) {
1553 ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
1554 } else if (s->s.refs[i].f->buf[0]) {
1555 ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
// hardware path: forward the whole packet to the hwaccel and skip sw decode
1561 if (avctx->hwaccel) {
1562 ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
1565 ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
1568 ret = avctx->hwaccel->end_frame(avctx);
1574 // main tile decode loop
// reset the above-row prediction contexts for the whole frame
1575 memset(s->above_partition_ctx, 0, s->cols);
1576 memset(s->above_skip_ctx, 0, s->cols);
1577 if (s->s.h.keyframe || s->s.h.intraonly) {
1578 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
1580 memset(s->above_mode_ctx, NEARESTMV, s->cols);
1582 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
1583 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
1584 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
1585 memset(s->above_segpred_ctx, 0, s->cols);
// two-pass mode is used for frame threading with backward prob adaptation
1586 s->pass = s->s.frames[CUR_FRAME].uses_2pass =
1587 avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
1588 if ((ret = update_block_buffers(avctx)) < 0) {
1589 av_log(avctx, AV_LOG_ERROR,
1590 "Failed to allocate block buffers\n");
// parallelmode: commit the (forward-updated) probabilities immediately so
// frame threads can start early
1593 if (s->s.h.refreshctx && s->s.h.parallelmode) {
1596 for (i = 0; i < 4; i++) {
1597 for (j = 0; j < 2; j++)
1598 for (k = 0; k < 2; k++)
1599 for (l = 0; l < 6; l++)
1600 for (m = 0; m < 6; m++)
1601 memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
1602 s->prob.coef[i][j][k][l][m], 3);
1603 if (s->s.h.txfmmode == i)
1606 s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
1607 ff_thread_finish_setup(avctx);
1608 } else if (!s->s.h.refreshctx) {
1609 ff_thread_finish_setup(avctx);
// reset per-superblock-row progress counters before slice threads start
1613 if (avctx->active_thread_type & FF_THREAD_SLICE) {
1614 for (i = 0; i < s->sb_rows; i++)
1615 atomic_store(&s->entries[i], 0);
// rewind each tile column's working pointers to its buffer bases
1620 for (i = 0; i < s->active_tile_cols; i++) {
1621 s->td[i].b = s->td[i].b_base;
1622 s->td[i].block = s->td[i].block_base;
1623 s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
1624 s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
1625 s->td[i].eob = s->td[i].eob_base;
1626 s->td[i].uveob[0] = s->td[i].uveob_base[0];
1627 s->td[i].uveob[1] = s->td[i].uveob_base[1];
// slice threading: pre-init all tile range decoders here (indexed
// [tile_col][tile_row]), then run workers + main-thread loopfilter
1631 if (avctx->active_thread_type == FF_THREAD_SLICE) {
1632 int tile_row, tile_col;
1634 av_assert1(!s->pass);
1636 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1637 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1640 if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1641 tile_row == s->s.h.tiling.tile_rows - 1) {
1644 tile_size = AV_RB32(data);
1648 if (tile_size > size)
1649 return AVERROR_INVALIDDATA;
1650 ret = ff_vp56_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
1653 if (vp56_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
1654 return AVERROR_INVALIDDATA;
1660 ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
// single-threaded fallback
1664 ret = decode_tiles(avctx, data, size);
1666 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1671 // Sum all counts fields into td[0].counts for tile threading
1672 if (avctx->active_thread_type == FF_THREAD_SLICE)
1673 for (i = 1; i < s->s.h.tiling.tile_cols; i++)
1674 for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
1675 ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];
// backward probability adaptation (non-parallel streams only)
1677 if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
1678 ff_vp9_adapt_probs(s);
1679 ff_thread_finish_setup(avctx);
1681 } while (s->pass++ == 1);
1682 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
// promote the pending reference set into the active one
1686 for (i = 0; i < 8; i++) {
1687 if (s->s.refs[i].f->buf[0])
1688 ff_thread_release_buffer(avctx, &s->s.refs[i]);
1689 if (s->next_refs[i].f->buf[0] &&
1690 (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
// output the decoded frame unless the stream marks it invisible
1694 if (!s->s.h.invisible) {
1695 if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
/*
 * Flush callback (e.g. on seek): drop the three internal frames and all
 * eight reference slots; the AVFrame shells themselves stay allocated.
 */
1703 static void vp9_decode_flush(AVCodecContext *avctx)
1705 VP9Context *s = avctx->priv_data;
1708 for (i = 0; i < 3; i++)
1709 vp9_frame_unref(avctx, &s->s.frames[i]);
1710 for (i = 0; i < 8; i++)
1711 ff_thread_release_buffer(avctx, &s->s.refs[i]);
/*
 * Allocate the AVFrame shells for the three internal frames and the eight
 * current/pending reference slots.  On any allocation failure the partial
 * state is torn down via vp9_decode_free() and AVERROR(ENOMEM) returned.
 */
1714 static int init_frames(AVCodecContext *avctx)
1716 VP9Context *s = avctx->priv_data;
1719 for (i = 0; i < 3; i++) {
1720 s->s.frames[i].tf.f = av_frame_alloc();
1721 if (!s->s.frames[i].tf.f) {
1722 vp9_decode_free(avctx);
1723 av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1724 return AVERROR(ENOMEM);
1727 for (i = 0; i < 8; i++) {
1728 s->s.refs[i].f = av_frame_alloc();
1729 s->next_refs[i].f = av_frame_alloc();
1730 if (!s->s.refs[i].f || !s->next_refs[i].f) {
1731 vp9_decode_free(avctx);
1732 av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1733 return AVERROR(ENOMEM);
/*
 * Codec init: enable frame-threading progress allocation, mark the
 * loopfilter sharpness as "unset" (-1) so the first header forces an
 * update, and allocate the frame/reference AVFrame shells.
 */
1740 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1742 VP9Context *s = avctx->priv_data;
1744 avctx->internal->allocate_progress = 1;
1746 s->s.h.filter.sharpness = -1;
1748 return init_frames(avctx);
/*
 * Frame-threading worker init: each thread copy only needs its own frame
 * shells; all other state is synced via update_thread_context().
 */
1752 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
1754 return init_frames(avctx);
/*
 * Frame-threading state sync: copy the decoding state a worker thread
 * needs from the source context — the three internal frames, the
 * reference set (taken from the source's next_refs, i.e. post-frame
 * state), and the scalar header/probability fields used across frames.
 */
1757 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1760 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
// re-reference the three internal frames from the source context
1762 for (i = 0; i < 3; i++) {
1763 if (s->s.frames[i].tf.f->buf[0])
1764 vp9_frame_unref(dst, &s->s.frames[i]);
1765 if (ssrc->s.frames[i].tf.f->buf[0]) {
1766 if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
// adopt the source's pending references as this thread's active set
1770 for (i = 0; i < 8; i++) {
1771 if (s->s.refs[i].f->buf[0])
1772 ff_thread_release_buffer(dst, &s->s.refs[i]);
1773 if (ssrc->next_refs[i].f->buf[0]) {
1774 if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
// scalar header fields carried across frames
1779 s->s.h.invisible = ssrc->s.h.invisible;
1780 s->s.h.keyframe = ssrc->s.h.keyframe;
1781 s->s.h.intraonly = ssrc->s.h.intraonly;
1782 s->ss_v = ssrc->ss_v;
1783 s->ss_h = ssrc->ss_h;
1784 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1785 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1786 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1787 s->bytesperpixel = ssrc->bytesperpixel;
1788 s->gf_fmt = ssrc->gf_fmt;
1791 s->s.h.bpp = ssrc->s.h.bpp;
1792 s->bpp_index = ssrc->bpp_index;
1793 s->pix_fmt = ssrc->pix_fmt;
// probability contexts and per-segment/loopfilter deltas
1794 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
1795 memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
1796 memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1797 sizeof(s->s.h.segmentation.feat));
1803 AVCodec ff_vp9_decoder = {
1805 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
1806 .type = AVMEDIA_TYPE_VIDEO,
1807 .id = AV_CODEC_ID_VP9,
1808 .priv_data_size = sizeof(VP9Context),
1809 .init = vp9_decode_init,
1810 .close = vp9_decode_free,
1811 .decode = vp9_decode_frame,
1812 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
1813 .caps_internal = FF_CODEC_CAP_SLICE_THREAD_HAS_MF,
1814 .flush = vp9_decode_flush,
1815 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
1816 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
1817 .profiles = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
1818 .bsfs = "vp9_superframe_split",
1819 .hw_configs = (const AVCodecHWConfigInternal*[]) {
1820 #if CONFIG_VP9_DXVA2_HWACCEL
1823 #if CONFIG_VP9_D3D11VA_HWACCEL
1824 HWACCEL_D3D11VA(vp9),
1826 #if CONFIG_VP9_D3D11VA2_HWACCEL
1827 HWACCEL_D3D11VA2(vp9),
1829 #if CONFIG_VP9_NVDEC_HWACCEL
1832 #if CONFIG_VP9_VAAPI_HWACCEL
1835 #if CONFIG_VP9_VDPAU_HWACCEL