2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
34 #include "libavutil/avassert.h"
35 #include "libavutil/pixdesc.h"
37 #define VP9_SYNCCODE 0x498342
/* Tear down the slice-threading tile-progress state: destroy the
 * mutex/cond pair and free the per-tile-row progress counters.
 * Only acts when slice threading is actually enabled on this context. */
static void vp9_free_entries(AVCodecContext *avctx) {
    VP9Context *s = avctx->priv_data;

    if (avctx->active_thread_type & FF_THREAD_SLICE) {
        pthread_mutex_destroy(&s->progress_mutex);
        pthread_cond_destroy(&s->progress_cond);
        av_freep(&s->entries);
/* (Re)allocate the per-tile-row progress counters used for slice
 * threading and initialize the mutex/cond pair that waiters block on.
 * @param n  number of entries (superblock rows) to allocate.
 * @return 0 on success, AVERROR(ENOMEM) on allocation failure. */
static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
    VP9Context *s = avctx->priv_data;

    if (avctx->active_thread_type & FF_THREAD_SLICE) {
        // drop any previous allocation before re-allocating for the new size
        av_freep(&s->entries);

        s->entries = av_malloc_array(n, sizeof(atomic_int));
            /* NOTE(review): the `if (!s->entries)` guard for this failure
             * path appears to be missing from this excerpt. */
            av_freep(&s->entries);
            return AVERROR(ENOMEM);
        for (i = 0; i < n; i++)
            atomic_init(&s->entries[i], 0);
        pthread_mutex_init(&s->progress_mutex, NULL);
        pthread_cond_init(&s->progress_cond, NULL);
/* Advance the decode-progress counter of tile row `field` by n and wake
 * one waiter.  The release-ordered add pairs with the acquire load in
 * vp9_await_tile_progress(). */
static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
    pthread_mutex_lock(&s->progress_mutex);
    atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
    pthread_cond_signal(&s->progress_cond);
    pthread_mutex_unlock(&s->progress_mutex);
/* Block until the progress counter of tile row `field` reaches n.
 * Fast path: a lock-free acquire load that avoids the mutex when the
 * producer is already far enough along. */
static void vp9_await_tile_progress(VP9Context *s, int field, int n) {
    if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
        /* NOTE(review): the `return;` for this fast path appears to be
         * missing from this excerpt. */

    pthread_mutex_lock(&s->progress_mutex);
    // relaxed is sufficient here: the mutex provides the ordering
    while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) != n)
        pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
    pthread_mutex_unlock(&s->progress_mutex);
91 static void vp9_free_entries(AVCodecContext *avctx) {}
92 static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
/* Release all per-frame resources: the (possibly thread-shared) picture
 * buffer, the extradata buffer backing the segmentation map / mv array,
 * and any hwaccel private data.  Safe to call on a partially
 * initialized frame (the unref helpers tolerate NULL buffers). */
static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
    ff_thread_release_buffer(avctx, &f->tf);
    av_buffer_unref(&f->extradata);
    av_buffer_unref(&f->hwaccel_priv_buf);
    // these pointed into the buffers just released; clear to avoid dangling use
    f->segmentation_map = NULL;
    f->hwaccel_picture_private = NULL;
/* Allocate a frame: picture buffer, one extradata buffer holding both
 * the segmentation map (1 byte per 8x8 block) and the mv reference
 * array (one VP9mvrefPair per 8x8 block), plus hwaccel private data
 * when a hwaccel is active.  On failure everything acquired so far is
 * released via vp9_frame_unref() and AVERROR(ENOMEM) is returned. */
static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
    VP9Context *s = avctx->priv_data;

    ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);

    // 64 8x8 blocks per 64x64 superblock => one byte per 8x8 block
    sz = 64 * s->sb_cols * s->sb_rows;
    // single allocation: [segmentation map (sz bytes)][mv array]
    f->extradata = av_buffer_allocz(sz * (1 + sizeof(VP9mvrefPair)));
    f->segmentation_map = f->extradata->data;
    f->mv = (VP9mvrefPair *) (f->extradata->data + sz);

    if (avctx->hwaccel) {
        const AVHWAccel *hwaccel = avctx->hwaccel;
        av_assert0(!f->hwaccel_picture_private);
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;

    // error path: undo partial allocation
    vp9_frame_unref(avctx, f);
    return AVERROR(ENOMEM);
/* Create a new reference to src in dst: ref-counted buffers are
 * re-referenced (not copied), bare pointers into those buffers are
 * duplicated.  On failure dst is cleaned up with vp9_frame_unref()
 * and AVERROR(ENOMEM) is returned. */
static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
    ret = ff_thread_ref_frame(&dst->tf, &src->tf);

    dst->extradata = av_buffer_ref(src->extradata);

    // raw pointer into the shared extradata buffer — valid as long as the ref is
    dst->segmentation_map = src->segmentation_map;
    dst->uses_2pass = src->uses_2pass;

    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;

    // error path: release whatever was referenced before the failure
    vp9_frame_unref(avctx, dst);
    return AVERROR(ENOMEM);
/* Handle a (possible) frame-size or pixel-format change: negotiate the
 * output pixel format (offering hwaccel formats where compiled in),
 * update superblock/block grid dimensions, and reallocate the
 * per-column "above" context arrays, intra prediction edge buffers and
 * loop-filter level storage in one contiguous allocation.  Also
 * reinitializes the DSP contexts when the bit depth changed. */
static int update_size(AVCodecContext *avctx, int w, int h)
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + CONFIG_VP9_D3D11VA_HWACCEL * 2 + CONFIG_VP9_VAAPI_HWACCEL)
    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
    VP9Context *s = avctx->priv_data;
    int bytesperpixel = s->bytesperpixel, ret, cols, rows;

    av_assert0(w > 0 && h > 0);

    // renegotiate the format only when size or sw pixel format changed
    if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
        if ((ret = ff_set_dimensions(avctx, w, h)) < 0)

        // build the candidate list: hwaccel formats first, sw format last
        switch (s->pix_fmt) {
        case AV_PIX_FMT_YUV420P:
#if CONFIG_VP9_DXVA2_HWACCEL
            *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
#if CONFIG_VP9_D3D11VA_HWACCEL
            *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
            *fmtp++ = AV_PIX_FMT_D3D11;
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
        case AV_PIX_FMT_YUV420P10:
        case AV_PIX_FMT_YUV420P12:
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
        *fmtp++ = s->pix_fmt;
        *fmtp = AV_PIX_FMT_NONE;

        ret = ff_thread_get_format(avctx, pix_fmts);

        avctx->pix_fmt = ret;
        s->gf_fmt = s->pix_fmt;

    // nothing else to do if the block grid and format are unchanged
    if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)

    s->last_fmt = s->pix_fmt;
    s->sb_cols = (w + 63) >> 6;   // 64x64 superblock grid
    s->sb_rows = (h + 63) >> 6;
    s->cols = (w + 7) >> 3;       // 8x8 block grid
    s->rows = (h + 7) >> 3;
    lflvl_len = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;

// carve the single allocation below into typed sub-arrays
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    // FIXME we slightly over-allocate here for subsampled chroma, but a little
    // bit of padding shouldn't affect performance...
    p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
                                lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0], uint8_t *, 64 * bytesperpixel);
    assign(s->intra_pred_data[1], uint8_t *, 64 * bytesperpixel);
    assign(s->intra_pred_data[2], uint8_t *, 64 * bytesperpixel);
    assign(s->above_y_nnz_ctx, uint8_t *, 16);
    assign(s->above_mode_ctx, uint8_t *, 16);
    assign(s->above_mv_ctx, VP56mv(*)[2], 16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *, 16);
    assign(s->above_uv_nnz_ctx[1], uint8_t *, 16);
    assign(s->above_partition_ctx, uint8_t *, 8);
    assign(s->above_skip_ctx, uint8_t *, 8);
    assign(s->above_txfm_ctx, uint8_t *, 8);
    assign(s->above_segpred_ctx, uint8_t *, 8);
    assign(s->above_intra_ctx, uint8_t *, 8);
    assign(s->above_comp_ctx, uint8_t *, 8);
    assign(s->above_ref_ctx, uint8_t *, 8);
    assign(s->above_filter_ctx, uint8_t *, 8);
    assign(s->lflvl, VP9Filter *, lflvl_len);

    // block buffers are sized for the old grid; drop them so they get re-made
    for (i = 0; i < s->active_tile_cols; i++) {
        av_freep(&s->td[i].b_base);
        av_freep(&s->td[i].block_base);

    if (s->s.h.bpp != s->last_bpp) {
        ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
        ff_videodsp_init(&s->vdsp, s->s.h.bpp);
        s->last_bpp = s->s.h.bpp;
/* (Re)allocate the per-tile coefficient/eob scratch buffers.  In 2-pass
 * mode one set large enough for the whole frame (all superblocks) is
 * kept on td[0]; otherwise each active tile column gets a one-superblock
 * scratch set.  No-op when buffers already match the current mode. */
static int update_block_buffers(AVCodecContext *avctx)
    VP9Context *s = avctx->priv_data;
    int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
    VP9TileData *td = &s->td[0];

    // already allocated for the right 1-pass/2-pass layout?
    if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)

    av_free(td->block_base);
    // chroma planes shrink by the subsampling factors
    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
    chroma_eobs = 16 * 16 >> (s->ss_h + s->ss_v);
    if (s->s.frames[CUR_FRAME].uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

        // whole-frame buffers: coefficients for luma + 2 chroma planes, then eobs
        td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
        td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                     16 * 16 + 2 * chroma_eobs) * sbs);
        if (!td->b_base || !td->block_base)
            return AVERROR(ENOMEM);
        td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
        td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
        td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
        td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
        td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;

        // leaving 2-pass mode: free the secondary tiles' old buffers
        for (i = 1; i < s->active_tile_cols; i++) {
            if (s->td[i].b_base && s->td[i].block_base) {
                av_free(s->td[i].b_base);
                av_free(s->td[i].block_base);
        // one-superblock scratch per active tile column
        for (i = 0; i < s->active_tile_cols; i++) {
            s->td[i].b_base = av_malloc(sizeof(VP9Block));
            s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                             16 * 16 + 2 * chroma_eobs);
            if (!s->td[i].b_base || !s->td[i].block_base)
                return AVERROR(ENOMEM);
            s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
            s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
            s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
            s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
            s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;
    s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;
// The sign bit is at the end, not the start, of a bit sequence
/* Read an n-bit magnitude followed by a sign bit (1 = negative). */
static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
    int v = get_bits(gb, n);
    return get_bits1(gb) ? -v : v;
/* Inverse of the "recenter nonneg" mapping used by the subexponential
 * probability-update code: map v back to an absolute value around m.
 * NOTE(review): only one branch of this function is visible in this
 * excerpt; the guards preceding this return appear to be missing. */
static av_always_inline int inv_recenter_nonneg(int v, int m)
    return m - ((v + 1) >> 1);
// differential forward probability updates
/* Decode a subexponentially-coded differential update for probability p
 * (range [1, 255]) from range coder c and return the new probability. */
static int update_prob(VP56RangeCoder *c, int p)
    // maps the decoded VLC index back to the absolute difference value
    static const int inv_map_table[255] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    // VLC: each extra "1" prefix bit widens the coded range of d
    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
        d = vp8_rac_get_uint(c, 7);
            d = (d << 1) - 65 + vp8_rac_get(c);
    av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));

    // mirror the update around 128 so the result stays inside [1, 255]
    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
/* Parse the color config from the uncompressed header: bit depth,
 * colorspace, color range and chroma subsampling, setting s->pix_fmt,
 * s->ss_h/ss_v, s->s.h.bpp and the avctx color fields accordingly.
 * Returns AVERROR_INVALIDDATA on reserved/unsupported combinations. */
static int read_colorspace_details(AVCodecContext *avctx)
    static const enum AVColorSpace colorspaces[8] = {
        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
    VP9Context *s = avctx->priv_data;
    // profiles 0/1 are always 8-bit; profiles 2/3 signal 10 vs 12 bit here
    int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12

    s->s.h.bpp = 8 + bits * 2;
    s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
    avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
    if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
        static const enum AVPixelFormat pix_fmt_rgb[3] = {
            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
        // RGB is never subsampled and always full range
        s->ss_h = s->ss_v = 0;
        avctx->color_range = AVCOL_RANGE_JPEG;
        s->pix_fmt = pix_fmt_rgb[bits];
        if (avctx->profile & 1) {
            if (get_bits1(&s->gb)) {
                av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
                return AVERROR_INVALIDDATA;
            av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
            return AVERROR_INVALIDDATA;
        // YUV: pick the pixel format from bit depth and h/v subsampling
        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
            { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
              { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
        avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
        if (avctx->profile & 1) {
            // odd profiles (1/3) carry explicit subsampling bits
            s->ss_h = get_bits1(&s->gb);
            s->ss_v = get_bits1(&s->gb);
            s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
            if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
                av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
                return AVERROR_INVALIDDATA;
            } else if (get_bits1(&s->gb)) {
                av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
                return AVERROR_INVALIDDATA;
            // even profiles (0/2) are fixed 4:2:0
            s->ss_h = s->ss_v = 1;
            s->pix_fmt = pix_fmt_for_ss[bits][1][1];
/* Parse a complete VP9 frame header: the uncompressed header (frame
 * type, size, references, loop filter, quantizers, segmentation,
 * tiling) followed by the arithmetic-coded "compressed" header of
 * forward probability updates.
 * On a "show existing frame" header, *ref receives the reference slot
 * to display.  Returns the total header size in bytes on success or a
 * negative AVERROR on invalid data.
 * NOTE(review): this excerpt is missing numerous lines (closing braces,
 * some statements and declarations); visible tokens are kept as-is. */
static int decode_frame_header(AVCodecContext *avctx,
                               const uint8_t *data, int size, int *ref)
    VP9Context *s = avctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
    const uint8_t *data2;

    /* general header */
    if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    // 2-bit profile, plus a third bit reserved for future (>3) profiles
    avctx->profile = get_bits1(&s->gb);
    avctx->profile |= get_bits1(&s->gb) << 1;
    if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
    if (avctx->profile > 3) {
        av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
        return AVERROR_INVALIDDATA;
    s->s.h.profile = avctx->profile;
    if (get_bits1(&s->gb)) {
        // "show existing frame": just report which reference to display
        *ref = get_bits(&s->gb, 3);

    s->last_keyframe = s->s.h.keyframe;
    s->s.h.keyframe = !get_bits1(&s->gb);

    last_invisible = s->s.h.invisible;
    s->s.h.invisible = !get_bits1(&s->gb);
    s->s.h.errorres = get_bits1(&s->gb);
    s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;

    if (s->s.h.keyframe) {
        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        if ((ret = read_colorspace_details(avctx)) < 0)
        // for profile 1, here follows the subsampling bits
        s->s.h.refreshrefmask = 0xff;
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
        // inter or intra-only frame
        s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
        s->s.h.resetctx = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
        if (s->s.h.intraonly) {
            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            if (avctx->profile >= 1) {
                if ((ret = read_colorspace_details(avctx)) < 0)
                // profile 0 intra-only: fixed 8-bit 4:2:0 BT.470BG
                s->ss_h = s->ss_v = 1;
                s->bytesperpixel = 1;
                s->pix_fmt = AV_PIX_FMT_YUV420P;
                avctx->colorspace = AVCOL_SPC_BT470BG;
                avctx->color_range = AVCOL_RANGE_MPEG;
            s->s.h.refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            // inter frame: three reference slots with per-ref sign bias
            s->s.h.refreshrefmask = get_bits(&s->gb, 8);
            s->s.h.refidx[0] = get_bits(&s->gb, 3);
            s->s.h.signbias[0] = get_bits1(&s->gb) && !s->s.h.errorres;
            s->s.h.refidx[1] = get_bits(&s->gb, 3);
            s->s.h.signbias[1] = get_bits1(&s->gb) && !s->s.h.errorres;
            s->s.h.refidx[2] = get_bits(&s->gb, 3);
            s->s.h.signbias[2] = get_bits1(&s->gb) && !s->s.h.errorres;
            if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
                !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
                !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
                av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            // frame size: either inherited from a reference or explicit
            if (get_bits1(&s->gb)) {
                w = s->s.refs[s->s.h.refidx[0]].f->width;
                h = s->s.refs[s->s.h.refidx[0]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->s.refs[s->s.h.refidx[1]].f->width;
                h = s->s.refs[s->s.h.refidx[1]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->s.refs[s->s.h.refidx[2]].f->width;
                h = s->s.refs[s->s.h.refidx[2]].f->height;
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            // Note that in this code, "CUR_FRAME" is actually before we
            // have formally allocated a frame, and thus actually represents
            s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
                                         s->s.frames[CUR_FRAME].tf.f->height == h;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->s.h.highprecisionmvs = get_bits1(&s->gb);
            s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
            // compound prediction is only possible with mixed sign biases
            s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
                                    s->s.h.signbias[0] != s->s.h.signbias[2];
            if (s->s.h.allowcompinter) {
                if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
                    s->s.h.fixcompref = 2;
                    s->s.h.varcompref[0] = 0;
                    s->s.h.varcompref[1] = 1;
                } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
                    s->s.h.fixcompref = 1;
                    s->s.h.varcompref[0] = 0;
                    s->s.h.varcompref[1] = 2;
                    s->s.h.fixcompref = 0;
                    s->s.h.varcompref[0] = 1;
                    s->s.h.varcompref[1] = 2;
    s->s.h.refreshctx = s->s.h.errorres ? 0 : get_bits1(&s->gb);
    s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
    s->s.h.framectxid = c = get_bits(&s->gb, 2);
    if (s->s.h.keyframe || s->s.h.intraonly)
        s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes

    /* loopfilter header data */
    if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
        // reset loopfilter defaults
        s->s.h.lf_delta.ref[0] = 1;
        s->s.h.lf_delta.ref[1] = 0;
        s->s.h.lf_delta.ref[2] = -1;
        s->s.h.lf_delta.ref[3] = -1;
        s->s.h.lf_delta.mode[0] = 0;
        s->s.h.lf_delta.mode[1] = 0;
        memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
    s->s.h.filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
    // the old cache values since they are still valid
    if (s->s.h.filter.sharpness != sharp) {
        for (i = 1; i <= 63; i++) {
                limit >>= (sharp + 3) >> 2;
                limit = FFMIN(limit, 9 - sharp);
            limit = FFMAX(limit, 1);

            s->filter_lut.lim_lut[i] = limit;
            s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
    s->s.h.filter.sharpness = sharp;
    if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
        if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);

    /* quantization header data */
    s->s.h.yac_qi = get_bits(&s->gb, 8);
    s->s.h.ydc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->s.h.lossless = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
                      s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
        avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;

    /* segmentation header info */
    if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
                                              get_bits(&s->gb, 8) : 255;
            if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
                for (i = 0; i < 3; i++)
                    s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
                                                       get_bits(&s->gb, 8) : 255;

        if (get_bits1(&s->gb)) {
            s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
                if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
                if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);

    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
            if (s->s.h.segmentation.absolute_vals)
                qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
                qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
            qyac = s->s.h.yac_qi;

        qydc = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
        qyac = av_clip_uintp2(qyac, 8);

        s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
        s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
        s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
        s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];

        sh = s->s.h.filter.level >= 32;
        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
            if (s->s.h.segmentation.absolute_vals)
                lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
                lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
            lflvl = s->s.h.filter.level;

        if (s->s.h.lf_delta.enabled) {
            s->s.h.segmentation.feat[i].lflvl[0][0] =
            s->s.h.segmentation.feat[i].lflvl[0][1] =
                av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
            for (j = 1; j < 4; j++) {
                s->s.h.segmentation.feat[i].lflvl[j][0] =
                    av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
                                             s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
                s->s.h.segmentation.feat[i].lflvl[j][1] =
                    av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
                                             s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
            memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
                   sizeof(s->s.h.segmentation.feat[i].lflvl));

    /* tiling info */
    if ((ret = update_size(avctx, w, h)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
    // minimum log2_tile_cols so each tile is at most 64 superblocks wide
    for (s->s.h.tiling.log2_tile_cols = 0;
         s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
         s->s.h.tiling.log2_tile_cols++) ;
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->s.h.tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->s.h.tiling.log2_tile_cols++;
    s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
    s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
    if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
        // tile-column count changed: rebuild per-tile contexts and range coders
            for (i = 0; i < s->active_tile_cols; i++) {
                av_free(s->td[i].b_base);
                av_free(s->td[i].block_base);

        s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
        vp9_free_entries(avctx);
        s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
                              s->s.h.tiling.tile_cols : 1;
        vp9_alloc_entries(avctx, s->sb_rows);
        if (avctx->active_thread_type == FF_THREAD_SLICE) {
            n_range_coders = 4; // max_tile_rows
            n_range_coders = s->s.h.tiling.tile_cols;
        // single allocation: VP9TileData array followed by the range coders
        s->td = av_mallocz_array(s->active_tile_cols, sizeof(VP9TileData) +
                                 n_range_coders * sizeof(VP56RangeCoder));
            return AVERROR(ENOMEM);
        rc = (VP56RangeCoder *) &s->td[s->active_tile_cols];
        for (i = 0; i < s->active_tile_cols; i++) {
            rc += n_range_coders;

    /* check reference frames */
    if (!s->s.h.keyframe && !s->s.h.intraonly) {
        for (i = 0; i < 3; i++) {
            AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
            int refw = ref->width, refh = ref->height;

            if (ref->format != avctx->pix_fmt) {
                av_log(avctx, AV_LOG_ERROR,
                       "Ref pixfmt (%s) did not match current frame (%s)",
                       av_get_pix_fmt_name(ref->format),
                       av_get_pix_fmt_name(avctx->pix_fmt));
                return AVERROR_INVALIDDATA;
            } else if (refw == w && refh == h) {
                s->mvscale[i][0] = s->mvscale[i][1] = 0;
                // scaled reference: within [w/16, 2w] per spec constraints
                if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
                    av_log(avctx, AV_LOG_ERROR,
                           "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
                    return AVERROR_INVALIDDATA;
                // 14-bit fixed-point scaling factors ref -> cur
                s->mvscale[i][0] = (refw << 14) / w;
                s->mvscale[i][1] = (refh << 14) / h;
                s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
                s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;

    if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
        // reset all four stored probability contexts to the defaults
        s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
                           s->prob_ctx[3].p = ff_vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
    } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
        // reset only the context selected by framectxid
        s->prob_ctx[c].p = ff_vp9_default_probs;
        memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));

    // next 16 bits is size of the rest of the header (arith-coded)
    s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
    s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;

    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;

    // reset the per-tile symbol counters used for backward adaptation
    for (i = 0; i < s->active_tile_cols; i++) {
        if (s->s.h.keyframe || s->s.h.intraonly) {
            memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
            memset(s->td[i].counts.eob, 0, sizeof(s->td[0].counts.eob));
            memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));

    /* FIXME is it faster to not copy here, but do it down in the fw updates
     * as explicit copies if the fw update is missing (and skip the copy upon
     * eventual copy?) */
    s->prob.p = s->prob_ctx[c].p;

    // txfm updates
    if (s->s.h.lossless) {
        s->s.h.txfmmode = TX_4X4;
        s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->s.h.txfmmode == 3)
            s->s.h.txfmmode += vp8_rac_get(&s->c);

        if (s->s.h.txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);

    // coef updates, per transform size
    for (i = 0; i < 4; i++) {
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252))
                                    p[n] = update_prob(&s->c, r[n]);
                            // tail probabilities derived from the model
                            memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m > 3 && l == 0) // dc only has 3 pt
                            memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
        if (s->s.h.txfmmode == i)

    // mode updates
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->s.h.keyframe && !s->s.h.intraonly) {
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->s.h.filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);

        if (s->s.h.allowcompinter) {
            s->s.h.comppredmode = vp8_rac_get(&s->c);
            if (s->s.h.comppredmode)
                s->s.h.comppredmode += vp8_rac_get(&s->c);
            if (s->s.h.comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        update_prob(&s->c, s->prob.p.comp[i]);
            s->s.h.comppredmode = PRED_SINGLEREF;

        if (s->s.h.comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);

        if (s->s.h.comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            s->prob.p.partition[3 - i][j][k]);

        // mv fields don't use the update_prob subexp model for some reason
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        if (s->s.h.highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

    return (data2 - data) + size2;
// Recursively decode one node of the superblock partition quadtree: read the
// partition mode for the block at (row, col) (in 8px units) at level `bl`
// from the range coder, decode the resulting block(s), and recurse into the
// four quarters on PARTITION_SPLIT. Edge blocks that don't fit use the
// implicit-partition paths below (only split vs. forced H/V is coded).
// NOTE(review): several structural lines (the `if (bl == BL_8X8)` guard, the
// `switch (bp)` header, `case PARTITION_H:`/`PARTITION_V:` labels, `break;`
// statements and closing braces) appear to have been dropped from this paste
// -- reconcile against upstream vp9.c before editing the logic.
1042 static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1043 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1045 const VP9Context *s = td->s;
// Partition probability context: bit 0 from the row above, bit 1 from the
// column to the left (left context is per-superblock, hence `row & 0x7`).
1046 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1047 (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
// Keyframes / intra-only frames use the fixed default partition tables.
1048 const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
1049 s->prob.p.partition[bl][c];
1050 enum BlockPartition bp;
// Half block size at this level, in 8px units (BL_64X64 -> 4, BL_8X8 -> 0).
1051 ptrdiff_t hbs = 4 >> bl;
1052 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1053 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1054 int bytesperpixel = s->bytesperpixel;
// Leaf level: decode the 8x8 block directly with its coded partition.
1057 bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1058 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1059 } else if (col + hbs < s->cols) { // FIXME why not <=?
1060 if (row + hbs < s->rows) { // FIXME why not <=?
// Fully inside the frame: all four partition modes are possible.
1061 bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1063 case PARTITION_NONE:
1064 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
// Horizontal split: top half, then bottom half one half-block down.
1067 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1068 yoff += hbs * 8 * y_stride;
1069 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1070 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
// Vertical split: left half, then right half one half-block across.
1073 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1074 yoff += hbs * 8 * bytesperpixel;
1075 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1076 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
1078 case PARTITION_SPLIT:
// Recurse into the four quadrants at the next (smaller) level.
1079 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1080 decode_sb(td, row, col + hbs, lflvl,
1081 yoff + 8 * hbs * bytesperpixel,
1082 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1083 yoff += hbs * 8 * y_stride;
1084 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1085 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1086 decode_sb(td, row + hbs, col + hbs, lflvl,
1087 yoff + 8 * hbs * bytesperpixel,
1088 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// Bottom edge: only split vs. horizontal is coded (single bit p[1]).
1093 } else if (vp56_rac_get_prob_branchy(td->c, p[1])) {
1094 bp = PARTITION_SPLIT;
1095 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1096 decode_sb(td, row, col + hbs, lflvl,
1097 yoff + 8 * hbs * bytesperpixel,
1098 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1101 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
// Right edge: only split vs. vertical is coded (single bit p[2]).
1103 } else if (row + hbs < s->rows) { // FIXME why not <=?
1104 if (vp56_rac_get_prob_branchy(td->c, p[2])) {
1105 bp = PARTITION_SPLIT;
1106 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1107 yoff += hbs * 8 * y_stride;
1108 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1109 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1112 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
// Corner: split is implied, only the top-left quadrant exists.
1115 bp = PARTITION_SPLIT;
1116 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
// Update symbol counts used later by ff_vp9_adapt_probs().
1118 td->counts.partition[bl][c][bp]++;
// Second-pass variant of decode_sb() for 2-pass (frame-threaded) decoding:
// instead of reading partition modes from the bitstream, it replays the
// partition decisions stored in td->b during pass 1 and re-runs block
// reconstruction. Same traversal order and offset arithmetic as decode_sb().
// NOTE(review): structural lines (the leading `if (bl == BL_8X8) {`, some
// `else` branches and closing braces) appear to be missing from this paste --
// compare against upstream vp9.c before editing.
1121 static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1122 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1124 const VP9Context *s = td->s;
// b points at the pass-1 block record for this position.
1125 VP9Block *b = td->b;
1126 ptrdiff_t hbs = 4 >> bl;
1127 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1128 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1129 int bytesperpixel = s->bytesperpixel;
// Leaf: a stored 8x8 block must have been recorded at BL_8X8.
1132 av_assert2(b->bl == BL_8X8);
1133 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1134 } else if (td->b->bl == bl) {
// Stored partition terminates at this level: decode it, plus the second
// half for H/V partitions when it lies inside the frame.
1135 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1136 if (b->bp == PARTITION_H && row + hbs < s->rows) {
1137 yoff += hbs * 8 * y_stride;
1138 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1139 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
1140 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
1141 yoff += hbs * 8 * bytesperpixel;
1142 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1143 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// Stored partition is a split: recurse into the quadrants that exist.
1146 decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
1147 if (col + hbs < s->cols) { // FIXME why not <=?
1148 if (row + hbs < s->rows) {
1149 decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1150 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1151 yoff += hbs * 8 * y_stride;
1152 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1153 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1154 decode_sb_mem(td, row + hbs, col + hbs, lflvl,
1155 yoff + 8 * hbs * bytesperpixel,
1156 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// Bottom row clipped: only the right quadrant remains.
1158 yoff += hbs * 8 * bytesperpixel;
1159 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1160 decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
// Right column clipped: only the bottom quadrant remains.
1162 } else if (row + hbs < s->rows) {
1163 yoff += hbs * 8 * y_stride;
1164 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1165 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
/**
 * Compute the range [*start, *end) covered by tile `idx` along one axis.
 *
 * VP9 places tile boundaries at superblock granularity:
 * sb = (idx * n) >> log2_n, where n is the frame size in superblocks and
 * 2^log2_n is the tile count along this axis. The clamped superblock index
 * is converted to 8-pixel block units (the row/col unit used by the tile
 * decode loops) with << 3.
 *
 * @param start  receives the first 8px-block index of the tile
 * @param end    receives one past the last 8px-block index of the tile
 * @param idx    tile index along this axis
 * @param log2_n log2 of the tile count along this axis
 * @param n      frame size in superblocks along this axis
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = ( idx      * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;
    /* Clamp to the frame edge (FFMIN written out so the helper is
     * self-contained); behavior is unchanged. */
    if (sb_start > n)
        sb_start = n;
    if (sb_end > n)
        sb_end = n;
    *start = sb_start << 3;
    *end   = sb_end   << 3;
}
1178 static void free_buffers(VP9Context *s)
1182 av_freep(&s->intra_pred_data[0]);
1183 for (i = 0; i < s->active_tile_cols; i++) {
1184 av_freep(&s->td[i].b_base);
1185 av_freep(&s->td[i].block_base);
1189 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1191 VP9Context *s = avctx->priv_data;
1194 for (i = 0; i < 3; i++) {
1195 if (s->s.frames[i].tf.f->buf[0])
1196 vp9_frame_unref(avctx, &s->s.frames[i]);
1197 av_frame_free(&s->s.frames[i].tf.f);
1199 for (i = 0; i < 8; i++) {
1200 if (s->s.refs[i].f->buf[0])
1201 ff_thread_release_buffer(avctx, &s->s.refs[i]);
1202 av_frame_free(&s->s.refs[i].f);
1203 if (s->next_refs[i].f->buf[0])
1204 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1205 av_frame_free(&s->next_refs[i].f);
1209 vp9_free_entries(avctx);
// Single-threaded (and frame-threaded) tile decode path: initializes one
// range decoder per tile column from the packet data, then walks the frame
// one superblock row at a time across all tile columns, backing up the last
// pixel row for next-row intra prediction, loopfiltering the completed row
// and reporting frame-threading progress.
// NOTE(review): this paste is missing several lines (e.g. `AVFrame *f;` and
// `unsigned tile_size;` declarations, the `data += ...; size -= ...;`
// advancement after each tile header, `switch`/brace lines) -- reconcile
// with upstream vp9.c before editing.
1214 static int decode_tiles(AVCodecContext *avctx,
1215 const uint8_t *data, int size)
1217 VP9Context *s = avctx->priv_data;
// Non-MT path uses only the first tile-data context.
1218 VP9TileData *td = &s->td[0];
1219 int row, col, tile_row, tile_col, ret;
1221 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1223 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1225 f = s->s.frames[CUR_FRAME].tf.f;
1226 ls_y = f->linesize[0];
1227 ls_uv =f->linesize[1];
1228 bytesperpixel = s->bytesperpixel;
// Pass 1: set up one range decoder per tile from the tile size headers.
1231 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1232 set_tile_offset(&tile_row_start, &tile_row_end,
1233 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1235 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
// The last tile has no explicit size field; it spans the remaining data.
1238 if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1239 tile_row == s->s.h.tiling.tile_rows - 1) {
1242 tile_size = AV_RB32(data);
// On truncated input, unblock any frame-thread waiters before failing.
1246 if (tile_size > size) {
1247 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1248 return AVERROR_INVALIDDATA;
1250 ret = ff_vp56_init_range_decoder(&td->c_b[tile_col], data, tile_size);
// The first bit of every tile is a zero marker bit; a set bit means
// corrupt data.
1253 if (vp56_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker bit
1254 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1255 return AVERROR_INVALIDDATA;
// Pass 2: decode superblock rows, iterating tile columns within each row.
1261 for (row = tile_row_start; row < tile_row_end;
1262 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1263 VP9Filter *lflvl_ptr = s->lflvl;
1264 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1266 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1267 set_tile_offset(&tile_col_start, &tile_col_end,
1268 tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1269 td->tile_col_start = tile_col_start;
// Reset the left-edge contexts at the start of each tile column.
1271 memset(td->left_partition_ctx, 0, 8);
1272 memset(td->left_skip_ctx, 0, 8);
1273 if (s->s.h.keyframe || s->s.h.intraonly) {
1274 memset(td->left_mode_ctx, DC_PRED, 16);
1276 memset(td->left_mode_ctx, NEARESTMV, 8);
1278 memset(td->left_y_nnz_ctx, 0, 16);
1279 memset(td->left_uv_nnz_ctx, 0, 32);
1280 memset(td->left_segpred_ctx, 0, 8);
1282 td->c = &td->c_b[tile_col];
1285 for (col = tile_col_start;
1287 col += 8, yoff2 += 64 * bytesperpixel,
1288 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1289 // FIXME integrate with lf code (i.e. zero after each
1290 // use, similar to invtxfm coefficients, or similar)
1292 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
// Pass 2 of 2-pass decoding replays stored decisions; otherwise parse.
1296 decode_sb_mem(td, row, col, lflvl_ptr,
1297 yoff2, uvoff2, BL_64X64);
1299 decode_sb(td, row, col, lflvl_ptr,
1300 yoff2, uvoff2, BL_64X64);
1308 // backup pre-loopfilter reconstruction data for intra
1309 // prediction of next row of sb64s
1310 if (row + 8 < s->rows) {
1311 memcpy(s->intra_pred_data[0],
1312 f->data[0] + yoff + 63 * ls_y,
1313 8 * s->cols * bytesperpixel);
1314 memcpy(s->intra_pred_data[1],
1315 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1316 8 * s->cols * bytesperpixel >> s->ss_h);
1317 memcpy(s->intra_pred_data[2],
1318 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1319 8 * s->cols * bytesperpixel >> s->ss_h);
1322 // loopfilter one row
1323 if (s->s.h.filter.level) {
1326 lflvl_ptr = s->lflvl;
1327 for (col = 0; col < s->cols;
1328 col += 8, yoff2 += 64 * bytesperpixel,
1329 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1330 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
1335 // FIXME maybe we can make this more finegrained by running the
1336 // loopfilter per-block instead of after each sbrow
1337 // In fact that would also make intra pred left preparation easier?
// Let waiting frame threads consume rows up to this point.
1338 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
// Slice-threading worker: one job per tile COLUMN (jobnr selects the
// column). Decodes all superblock rows of its column, backing up its slice
// of the intra prediction line, and signals per-sb-row completion to the
// loopfilter main function via vp9_report_tile_progress().
// NOTE(review): some lines are missing from this paste (e.g. the trailing
// `int threadnr` parameter, `AVFrame *f;`, the `return 0;`/closing braces)
// -- reconcile with upstream vp9.c before editing.
1345 static av_always_inline
1346 int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
1349 VP9Context *s = avctx->priv_data;
// Each job gets its own tile-data context (probabilities counts, contexts).
1350 VP9TileData *td = &s->td[jobnr];
1351 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1352 int bytesperpixel = s->bytesperpixel, row, col, tile_row;
1353 unsigned tile_cols_len;
1354 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1355 VP9Filter *lflvl_ptr_base;
1358 f = s->s.frames[CUR_FRAME].tf.f;
1359 ls_y = f->linesize[0];
1360 ls_uv =f->linesize[1];
// This job's horizontal extent and the matching pixel/lflvl base offsets.
1362 set_tile_offset(&tile_col_start, &tile_col_end,
1363 jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1364 td->tile_col_start = tile_col_start;
1365 uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3);
1366 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1367 lflvl_ptr_base = s->lflvl+(tile_col_start >> 3);
1369 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1370 set_tile_offset(&tile_row_start, &tile_row_end,
1371 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
// Range decoders were initialized per tile row by vp9_decode_frame().
1373 td->c = &td->c_b[tile_row];
1374 for (row = tile_row_start; row < tile_row_end;
1375 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1376 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1377 VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3);
// Reset left-edge contexts at the start of each superblock row.
1379 memset(td->left_partition_ctx, 0, 8);
1380 memset(td->left_skip_ctx, 0, 8);
1381 if (s->s.h.keyframe || s->s.h.intraonly) {
1382 memset(td->left_mode_ctx, DC_PRED, 16);
1384 memset(td->left_mode_ctx, NEARESTMV, 8);
1386 memset(td->left_y_nnz_ctx, 0, 16);
1387 memset(td->left_uv_nnz_ctx, 0, 32);
1388 memset(td->left_segpred_ctx, 0, 8);
1390 for (col = tile_col_start;
1392 col += 8, yoff2 += 64 * bytesperpixel,
1393 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1394 // FIXME integrate with lf code (i.e. zero after each
1395 // use, similar to invtxfm coefficients, or similar)
1396 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1397 decode_sb(td, row, col, lflvl_ptr,
1398 yoff2, uvoff2, BL_64X64);
1401 // backup pre-loopfilter reconstruction data for intra
1402 // prediction of next row of sb64s
// Unlike decode_tiles(), only this job's slice of the line is backed up.
1403 tile_cols_len = tile_col_end - tile_col_start;
1404 if (row + 8 < s->rows) {
1405 memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1406 f->data[0] + yoff + 63 * ls_y,
1407 8 * tile_cols_len * bytesperpixel);
1408 memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1409 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1410 8 * tile_cols_len * bytesperpixel >> s->ss_h);
1411 memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1412 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1413 8 * tile_cols_len * bytesperpixel >> s->ss_h);
// Tell loopfilter_proc() this column finished superblock row (row >> 3).
1416 vp9_report_tile_progress(s, row >> 3, 1);
// Main-function counterpart of decode_tiles_mt() for slice threading: for
// each superblock row, wait until every tile column has reported that row
// complete, then run the loop filter across the full row width.
// NOTE(review): this paste drops some lines (e.g. `AVFrame *f;`, the final
// arguments of the ff_vp9_loopfilter_sb() call, `return 0;`/closing braces)
// -- reconcile with upstream vp9.c before editing.
1422 static av_always_inline
1423 int loopfilter_proc(AVCodecContext *avctx)
1425 VP9Context *s = avctx->priv_data;
1426 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1427 VP9Filter *lflvl_ptr;
1428 int bytesperpixel = s->bytesperpixel, col, i;
1431 f = s->s.frames[CUR_FRAME].tf.f;
1432 ls_y = f->linesize[0];
1433 ls_uv =f->linesize[1];
1435 for (i = 0; i < s->sb_rows; i++) {
// Block until all tile_cols workers have finished superblock row i.
1436 vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);
// Filtering is skipped entirely when the frame filter level is 0.
1438 if (s->s.h.filter.level) {
1439 yoff = (ls_y * 64)*i;
1440 uvoff = (ls_uv * 64 >> s->ss_v)*i;
1441 lflvl_ptr = s->lflvl+s->sb_cols*i;
1442 for (col = 0; col < s->cols;
1443 col += 8, yoff += 64 * bytesperpixel,
1444 uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1445 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col,
// Top-level decode callback: parses the frame header, handles "show existing
// frame" packets, rotates the internal frame slots (CUR / MVPAIR / SEGMAP),
// sets up the reference map for the next frame, then decodes tiles either
// single-threaded, via slice threads, or through a hwaccel, adapts the
// probabilities, and finally outputs the frame unless it is invisible.
// NOTE(review): many lines are dropped from this paste (declarations such as
// `AVFrame *f;`/`int ret, i, j, ref;`, `goto fail`/error paths, `do {`,
// `data += ...` advancement, closing braces) -- reconcile with upstream
// vp9.c before editing any logic here.
1454 static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
1455 int *got_frame, AVPacket *pkt)
1457 const uint8_t *data = pkt->data;
1458 int size = pkt->size;
1459 VP9Context *s = avctx->priv_data;
// Keep the previous segmentation map when the new frame doesn't update it.
1461 int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
1462 (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
1465 if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
// ret == 0 from the header parser means "show existing frame `ref`":
// output a reference to that frame directly, no tile decoding.
1467 } else if (ret == 0) {
1468 if (!s->s.refs[ref].f->buf[0]) {
1469 av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
1470 return AVERROR_INVALIDDATA;
1472 if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
1474 ((AVFrame *)frame)->pts = pkt->pts;
1476 FF_DISABLE_DEPRECATION_WARNINGS
1477 ((AVFrame *)frame)->pkt_pts = pkt->pts;
1478 FF_ENABLE_DEPRECATION_WARNINGS
1480 ((AVFrame *)frame)->pkt_dts = pkt->dts;
// Reference slots carry over unchanged for a show-existing-frame packet.
1481 for (i = 0; i < 8; i++) {
1482 if (s->next_refs[i].f->buf[0])
1483 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1484 if (s->s.refs[i].f->buf[0] &&
1485 (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
// Rotate internal frame slots: previous CUR becomes the SEGMAP and MVPAIR
// reference for this frame, then allocate a fresh CUR frame.
1494 if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
1495 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
1496 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
1497 if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1498 (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
1501 if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
1502 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
1503 if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1504 (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
1506 if (s->s.frames[CUR_FRAME].tf.f->buf[0])
1507 vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME])
1508 if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
1510 f = s->s.frames[CUR_FRAME].tf.f;
1511 f->key_frame = s->s.h.keyframe;
1512 f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
// Drop a stale segmentation map if the frame size changed.
1514 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
1515 (s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width ||
1516 s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
1517 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
// Build the reference map for the NEXT frame per refreshrefmask.
1521 for (i = 0; i < 8; i++) {
1522 if (s->next_refs[i].f->buf[0])
1523 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1524 if (s->s.h.refreshrefmask & (1 << i)) {
1525 ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
1526 } else if (s->s.refs[i].f->buf[0]) {
1527 ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
// Hardware-accelerated path: delegate the whole packet to the hwaccel.
1533 if (avctx->hwaccel) {
1534 ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
1537 ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
1540 ret = avctx->hwaccel->end_frame(avctx);
1546 // main tile decode loop
// Reset the above-edge contexts for the whole frame width.
1547 memset(s->above_partition_ctx, 0, s->cols);
1548 memset(s->above_skip_ctx, 0, s->cols);
1549 if (s->s.h.keyframe || s->s.h.intraonly) {
1550 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
1552 memset(s->above_mode_ctx, NEARESTMV, s->cols);
1554 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
1555 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
1556 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
1557 memset(s->above_segpred_ctx, 0, s->cols);
// 2-pass decoding is used with frame threading when this frame adapts its
// probability context (refreshctx && !parallelmode).
1558 s->pass = s->s.frames[CUR_FRAME].uses_2pass =
1559 avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
1560 if ((ret = update_block_buffers(avctx)) < 0) {
1561 av_log(avctx, AV_LOG_ERROR,
1562 "Failed to allocate block buffers\n");
// In parallelmode the context is saved up-front (no backward adaptation),
// so dependent frame threads can start immediately.
1565 if (s->s.h.refreshctx && s->s.h.parallelmode) {
1568 for (i = 0; i < 4; i++) {
1569 for (j = 0; j < 2; j++)
1570 for (k = 0; k < 2; k++)
1571 for (l = 0; l < 6; l++)
1572 for (m = 0; m < 6; m++)
1573 memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
1574 s->prob.coef[i][j][k][l][m], 3);
1575 if (s->s.h.txfmmode == i)
1578 s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
1579 ff_thread_finish_setup(avctx);
1580 } else if (!s->s.h.refreshctx) {
1581 ff_thread_finish_setup(avctx);
// Reset per-sb-row progress counters for the slice-threading handshake.
1585 if (avctx->active_thread_type & FF_THREAD_SLICE) {
1586 for (i = 0; i < s->sb_rows; i++)
1587 atomic_store(&s->entries[i], 0);
// Rewind each tile thread's block/coefficient pointers to their bases.
1592 for (i = 0; i < s->active_tile_cols; i++) {
1593 s->td[i].b = s->td[i].b_base;
1594 s->td[i].block = s->td[i].block_base;
1595 s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
1596 s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
1597 s->td[i].eob = s->td[i].eob_base;
1598 s->td[i].uveob[0] = s->td[i].uveob_base[0];
1599 s->td[i].uveob[1] = s->td[i].uveob_base[1];
// Slice threading: pre-parse the tile size headers here (indexed by
// [tile_col].c_b[tile_row], the transpose of decode_tiles()), then run one
// worker per tile column plus the loopfilter main function.
1603 if (avctx->active_thread_type == FF_THREAD_SLICE) {
1604 int tile_row, tile_col;
1606 av_assert1(!s->pass);
1608 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1609 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1612 if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1613 tile_row == s->s.h.tiling.tile_rows - 1) {
1616 tile_size = AV_RB32(data);
1620 if (tile_size > size)
1621 return AVERROR_INVALIDDATA;
1622 ret = ff_vp56_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
1625 if (vp56_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
1626 return AVERROR_INVALIDDATA;
1632 ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
1636 ret = decode_tiles(avctx, data, size);
1641 // Sum all counts fields into td[0].counts for tile threading
1642 if (avctx->active_thread_type == FF_THREAD_SLICE)
1643 for (i = 1; i < s->s.h.tiling.tile_cols; i++)
1644 for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
1645 ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];
// Backward probability adaptation (only when not in parallelmode).
1647 if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
1648 ff_vp9_adapt_probs(s);
1649 ff_thread_finish_setup(avctx);
1651 } while (s->pass++ == 1);
1652 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
// Commit next_refs into the visible reference slots.
1656 for (i = 0; i < 8; i++) {
1657 if (s->s.refs[i].f->buf[0])
1658 ff_thread_release_buffer(avctx, &s->s.refs[i]);
1659 if (s->next_refs[i].f->buf[0] &&
1660 (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
// Invisible (alt-ref) frames are decoded but not output.
1664 if (!s->s.h.invisible) {
1665 if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
1673 static void vp9_decode_flush(AVCodecContext *avctx)
1675 VP9Context *s = avctx->priv_data;
1678 for (i = 0; i < 3; i++)
1679 vp9_frame_unref(avctx, &s->s.frames[i]);
1680 for (i = 0; i < 8; i++)
1681 ff_thread_release_buffer(avctx, &s->s.refs[i]);
1684 static int init_frames(AVCodecContext *avctx)
1686 VP9Context *s = avctx->priv_data;
1689 for (i = 0; i < 3; i++) {
1690 s->s.frames[i].tf.f = av_frame_alloc();
1691 if (!s->s.frames[i].tf.f) {
1692 vp9_decode_free(avctx);
1693 av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1694 return AVERROR(ENOMEM);
1697 for (i = 0; i < 8; i++) {
1698 s->s.refs[i].f = av_frame_alloc();
1699 s->next_refs[i].f = av_frame_alloc();
1700 if (!s->s.refs[i].f || !s->next_refs[i].f) {
1701 vp9_decode_free(avctx);
1702 av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1703 return AVERROR(ENOMEM);
1710 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1712 VP9Context *s = avctx->priv_data;
1714 avctx->internal->allocate_progress = 1;
1716 s->s.h.filter.sharpness = -1;
1718 return init_frames(avctx);
1722 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
1724 return init_frames(avctx);
// Frame-threading state transfer: copy from the thread that finished frame
// setup (src) into the next worker (dst) -- frame slots, reference slots,
// and the header/probability state a dependent frame needs.
// NOTE(review): this paste drops several lines (declarations `int i, ret;`,
// error returns, size fields such as w/h/rows/cols around the gf_fmt copy,
// `return 0;`/braces) -- reconcile with upstream vp9.c before editing.
1727 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1730 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
// Re-reference the three internal frame slots from the source thread.
1732 for (i = 0; i < 3; i++) {
1733 if (s->s.frames[i].tf.f->buf[0])
1734 vp9_frame_unref(dst, &s->s.frames[i]);
1735 if (ssrc->s.frames[i].tf.f->buf[0]) {
1736 if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
// The destination's refs become the source's *next* refs (post-update map).
1740 for (i = 0; i < 8; i++) {
1741 if (s->s.refs[i].f->buf[0])
1742 ff_thread_release_buffer(dst, &s->s.refs[i]);
1743 if (ssrc->next_refs[i].f->buf[0]) {
1744 if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
// Scalar header/format state the next frame's header parse depends on.
1749 s->s.h.invisible = ssrc->s.h.invisible;
1750 s->s.h.keyframe = ssrc->s.h.keyframe;
1751 s->s.h.intraonly = ssrc->s.h.intraonly;
1752 s->ss_v = ssrc->ss_v;
1753 s->ss_h = ssrc->ss_h;
1754 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1755 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1756 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1757 s->bytesperpixel = ssrc->bytesperpixel;
1758 s->gf_fmt = ssrc->gf_fmt;
1761 s->s.h.bpp = ssrc->s.h.bpp;
1762 s->bpp_index = ssrc->bpp_index;
1763 s->pix_fmt = ssrc->pix_fmt;
// Probability contexts and per-frame deltas are copied wholesale.
1764 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
1765 memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
1766 memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1767 sizeof(s->s.h.segmentation.feat));
1773 AVCodec ff_vp9_decoder = {
1775 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
1776 .type = AVMEDIA_TYPE_VIDEO,
1777 .id = AV_CODEC_ID_VP9,
1778 .priv_data_size = sizeof(VP9Context),
1779 .init = vp9_decode_init,
1780 .close = vp9_decode_free,
1781 .decode = vp9_decode_frame,
1782 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
1783 .caps_internal = FF_CODEC_CAP_SLICE_THREAD_HAS_MF,
1784 .flush = vp9_decode_flush,
1785 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
1786 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
1787 .profiles = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),