/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "libavutil/avassert.h"
#include "libavutil/pixdesc.h"

#include "avcodec.h"
#include "get_bits.h"
#include "hwaccel.h"
#include "internal.h"
#include "profiles.h"
#include "thread.h"
#include "videodsp.h"
#include "vp56.h"
#include "vp9.h"
#include "vp9data.h"
#include "vp9dec.h"

#define VP9_SYNCCODE 0x498342
#if HAVE_THREADS
static void vp9_free_entries(AVCodecContext *avctx) {
    VP9Context *s = avctx->priv_data;

    if (avctx->active_thread_type & FF_THREAD_SLICE) {
        pthread_mutex_destroy(&s->progress_mutex);
        pthread_cond_destroy(&s->progress_cond);
        av_freep(&s->entries);
    }
}

static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
    VP9Context *s = avctx->priv_data;
    int i;

    if (avctx->active_thread_type & FF_THREAD_SLICE) {
        av_freep(&s->entries);

        s->entries = av_malloc_array(n, sizeof(atomic_int));
        if (!s->entries)
            return AVERROR(ENOMEM);

        for (i = 0; i < n; i++)
            atomic_init(&s->entries[i], 0);

        pthread_mutex_init(&s->progress_mutex, NULL);
        pthread_cond_init(&s->progress_cond, NULL);
    }
    return 0;
}
static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
    pthread_mutex_lock(&s->progress_mutex);
    atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
    pthread_cond_signal(&s->progress_cond);
    pthread_mutex_unlock(&s->progress_mutex);
}

static void vp9_await_tile_progress(VP9Context *s, int field, int n) {
    // fast path: the desired progress has already been published
    if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
        return;

    pthread_mutex_lock(&s->progress_mutex);
    while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) != n)
        pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
    pthread_mutex_unlock(&s->progress_mutex);
}
#else
static void vp9_free_entries(AVCodecContext *avctx) {}
static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
#endif
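
/*
 * Worked example of the tile progress protocol above (illustrative, not
 * normative): with 4 tile columns, each column's worker thread calls
 * vp9_report_tile_progress(s, sbrow, 1) after finishing superblock row
 * `sbrow`, so s->entries[sbrow] counts how many columns are done. The loop
 * filter thread calls vp9_await_tile_progress(s, sbrow, 4) and proceeds once
 * all 4 columns have reported. The release store in the reporter paired with
 * the acquire load in the waiter guarantees that the pixels written by the
 * tile threads are visible before filtering starts.
 */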
static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
{
    ff_thread_release_buffer(avctx, &f->tf);
    av_buffer_unref(&f->extradata);
    av_buffer_unref(&f->hwaccel_priv_buf);
    f->segmentation_map = NULL;
    f->hwaccel_picture_private = NULL;
}
static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
{
    VP9Context *s = avctx->priv_data;
    int ret, sz;

    ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
    if (ret < 0)
        return ret;

    sz = 64 * s->sb_cols * s->sb_rows;
    f->extradata = av_buffer_allocz(sz * (1 + sizeof(VP9mvrefPair)));
    if (!f->extradata)
        goto fail;

    f->segmentation_map = f->extradata->data;
    f->mv = (VP9mvrefPair *) (f->extradata->data + sz);

    if (avctx->hwaccel) {
        const AVHWAccel *hwaccel = avctx->hwaccel;
        av_assert0(!f->hwaccel_picture_private);
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
                goto fail;
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
        }
    }

    return 0;

fail:
    vp9_frame_unref(avctx, f);
    return AVERROR(ENOMEM);
}
static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
{
    int ret;

    ret = ff_thread_ref_frame(&dst->tf, &src->tf);
    if (ret < 0)
        return ret;

    dst->extradata = av_buffer_ref(src->extradata);
    if (!dst->extradata)
        goto fail;

    dst->segmentation_map = src->segmentation_map;
    dst->mv = src->mv;
    dst->uses_2pass = src->uses_2pass;

    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            goto fail;
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;

fail:
    vp9_frame_unref(avctx, dst);
    return AVERROR(ENOMEM);
}
static int update_size(AVCodecContext *avctx, int w, int h)
{
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
                     CONFIG_VP9_NVDEC_HWACCEL + \
                     CONFIG_VP9_VAAPI_HWACCEL)
    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
    VP9Context *s = avctx->priv_data;
    uint8_t *p;
    int bytesperpixel = s->bytesperpixel, ret, cols, rows;
    int lflvl_len, i;

    av_assert0(w > 0 && h > 0);

    if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
        if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
            return ret;

        switch (s->pix_fmt) {
        case AV_PIX_FMT_YUV420P:
#if CONFIG_VP9_DXVA2_HWACCEL
            *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
#endif
#if CONFIG_VP9_D3D11VA_HWACCEL
            *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
            *fmtp++ = AV_PIX_FMT_D3D11;
#endif
#if CONFIG_VP9_NVDEC_HWACCEL
            *fmtp++ = AV_PIX_FMT_CUDA;
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
            break;
        case AV_PIX_FMT_YUV420P10:
        case AV_PIX_FMT_YUV420P12:
#if CONFIG_VP9_NVDEC_HWACCEL
            *fmtp++ = AV_PIX_FMT_CUDA;
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
            break;
        }

        *fmtp++ = s->pix_fmt;
        *fmtp = AV_PIX_FMT_NONE;

        ret = ff_thread_get_format(avctx, pix_fmts);
        if (ret < 0)
            return ret;

        avctx->pix_fmt = ret;
        s->gf_fmt = s->pix_fmt;
        s->w = w;
        s->h = h;
    }

    cols = (w + 7) >> 3;
    rows = (h + 7) >> 3;

    if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
        return 0;

    s->last_fmt = s->pix_fmt;
    s->sb_cols  = (w + 63) >> 6;
    s->sb_rows  = (h + 63) >> 6;
    s->cols     = (w + 7) >> 3;
    s->rows     = (h + 7) >> 3;
    lflvl_len   = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;

#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    // FIXME we slightly over-allocate here for subsampled chroma, but a little
    // bit of padding shouldn't affect performance...
    p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
                                lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0],  uint8_t *,    64 * bytesperpixel);
    assign(s->intra_pred_data[1],  uint8_t *,    64 * bytesperpixel);
    assign(s->intra_pred_data[2],  uint8_t *,    64 * bytesperpixel);
    assign(s->above_y_nnz_ctx,     uint8_t *,    16);
    assign(s->above_mode_ctx,      uint8_t *,    16);
    assign(s->above_mv_ctx,        VP56mv(*)[2], 16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *,    16);
    assign(s->above_uv_nnz_ctx[1], uint8_t *,    16);
    assign(s->above_partition_ctx, uint8_t *,     8);
    assign(s->above_skip_ctx,      uint8_t *,     8);
    assign(s->above_txfm_ctx,      uint8_t *,     8);
    assign(s->above_segpred_ctx,   uint8_t *,     8);
    assign(s->above_intra_ctx,     uint8_t *,     8);
    assign(s->above_comp_ctx,      uint8_t *,     8);
    assign(s->above_ref_ctx,       uint8_t *,     8);
    assign(s->above_filter_ctx,    uint8_t *,     8);
    assign(s->lflvl,               VP9Filter *,   lflvl_len);
#undef assign

    // these will be re-allocated a little later
    for (i = 0; i < s->active_tile_cols; i++) {
        av_freep(&s->td[i].b_base);
        av_freep(&s->td[i].block_base);
    }

    if (s->s.h.bpp != s->last_bpp) {
        ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
        ff_videodsp_init(&s->vdsp, s->s.h.bpp);
        s->last_bpp = s->s.h.bpp;
    }

    return 0;
}
static int update_block_buffers(AVCodecContext *avctx)
{
    int i;
    VP9Context *s = avctx->priv_data;
    int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
    VP9TileData *td = &s->td[0];

    if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
        return 0;

    av_free(td->b_base);
    av_free(td->block_base);
    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
    chroma_eobs   = 16 * 16 >> (s->ss_h + s->ss_v);
    if (s->s.frames[CUR_FRAME].uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

        td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
        td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                    16 * 16 + 2 * chroma_eobs) * sbs);
        if (!td->b_base || !td->block_base)
            return AVERROR(ENOMEM);
        td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
        td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
        td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
        td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
        td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;
    } else {
        for (i = 1; i < s->active_tile_cols; i++) {
            if (s->td[i].b_base && s->td[i].block_base) {
                av_free(s->td[i].b_base);
                av_free(s->td[i].block_base);
            }
        }
        for (i = 0; i < s->active_tile_cols; i++) {
            s->td[i].b_base = av_malloc(sizeof(VP9Block));
            s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                       16 * 16 + 2 * chroma_eobs);
            if (!s->td[i].b_base || !s->td[i].block_base)
                return AVERROR(ENOMEM);
            s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
            s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
            s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
            s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
            s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;
        }
    }
    s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;

    return 0;
}
// The sign bit is at the end, not the start, of a bit sequence
static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
{
    int v = get_bits(gb, n);
    return get_bits1(gb) ? -v : v;
}

static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    if (v > 2 * m)
        return v;
    if (v & 1)
        return m - ((v + 1) >> 1);
    return m + (v >> 1);
}
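
/*
 * Worked example (illustrative): inv_recenter_nonneg() undoes the
 * "recentering" of a value around m. For m = 10:
 *   v = 0           -> 10          (no change)
 *   v = 1           -> 10 - 1 = 9
 *   v = 2           -> 10 + 1 = 11
 *   v = 3           -> 10 - 2 = 8
 *   v = 21 (> 2*m)  -> 21          (passed through unchanged)
 * i.e. odd v codes values below m, even v codes values above m, and
 * anything beyond 2*m can only lie on one side of m, so it is stored
 * directly.
 */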
// differential forward probability updates
static int update_prob(VP56RangeCoder *c, int p)
{
    static const int inv_map_table[255] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253, 253,
    };
    int d;

    /* This code performs a differential probability update. For a current
     * probability A in the range [1, 255], the difference to any new
     * probability can be expressed differentially as 1-A, 255-A, where part
     * of this (absolute) range exists both in the positive and the negative
     * half, whereas the remainder exists in only one half. The shared part
     * is coded differentially, i.e. times two, with the lowest bit giving
     * the sign, and the one-sided part is coded on top of that. The
     * resulting absolute difference lies in [0, 254], and a bigger value
     * means we are further away from the original value A, so it can be
     * coded as a VLC, since higher values are increasingly unlikely. The
     * first 20 values in inv_map_table[] allow 'cheap, rough' updates,
     * while the 'fine, exact' updates live further down the table, which
     * adds one extra dimension to this differential update model. */
    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65)
            d = (d << 1) - 65 + vp8_rac_get(c);
        d += 64;
        av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
    }

    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
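
/*
 * Worked example of the VLC above (illustrative only): suppose the current
 * probability is p = 20 and the range coder yields a 0 bit followed by the
 * 4-bit value 5. That selects d = 5, and inv_map_table[5] = 72, one of the
 * 20 "cheap, rough" coarse steps. Since p <= 128, the new probability is
 * 1 + inv_recenter_nonneg(72, 19); 72 > 2 * 19, so the value passes through
 * unchanged and the result is 1 + 72 = 73. Larger d values, drawn from the
 * "fine, exact" tail of the table, instead select deltas close to the old
 * value.
 */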
static int read_colorspace_details(AVCodecContext *avctx)
{
    static const enum AVColorSpace colorspaces[8] = {
        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
    };
    VP9Context *s = avctx->priv_data;
    int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12

    s->bpp_index = bits;
    s->s.h.bpp = 8 + bits * 2;
    s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
    avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
    if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
        static const enum AVPixelFormat pix_fmt_rgb[3] = {
            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
        };
        s->ss_h = s->ss_v = 0;
        avctx->color_range = AVCOL_RANGE_JPEG;
        s->pix_fmt = pix_fmt_rgb[bits];
        if (avctx->profile & 1) {
            if (get_bits1(&s->gb)) {
                av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
                return AVERROR_INVALIDDATA;
            }
        } else {
            av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
                   avctx->profile);
            return AVERROR_INVALIDDATA;
        }
    } else {
        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
            { { AV_PIX_FMT_YUV444P,   AV_PIX_FMT_YUV422P },
              { AV_PIX_FMT_YUV440P,   AV_PIX_FMT_YUV420P } },
            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
        };
        avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
        if (avctx->profile & 1) {
            s->ss_h = get_bits1(&s->gb);
            s->ss_v = get_bits1(&s->gb);
            s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
            if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
                av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
                       avctx->profile);
                return AVERROR_INVALIDDATA;
            } else if (get_bits1(&s->gb)) {
                av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
                       avctx->profile);
                return AVERROR_INVALIDDATA;
            }
        } else {
            s->ss_h = s->ss_v = 1;
            s->pix_fmt = pix_fmt_for_ss[bits][1][1];
        }
    }

    return 0;
}
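
/*
 * Illustrative mapping of the fields parsed above (not additional spec
 * text): in profile 2, a stream with the extra depth bit set yields
 * bits = 2, i.e. bpp = 12 and bytesperpixel = 2; combined with the implicit
 * 4:2:0 subsampling (ss_h = ss_v = 1) this selects AV_PIX_FMT_YUV420P12
 * from pix_fmt_for_ss. Profiles 0 and 1 always use bits = 0, i.e. 8 bpp
 * and bytesperpixel = 1.
 */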
static int decode_frame_header(AVCodecContext *avctx,
                               const uint8_t *data, int size, int *ref)
{
    VP9Context *s = avctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
    int last_invisible;
    const uint8_t *data2;

    /* general header */
    if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
        return ret;
    }
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    }
    avctx->profile  = get_bits1(&s->gb);
    avctx->profile |= get_bits1(&s->gb) << 1;
    if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
    if (avctx->profile > 3) {
        av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
        return AVERROR_INVALIDDATA;
    }
    s->s.h.profile = avctx->profile;
    if (get_bits1(&s->gb)) {
        *ref = get_bits(&s->gb, 3);
        return 0;
    }

    s->last_keyframe  = s->s.h.keyframe;
    s->s.h.keyframe   = !get_bits1(&s->gb);

    last_invisible    = s->s.h.invisible;
    s->s.h.invisible  = !get_bits1(&s->gb);
    s->s.h.errorres   = get_bits1(&s->gb);
    s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;

    if (s->s.h.keyframe) {
        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        }
        if ((ret = read_colorspace_details(avctx)) < 0)
            return ret;
        // for profile 1, here follows the subsampling bits
        s->s.h.refreshrefmask = 0xff;
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
    } else {
        s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
        s->s.h.resetctx  = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
        if (s->s.h.intraonly) {
            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            }
            if (avctx->profile >= 1) {
                if ((ret = read_colorspace_details(avctx)) < 0)
                    return ret;
            } else {
                s->ss_h = s->ss_v = 1;
                s->s.h.bpp = 8;
                s->bpp_index = 0;
                s->bytesperpixel = 1;
                s->pix_fmt = AV_PIX_FMT_YUV420P;
                avctx->colorspace = AVCOL_SPC_BT470BG;
                avctx->color_range = AVCOL_RANGE_MPEG;
            }
            s->s.h.refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
        } else {
            s->s.h.refreshrefmask = get_bits(&s->gb, 8);
            s->s.h.refidx[0]      = get_bits(&s->gb, 3);
            s->s.h.signbias[0]    = get_bits1(&s->gb) && !s->s.h.errorres;
            s->s.h.refidx[1]      = get_bits(&s->gb, 3);
            s->s.h.signbias[1]    = get_bits1(&s->gb) && !s->s.h.errorres;
            s->s.h.refidx[2]      = get_bits(&s->gb, 3);
            s->s.h.signbias[2]    = get_bits1(&s->gb) && !s->s.h.errorres;
            if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
                !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
                !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
                av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            }
            if (get_bits1(&s->gb)) {
                w = s->s.refs[s->s.h.refidx[0]].f->width;
                h = s->s.refs[s->s.h.refidx[0]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->s.refs[s->s.h.refidx[1]].f->width;
                h = s->s.refs[s->s.h.refidx[1]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->s.refs[s->s.h.refidx[2]].f->width;
                h = s->s.refs[s->s.h.refidx[2]].f->height;
            } else {
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            }
            // Note that in this code, "CUR_FRAME" is actually before we
            // have formally allocated a frame, and thus actually represents
            // the _last_ frame
            s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
                                         s->s.frames[CUR_FRAME].tf.f->height == h;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->s.h.highprecisionmvs = get_bits1(&s->gb);
            s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
                                                    get_bits(&s->gb, 2);
            s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
                                    s->s.h.signbias[0] != s->s.h.signbias[2];
            if (s->s.h.allowcompinter) {
                if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
                    s->s.h.fixcompref    = 2;
                    s->s.h.varcompref[0] = 0;
                    s->s.h.varcompref[1] = 1;
                } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
                    s->s.h.fixcompref    = 1;
                    s->s.h.varcompref[0] = 0;
                    s->s.h.varcompref[1] = 2;
                } else {
                    s->s.h.fixcompref    = 0;
                    s->s.h.varcompref[0] = 1;
                    s->s.h.varcompref[1] = 2;
                }
            }
        }
    }
    s->s.h.refreshctx   = s->s.h.errorres ? 0 : get_bits1(&s->gb);
    s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
    s->s.h.framectxid   = c = get_bits(&s->gb, 2);
    if (s->s.h.keyframe || s->s.h.intraonly)
        s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes

    /* loopfilter header data */
    if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
        // reset loopfilter defaults
        s->s.h.lf_delta.ref[0]  = 1;
        s->s.h.lf_delta.ref[1]  = 0;
        s->s.h.lf_delta.ref[2]  = -1;
        s->s.h.lf_delta.ref[3]  = -1;
        s->s.h.lf_delta.mode[0] = 0;
        s->s.h.lf_delta.mode[1] = 0;
        memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
    }
    s->s.h.filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
    // the old cache values since they are still valid
    if (s->s.h.filter.sharpness != sharp) {
        for (i = 1; i <= 63; i++) {
            int limit = i;

            if (sharp > 0) {
                limit >>= (sharp + 3) >> 2;
                limit = FFMIN(limit, 9 - sharp);
            }
            limit = FFMAX(limit, 1);

            s->filter_lut.lim_lut[i] = limit;
            s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
        }
    }
    s->s.h.filter.sharpness = sharp;
    if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
        if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
        }
    }
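
    /*
     * Worked example of the LUT above (illustrative): for sharpness 4 and
     * filter strength i = 32, limit starts at 32, is shifted down by
     * (4 + 3) >> 2 = 1 to 16, then clamped to FFMIN(16, 9 - 4) = 5, so
     * lim_lut[32] = 5 and mblim_lut[32] = 2 * (32 + 2) + 5 = 73. With
     * sharpness 0 the limit stays at i, giving the widest filter.
     */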
    /* quantization header data */
    s->s.h.yac_qi      = get_bits(&s->gb, 8);
    s->s.h.ydc_qdelta  = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->s.h.lossless    = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
                         s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
    if (s->s.h.lossless)
        avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;

    /* segmentation header info */
    if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
                                              get_bits(&s->gb, 8) : 255;
            if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
                for (i = 0; i < 3; i++)
                    s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
                                                       get_bits(&s->gb, 8) : 255;
        }

        if (get_bits1(&s->gb)) {
            s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
                if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
                if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
            }
        }
    }
    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
            if (s->s.h.segmentation.absolute_vals)
                qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
            else
                qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
        } else {
            qyac = s->s.h.yac_qi;
        }
        qydc  = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
        qyac  = av_clip_uintp2(qyac, 8);

        s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
        s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
        s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
        s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];

        sh = s->s.h.filter.level >= 32;
        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
            if (s->s.h.segmentation.absolute_vals)
                lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
            else
                lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
        } else {
            lflvl = s->s.h.filter.level;
        }
        if (s->s.h.lf_delta.enabled) {
            s->s.h.segmentation.feat[i].lflvl[0][0] =
            s->s.h.segmentation.feat[i].lflvl[0][1] =
                av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
            for (j = 1; j < 4; j++) {
                s->s.h.segmentation.feat[i].lflvl[j][0] =
                    av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
                                             s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
                s->s.h.segmentation.feat[i].lflvl[j][1] =
                    av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
                                             s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
            }
        } else {
            memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
                   sizeof(s->s.h.segmentation.feat[i].lflvl));
        }
    }
    /* tiling info */
    if ((ret = update_size(avctx, w, h)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
               w, h, s->pix_fmt);
        return ret;
    }
    for (s->s.h.tiling.log2_tile_cols = 0;
         s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
         s->s.h.tiling.log2_tile_cols++) ;
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->s.h.tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->s.h.tiling.log2_tile_cols++;
        else
            break;
    }
    s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
    s->s.h.tiling.tile_rows      = 1 << s->s.h.tiling.log2_tile_rows;
    if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
        int n_range_coders;
        VP56RangeCoder *rc;

        if (s->td) {
            for (i = 0; i < s->active_tile_cols; i++) {
                av_free(s->td[i].b_base);
                av_free(s->td[i].block_base);
            }
            av_free(s->td);
        }

        s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
        vp9_free_entries(avctx);
        s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
                              s->s.h.tiling.tile_cols : 1;
        vp9_alloc_entries(avctx, s->sb_rows);
        if (avctx->active_thread_type == FF_THREAD_SLICE) {
            n_range_coders = 4; // max_tile_rows
        } else {
            n_range_coders = s->s.h.tiling.tile_cols;
        }
        s->td = av_mallocz_array(s->active_tile_cols, sizeof(VP9TileData) +
                                 n_range_coders * sizeof(VP56RangeCoder));
        if (!s->td)
            return AVERROR(ENOMEM);
        rc = (VP56RangeCoder *) &s->td[s->active_tile_cols];
        for (i = 0; i < s->active_tile_cols; i++) {
            s->td[i].s = s;
            s->td[i].c_b = rc;
            rc += n_range_coders;
        }
    }
    /* check reference frames */
    if (!s->s.h.keyframe && !s->s.h.intraonly) {
        for (i = 0; i < 3; i++) {
            AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
            int refw = ref->width, refh = ref->height;

            if (ref->format != avctx->pix_fmt) {
                av_log(avctx, AV_LOG_ERROR,
                       "Ref pixfmt (%s) did not match current frame (%s)\n",
                       av_get_pix_fmt_name(ref->format),
                       av_get_pix_fmt_name(avctx->pix_fmt));
                return AVERROR_INVALIDDATA;
            } else if (refw == w && refh == h) {
                s->mvscale[i][0] = s->mvscale[i][1] = 0;
            } else {
                if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
                    av_log(avctx, AV_LOG_ERROR,
                           "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
                           refw, refh, w, h);
                    return AVERROR_INVALIDDATA;
                }
                s->mvscale[i][0] = (refw << 14) / w;
                s->mvscale[i][1] = (refh << 14) / h;
                s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
                s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
            }
        }
    }
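
    /*
     * Worked example of the Q14 scale factors above (illustrative): for a
     * 640-wide reference and a 1280-wide current frame,
     * mvscale[i][0] = (640 << 14) / 1280 = 8192, i.e. 0.5 in Q14 fixed
     * point, and mvstep[i][0] = 16 * 8192 >> 14 = 8: stepping 16 pixels in
     * the current frame advances 8 pixels in the reference.
     */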
    if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
        s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
                           s->prob_ctx[3].p = ff_vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
    } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
        s->prob_ctx[c].p = ff_vp9_default_probs;
        memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
    }
    // next 16 bits is size of the rest of the header (arith-coded)
    s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
    s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;

    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    }
    ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (ret < 0)
        return ret;

    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;
    }
    for (i = 0; i < s->active_tile_cols; i++) {
        if (s->s.h.keyframe || s->s.h.intraonly) {
            memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
            memset(s->td[i].counts.eob,  0, sizeof(s->td[0].counts.eob));
        } else {
            memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));
        }
    }

    /* FIXME is it faster to not copy here, but do it down in the fw updates
     * as explicit copies if the fw update is missing (and skip the copy upon
     * eob instead)? */
    s->prob.p = s->prob_ctx[c].p;
    // txfm updates
    if (s->s.h.lossless) {
        s->s.h.txfmmode = TX_4X4;
    } else {
        s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->s.h.txfmmode == 3)
            s->s.h.txfmmode += vp8_rac_get(&s->c);

        if (s->s.h.txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);
        }
    }
    // coef updates
    for (i = 0; i < 4; i++) {
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                                break;
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252))
                                    p[n] = update_prob(&s->c, r[n]);
                                else
                                    p[n] = r[n];
                            }
                            memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
                        }
        } else {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m > 3 && l == 0) // dc only has 3 pt
                                break;
                            memcpy(p, r, 3);
                            memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
                        }
        }
        if (s->s.h.txfmmode == i)
            break;
    }
    // mode updates
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->s.h.keyframe && !s->s.h.intraonly) {
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->s.h.filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);

        if (s->s.h.allowcompinter) {
            s->s.h.comppredmode = vp8_rac_get(&s->c);
            if (s->s.h.comppredmode)
                s->s.h.comppredmode += vp8_rac_get(&s->c);
            if (s->s.h.comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.comp[i] =
                            update_prob(&s->c, s->prob.p.comp[i]);
        } else {
            s->s.h.comppredmode = PRED_SINGLEREF;
        }

        if (s->s.h.comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);
            }
        }

        if (s->s.h.comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);
        }

        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            update_prob(&s->c,
                                        s->prob.p.partition[3 - i][j][k]);
        // mv fields don't use the update_prob subexp model for some reason;
        // they are coded in 7 bits with an implied low bit of 1, which keeps
        // the resulting probability odd and nonzero
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        if (s->s.h.highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
            }
        }
    }

    return (data2 - data) + size2;
}
static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    const VP9Context *s = td->s;
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
                                                             s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl;
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) { // FIXME why not <=?
        if (row + hbs < s->rows) { // FIXME why not <=?
            bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_H:
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_V:
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_SPLIT:
                decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(td, row, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(td, row + hbs, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                break;
            default:
                av_assert0(0);
            }
        } else if (vp56_rac_get_prob_branchy(td->c, p[1])) {
            bp = PARTITION_SPLIT;
            decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
            decode_sb(td, row, col + hbs, lflvl,
                      yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
        } else {
            bp = PARTITION_H;
            ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else if (row + hbs < s->rows) { // FIXME why not <=?
        if (vp56_rac_get_prob_branchy(td->c, p[2])) {
            bp = PARTITION_SPLIT;
            decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        } else {
            bp = PARTITION_V;
            ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else {
        bp = PARTITION_SPLIT;
        decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    td->counts.partition[bl][c][bp]++;
}
static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
                          ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    const VP9Context *s = td->s;
    VP9Block *b = td->b;
    ptrdiff_t hbs = 4 >> bl;
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        av_assert2(b->bl == BL_8X8);
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
    } else if (td->b->bl == bl) {
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
        if (b->bp == PARTITION_H && row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
        } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
            yoff  += hbs * 8 * bytesperpixel;
            uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
            ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
        }
    } else {
        decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
        if (col + hbs < s->cols) { // FIXME why not <=?
            if (row + hbs < s->rows) {
                decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb_mem(td, row + hbs, col + hbs, lflvl,
                              yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
            } else {
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
            }
        } else if (row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        }
    }
}
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = ( idx      * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;
    *start = FFMIN(sb_start, n) << 3;
    *end   = FFMIN(sb_end,   n) << 3;
}
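
/*
 * Worked example (illustrative): with 2 tile columns (log2_n = 1) across
 * n = 10 superblock columns, tile 0 spans sb columns [0, 5) and tile 1
 * spans [5, 10). The << 3 at the end converts superblock units into
 * 8x8-block units, so *start/*end come back in the same units as
 * s->cols/s->rows.
 */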
static void free_buffers(VP9Context *s)
{
    int i;

    av_freep(&s->intra_pred_data[0]);
    for (i = 0; i < s->active_tile_cols; i++) {
        av_freep(&s->td[i].b_base);
        av_freep(&s->td[i].block_base);
    }
}
static av_cold int vp9_decode_free(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < 3; i++) {
        if (s->s.frames[i].tf.f->buf[0])
            vp9_frame_unref(avctx, &s->s.frames[i]);
        av_frame_free(&s->s.frames[i].tf.f);
    }
    for (i = 0; i < 8; i++) {
        if (s->s.refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->s.refs[i]);
        av_frame_free(&s->s.refs[i].f);
        if (s->next_refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->next_refs[i]);
        av_frame_free(&s->next_refs[i].f);
    }

    free_buffers(s);
    vp9_free_entries(avctx);
    av_freep(&s->td);
    return 0;
}
static int decode_tiles(AVCodecContext *avctx,
                        const uint8_t *data, int size)
{
    VP9Context *s = avctx->priv_data;
    VP9TileData *td = &s->td[0];
    int row, col, tile_row, tile_col, ret;
    int bytesperpixel;
    int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    AVFrame *f;
    ptrdiff_t yoff, uvoff, ls_y, ls_uv;

    f = s->s.frames[CUR_FRAME].tf.f;
    ls_y = f->linesize[0];
    ls_uv = f->linesize[1];
    bytesperpixel = s->bytesperpixel;

    yoff = uvoff = 0;
    for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
        set_tile_offset(&tile_row_start, &tile_row_end,
                        tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);

        for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
            int64_t tile_size;

            if (tile_col == s->s.h.tiling.tile_cols - 1 &&
                tile_row == s->s.h.tiling.tile_rows - 1) {
                tile_size = size;
            } else {
                tile_size = AV_RB32(data);
                data += 4;
                size -= 4;
            }
            if (tile_size > size) {
                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                return AVERROR_INVALIDDATA;
            }
            ret = ff_vp56_init_range_decoder(&td->c_b[tile_col], data, tile_size);
            if (ret < 0)
                return ret;
            if (vp56_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker bit
                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                return AVERROR_INVALIDDATA;
            }
            data += tile_size;
            size -= tile_size;
        }

        for (row = tile_row_start; row < tile_row_end;
             row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
            VP9Filter *lflvl_ptr = s->lflvl;
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

            for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
                set_tile_offset(&tile_col_start, &tile_col_end,
                                tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
                td->tile_col_start = tile_col_start;
                if (s->pass != 2) {
                    memset(td->left_partition_ctx, 0, 8);
                    memset(td->left_skip_ctx, 0, 8);
                    if (s->s.h.keyframe || s->s.h.intraonly) {
                        memset(td->left_mode_ctx, DC_PRED, 16);
                    } else {
                        memset(td->left_mode_ctx, NEARESTMV, 8);
                    }
                    memset(td->left_y_nnz_ctx, 0, 16);
                    memset(td->left_uv_nnz_ctx, 0, 32);
                    memset(td->left_segpred_ctx, 0, 8);

                    td->c = &td->c_b[tile_col];
                }

                for (col = tile_col_start;
                     col < tile_col_end;
                     col += 8, yoff2 += 64 * bytesperpixel,
                     uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                    // FIXME integrate with lf code (i.e. zero after each
                    // use, similar to invtxfm coefficients, or similar)
                    if (s->pass != 2) {
                        memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
                    }

                    if (s->pass == 2) {
                        decode_sb_mem(td, row, col, lflvl_ptr,
                                      yoff2, uvoff2, BL_64X64);
                    } else {
                        decode_sb(td, row, col, lflvl_ptr,
                                  yoff2, uvoff2, BL_64X64);
                    }
                }
            }

            if (s->pass == 1)
                continue;

            // backup pre-loopfilter reconstruction data for intra
            // prediction of next row of sb64s
            if (row + 8 < s->rows) {
                memcpy(s->intra_pred_data[0],
                       f->data[0] + yoff + 63 * ls_y,
                       8 * s->cols * bytesperpixel);
                memcpy(s->intra_pred_data[1],
                       f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * s->cols * bytesperpixel >> s->ss_h);
                memcpy(s->intra_pred_data[2],
                       f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * s->cols * bytesperpixel >> s->ss_h);
            }

            // loopfilter one row
            if (s->s.h.filter.level) {
                yoff2 = yoff;
                uvoff2 = uvoff;
                lflvl_ptr = s->lflvl;
                for (col = 0; col < s->cols;
                     col += 8, yoff2 += 64 * bytesperpixel,
                     uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                    ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
                                         yoff2, uvoff2);
                }
            }

            // FIXME maybe we can make this more finegrained by running the
            // loopfilter per-block instead of after each sbrow
            // In fact that would also make intra pred left preparation easier?
            ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
        }
    }
    return 0;
}
#if HAVE_THREADS
static av_always_inline
int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
                    int threadnr)
{
    VP9Context *s = avctx->priv_data;
    VP9TileData *td = &s->td[jobnr];
    ptrdiff_t uvoff, yoff, ls_y, ls_uv;
    int bytesperpixel = s->bytesperpixel, row, col, tile_row;
    unsigned tile_cols_len;
    int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    VP9Filter *lflvl_ptr_base;
    AVFrame *f;

    f = s->s.frames[CUR_FRAME].tf.f;
    ls_y = f->linesize[0];
    ls_uv = f->linesize[1];

    set_tile_offset(&tile_col_start, &tile_col_end,
                    jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
    td->tile_col_start = tile_col_start;
    uvoff = (64 * bytesperpixel >> s->ss_h) * (tile_col_start >> 3);
    yoff  = (64 * bytesperpixel) * (tile_col_start >> 3);
    lflvl_ptr_base = s->lflvl + (tile_col_start >> 3);

    for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
        set_tile_offset(&tile_row_start, &tile_row_end,
                        tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);

        td->c = &td->c_b[tile_row];
        for (row = tile_row_start; row < tile_row_end;
             row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
            VP9Filter *lflvl_ptr = lflvl_ptr_base + s->sb_cols * (row >> 3);

            memset(td->left_partition_ctx, 0, 8);
            memset(td->left_skip_ctx, 0, 8);
            if (s->s.h.keyframe || s->s.h.intraonly) {
                memset(td->left_mode_ctx, DC_PRED, 16);
            } else {
                memset(td->left_mode_ctx, NEARESTMV, 8);
            }
            memset(td->left_y_nnz_ctx, 0, 16);
            memset(td->left_uv_nnz_ctx, 0, 32);
            memset(td->left_segpred_ctx, 0, 8);

            for (col = tile_col_start;
                 col < tile_col_end;
                 col += 8, yoff2 += 64 * bytesperpixel,
                 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                // FIXME integrate with lf code (i.e. zero after each
                // use, similar to invtxfm coefficients, or similar)
                memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
                decode_sb(td, row, col, lflvl_ptr,
                          yoff2, uvoff2, BL_64X64);
            }

            // backup pre-loopfilter reconstruction data for intra
            // prediction of next row of sb64s
            tile_cols_len = tile_col_end - tile_col_start;
            if (row + 8 < s->rows) {
                memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
                       f->data[0] + yoff + 63 * ls_y,
                       8 * tile_cols_len * bytesperpixel);
                memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
                       f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * tile_cols_len * bytesperpixel >> s->ss_h);
                memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
                       f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * tile_cols_len * bytesperpixel >> s->ss_h);
            }

            vp9_report_tile_progress(s, row >> 3, 1);
        }
    }
    return 0;
}
static av_always_inline
int loopfilter_proc(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    ptrdiff_t uvoff, yoff, ls_y, ls_uv;
    VP9Filter *lflvl_ptr;
    int bytesperpixel = s->bytesperpixel, col, i;
    AVFrame *f;

    f = s->s.frames[CUR_FRAME].tf.f;
    ls_y = f->linesize[0];
    ls_uv = f->linesize[1];

    for (i = 0; i < s->sb_rows; i++) {
        vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);

        if (s->s.h.filter.level) {
            yoff = (ls_y * 64) * i;
            uvoff = (ls_uv * 64 >> s->ss_v) * i;
            lflvl_ptr = s->lflvl + s->sb_cols * i;
            for (col = 0; col < s->cols;
                 col += 8, yoff += 64 * bytesperpixel,
                 uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col,
                                     yoff, uvoff);
            }
        }
    }
    return 0;
}
#endif
static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
                            int *got_frame, AVPacket *pkt)
{
    const uint8_t *data = pkt->data;
    int size = pkt->size;
    VP9Context *s = avctx->priv_data;
    int ret, i, j, ref;
    int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
                            (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
    AVFrame *f;

    if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
        return ret;
    } else if (ret == 0) {
        if (!s->s.refs[ref].f->buf[0]) {
            av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        }
        if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
            return ret;
        ((AVFrame *)frame)->pts = pkt->pts;
#if FF_API_PKT_PTS
FF_DISABLE_DEPRECATION_WARNINGS
        ((AVFrame *)frame)->pkt_pts = pkt->pts;
FF_ENABLE_DEPRECATION_WARNINGS
#endif
        ((AVFrame *)frame)->pkt_dts = pkt->dts;
        for (i = 0; i < 8; i++) {
            if (s->next_refs[i].f->buf[0])
                ff_thread_release_buffer(avctx, &s->next_refs[i]);
            if (s->s.refs[i].f->buf[0] &&
                (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
                return ret;
        }
        *got_frame = 1;
        return pkt->size;
    }
    data += ret;
    size -= ret;

    if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
        if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
            vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
        if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
            (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
            return ret;
    }
    if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
        vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
    if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
        (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
        return ret;
    if (s->s.frames[CUR_FRAME].tf.f->buf[0])
        vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME]);
    if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
        return ret;
    f = s->s.frames[CUR_FRAME].tf.f;
    f->key_frame = s->s.h.keyframe;
    f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;

    if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
        (s->s.frames[REF_FRAME_MVPAIR].tf.f->width  != s->s.frames[CUR_FRAME].tf.f->width ||
         s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
        vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
    }

    // ref frame setup
    for (i = 0; i < 8; i++) {
        if (s->next_refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->next_refs[i]);
        if (s->s.h.refreshrefmask & (1 << i)) {
            ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
        } else if (s->s.refs[i].f->buf[0]) {
            ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
        }
        if (ret < 0)
            return ret;
    }

    if (avctx->hwaccel) {
        ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
        if (ret < 0)
            return ret;
        ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
        if (ret < 0)
            return ret;
        ret = avctx->hwaccel->end_frame(avctx);
        if (ret < 0)
            return ret;
        goto finish;
    }

    // main tile decode loop
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->s.h.keyframe || s->s.h.intraonly) {
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
    } else {
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    }
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
    memset(s->above_segpred_ctx, 0, s->cols);
    s->pass = s->s.frames[CUR_FRAME].uses_2pass =
        avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
    if ((ret = update_block_buffers(avctx)) < 0) {
        av_log(avctx, AV_LOG_ERROR,
               "Failed to allocate block buffers\n");
        return ret;
    }
    if (s->s.h.refreshctx && s->s.h.parallelmode) {
        int j, k, l, m;

        for (i = 0; i < 4; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++)
                            memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
                                   s->prob.coef[i][j][k][l][m], 3);
            if (s->s.h.txfmmode == i)
                break;
        }
        s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
        ff_thread_finish_setup(avctx);
    } else if (!s->s.h.refreshctx) {
        ff_thread_finish_setup(avctx);
    }

#if HAVE_THREADS
    if (avctx->active_thread_type & FF_THREAD_SLICE) {
        for (i = 0; i < s->sb_rows; i++)
            atomic_store(&s->entries[i], 0);
    }
#endif

    do {
        for (i = 0; i < s->active_tile_cols; i++) {
            s->td[i].b = s->td[i].b_base;
            s->td[i].block = s->td[i].block_base;
            s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
            s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
            s->td[i].eob = s->td[i].eob_base;
            s->td[i].uveob[0] = s->td[i].uveob_base[0];
            s->td[i].uveob[1] = s->td[i].uveob_base[1];
        }

#if HAVE_THREADS
        if (avctx->active_thread_type == FF_THREAD_SLICE) {
            int tile_row, tile_col;

            av_assert1(!s->pass);

            for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
                for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
                    int64_t tile_size;

                    if (tile_col == s->s.h.tiling.tile_cols - 1 &&
                        tile_row == s->s.h.tiling.tile_rows - 1) {
                        tile_size = size;
                    } else {
                        tile_size = AV_RB32(data);
                        data += 4;
                        size -= 4;
                    }
                    if (tile_size > size)
                        return AVERROR_INVALIDDATA;
                    ret = ff_vp56_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
                    if (ret < 0)
                        return ret;
                    if (vp56_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
                        return AVERROR_INVALIDDATA;
                    data += tile_size;
                    size -= tile_size;
                }
            }

            ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
        } else
#endif
        {
            ret = decode_tiles(avctx, data, size);
            if (ret < 0) {
                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                return ret;
            }
        }

        // Sum all counts fields into td[0].counts for tile threading
        if (avctx->active_thread_type == FF_THREAD_SLICE)
            for (i = 1; i < s->s.h.tiling.tile_cols; i++)
                for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
                    ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];

        if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
            ff_vp9_adapt_probs(s);
            ff_thread_finish_setup(avctx);
        }
    } while (s->pass++ == 1);
    ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);

finish:
    // ref frame setup
    for (i = 0; i < 8; i++) {
        if (s->s.refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->s.refs[i]);
        if (s->next_refs[i].f->buf[0] &&
            (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
            return ret;
    }

    if (!s->s.h.invisible) {
        if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return pkt->size;
}
static void vp9_decode_flush(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < 3; i++)
        vp9_frame_unref(avctx, &s->s.frames[i]);
    for (i = 0; i < 8; i++)
        ff_thread_release_buffer(avctx, &s->s.refs[i]);
}
static int init_frames(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < 3; i++) {
        s->s.frames[i].tf.f = av_frame_alloc();
        if (!s->s.frames[i].tf.f) {
            vp9_decode_free(avctx);
            av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
            return AVERROR(ENOMEM);
        }
    }
    for (i = 0; i < 8; i++) {
        s->s.refs[i].f = av_frame_alloc();
        s->next_refs[i].f = av_frame_alloc();
        if (!s->s.refs[i].f || !s->next_refs[i].f) {
            vp9_decode_free(avctx);
            av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
            return AVERROR(ENOMEM);
        }
    }

    return 0;
}
static av_cold int vp9_decode_init(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;

    avctx->internal->allocate_progress = 1;
    s->last_bpp = 0;
    s->s.h.filter.sharpness = -1;

    return init_frames(avctx);
}
#if HAVE_THREADS
static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
{
    return init_frames(avctx);
}
static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    int i, ret;
    VP9Context *s = dst->priv_data, *ssrc = src->priv_data;

    for (i = 0; i < 3; i++) {
        if (s->s.frames[i].tf.f->buf[0])
            vp9_frame_unref(dst, &s->s.frames[i]);
        if (ssrc->s.frames[i].tf.f->buf[0]) {
            if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
                return ret;
        }
    }
    for (i = 0; i < 8; i++) {
        if (s->s.refs[i].f->buf[0])
            ff_thread_release_buffer(dst, &s->s.refs[i]);
        if (ssrc->next_refs[i].f->buf[0]) {
            if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
                return ret;
        }
    }

    s->s.h.invisible = ssrc->s.h.invisible;
    s->s.h.keyframe = ssrc->s.h.keyframe;
    s->s.h.intraonly = ssrc->s.h.intraonly;
    s->ss_v = ssrc->ss_v;
    s->ss_h = ssrc->ss_h;
    s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
    s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
    s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
    s->bytesperpixel = ssrc->bytesperpixel;
    s->gf_fmt = ssrc->gf_fmt;
    s->w = ssrc->w;
    s->h = ssrc->h;
    s->s.h.bpp = ssrc->s.h.bpp;
    s->bpp_index = ssrc->bpp_index;
    s->pix_fmt = ssrc->pix_fmt;
    memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
    memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
    memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
           sizeof(s->s.h.segmentation.feat));

    return 0;
}
#endif
AVCodec ff_vp9_decoder = {
    .name                  = "vp9",
    .long_name             = NULL_IF_CONFIG_SMALL("Google VP9"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP9,
    .priv_data_size        = sizeof(VP9Context),
    .init                  = vp9_decode_init,
    .close                 = vp9_decode_free,
    .decode                = vp9_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
    .caps_internal         = FF_CODEC_CAP_SLICE_THREAD_HAS_MF,
    .flush                 = vp9_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
    .profiles              = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
};