/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "avcodec.h"
#include "get_bits.h"
#include "hwaccel.h"
#include "internal.h"
#include "profiles.h"
#include "thread.h"
#include "videodsp.h"
#include "vp56.h"
#include "vp9.h"
#include "vp9data.h"
#include "vp9dec.h"
#include "libavutil/avassert.h"
#include "libavutil/pixdesc.h"

#define VP9_SYNCCODE 0x498342
#if HAVE_THREADS
static void vp9_free_entries(AVCodecContext *avctx) {
    VP9Context *s = avctx->priv_data;

    if (avctx->active_thread_type & FF_THREAD_SLICE) {
        pthread_mutex_destroy(&s->progress_mutex);
        pthread_cond_destroy(&s->progress_cond);
        av_freep(&s->entries);
    }
}
static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
    VP9Context *s = avctx->priv_data;
    int i;

    if (avctx->active_thread_type & FF_THREAD_SLICE) {
        if (s->entries)
            av_freep(&s->entries);

        s->entries = av_malloc_array(n, sizeof(atomic_int));

        if (!s->entries) {
            av_freep(&s->entries);
            return AVERROR(ENOMEM);
        }

        for (i = 0; i < n; i++)
            atomic_init(&s->entries[i], 0);

        pthread_mutex_init(&s->progress_mutex, NULL);
        pthread_cond_init(&s->progress_cond, NULL);
    }
    return 0;
}
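/* Progress tracking for slice threading: s->entries[row] counts how many
 * tile columns have finished decoding superblock row 'row'. Worker threads
 * publish their progress below with a release-ordered increment, and the
 * loopfilter main function blocks until every tile column has reported. */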
static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
    pthread_mutex_lock(&s->progress_mutex);
    atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
    pthread_cond_signal(&s->progress_cond);
    pthread_mutex_unlock(&s->progress_mutex);
}
static void vp9_await_tile_progress(VP9Context *s, int field, int n) {
    if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
        return;

    pthread_mutex_lock(&s->progress_mutex);
    while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) != n)
        pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
    pthread_mutex_unlock(&s->progress_mutex);
}
#else
static void vp9_free_entries(AVCodecContext *avctx) {}
static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
#endif
static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
{
    ff_thread_release_buffer(avctx, &f->tf);
    av_buffer_unref(&f->extradata);
    av_buffer_unref(&f->hwaccel_priv_buf);
    f->segmentation_map = NULL;
    f->hwaccel_picture_private = NULL;
}
static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
{
    VP9Context *s = avctx->priv_data;
    int ret, sz;

    ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
    if (ret < 0)
        return ret;

    sz = 64 * s->sb_cols * s->sb_rows;
    f->extradata = av_buffer_allocz(sz * (1 + sizeof(VP9mvrefPair)));
    if (!f->extradata)
        goto fail;

    f->segmentation_map = f->extradata->data;
    f->mv = (VP9mvrefPair *) (f->extradata->data + sz);

    if (avctx->hwaccel) {
        const AVHWAccel *hwaccel = avctx->hwaccel;
        av_assert0(!f->hwaccel_picture_private);
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
                goto fail;
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
        }
    }

    return 0;

fail:
    vp9_frame_unref(avctx, f);
    return AVERROR(ENOMEM);
}
static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
{
    int ret;

    ret = ff_thread_ref_frame(&dst->tf, &src->tf);
    if (ret < 0)
        return ret;

    dst->extradata = av_buffer_ref(src->extradata);
    if (!dst->extradata)
        goto fail;

    dst->segmentation_map = src->segmentation_map;
    dst->mv = src->mv;
    dst->uses_2pass = src->uses_2pass;

    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            goto fail;
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;

fail:
    vp9_frame_unref(avctx, dst);
    return AVERROR(ENOMEM);
}
static int update_size(AVCodecContext *avctx, int w, int h)
{
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
                     CONFIG_VP9_NVDEC_HWACCEL + \
                     CONFIG_VP9_VAAPI_HWACCEL)
    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
    VP9Context *s = avctx->priv_data;
    uint8_t *p;
    int bytesperpixel = s->bytesperpixel, ret, cols, rows;
    int lflvl_len, i;

    av_assert0(w > 0 && h > 0);

    if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
        if ((ret = ff_set_dimensions(avctx, w, h)) < 0)
            return ret;

        switch (s->pix_fmt) {
        case AV_PIX_FMT_YUV420P:
        case AV_PIX_FMT_YUV420P10:
#if CONFIG_VP9_DXVA2_HWACCEL
            *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
#endif
#if CONFIG_VP9_D3D11VA_HWACCEL
            *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
            *fmtp++ = AV_PIX_FMT_D3D11;
#endif
#if CONFIG_VP9_NVDEC_HWACCEL
            *fmtp++ = AV_PIX_FMT_CUDA;
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
            break;
        case AV_PIX_FMT_YUV420P12:
#if CONFIG_VP9_NVDEC_HWACCEL
            *fmtp++ = AV_PIX_FMT_CUDA;
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
#endif
            break;
        }

        *fmtp++ = s->pix_fmt;
        *fmtp = AV_PIX_FMT_NONE;

        ret = ff_thread_get_format(avctx, pix_fmts);
        if (ret < 0)
            return ret;

        avctx->pix_fmt = ret;
        s->gf_fmt = s->pix_fmt;
        s->w = w;
        s->h = h;
    }

    cols = (w + 7) >> 3;
    rows = (h + 7) >> 3;
    if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)
        return 0;

    s->last_fmt = s->pix_fmt;
    s->sb_cols  = (w + 63) >> 6;
    s->sb_rows  = (h + 63) >> 6;
    s->cols     = (w + 7) >> 3;
    s->rows     = (h + 7) >> 3;
    lflvl_len   = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;

#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    // FIXME we slightly over-allocate here for subsampled chroma, but a little
    // bit of padding shouldn't affect performance...
    p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
                                lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0],  uint8_t *,    64 * bytesperpixel);
    assign(s->intra_pred_data[1],  uint8_t *,    64 * bytesperpixel);
    assign(s->intra_pred_data[2],  uint8_t *,    64 * bytesperpixel);
    assign(s->above_y_nnz_ctx,     uint8_t *,    16);
    assign(s->above_mode_ctx,      uint8_t *,    16);
    assign(s->above_mv_ctx,        VP56mv(*)[2], 16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *,    16);
    assign(s->above_uv_nnz_ctx[1], uint8_t *,    16);
    assign(s->above_partition_ctx, uint8_t *,     8);
    assign(s->above_skip_ctx,      uint8_t *,     8);
    assign(s->above_txfm_ctx,      uint8_t *,     8);
    assign(s->above_segpred_ctx,   uint8_t *,     8);
    assign(s->above_intra_ctx,     uint8_t *,     8);
    assign(s->above_comp_ctx,      uint8_t *,     8);
    assign(s->above_ref_ctx,       uint8_t *,     8);
    assign(s->above_filter_ctx,    uint8_t *,     8);
    assign(s->lflvl,               VP9Filter *,   lflvl_len);
#undef assign
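    /* Arithmetic behind the single allocation above, per sb64 column:
     * 192 * bytesperpixel covers the three intra_pred_data planes
     * (3 * 64 * bytesperpixel), and the constant 128 covers the byte-sized
     * above-context arrays (16 + 16 + 2 * 16 for the nnz/mode contexts plus
     * eight 8-byte contexts); above_mv_ctx and lflvl are accounted for
     * separately by their own sizeof() terms. */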
    for (i = 0; i < s->active_tile_cols; i++) {
        av_freep(&s->td[i].b_base);
        av_freep(&s->td[i].block_base);
    }

    if (s->s.h.bpp != s->last_bpp) {
        ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
        ff_videodsp_init(&s->vdsp, s->s.h.bpp);
        s->last_bpp = s->s.h.bpp;
    }

    return 0;
}
static int update_block_buffers(AVCodecContext *avctx)
{
    int i;
    VP9Context *s = avctx->priv_data;
    int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
    VP9TileData *td = &s->td[0];

    if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)
        return 0;

    av_free(td->b_base);
    av_free(td->block_base);
    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
    chroma_eobs   = 16 * 16 >> (s->ss_h + s->ss_v);
    if (s->s.frames[CUR_FRAME].uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

        td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
        td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                     16 * 16 + 2 * chroma_eobs) * sbs);
        if (!td->b_base || !td->block_base)
            return AVERROR(ENOMEM);
        td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
        td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
        td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
        td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
        td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;
    } else {
        for (i = 1; i < s->active_tile_cols; i++) {
            if (s->td[i].b_base && s->td[i].block_base) {
                av_free(s->td[i].b_base);
                av_free(s->td[i].block_base);
            }
        }
        for (i = 0; i < s->active_tile_cols; i++) {
            s->td[i].b_base = av_malloc(sizeof(VP9Block));
            s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                             16 * 16 + 2 * chroma_eobs);
            if (!s->td[i].b_base || !s->td[i].block_base)
                return AVERROR(ENOMEM);
            s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
            s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
            s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
            s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
            s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;
        }
    }
    s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;

    return 0;
}
// The sign bit is at the end, not the start, of a bit sequence
static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
{
    int v = get_bits(gb, n);
    return get_bits1(gb) ? -v : v;
}
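/* inv_recenter_nonneg() undoes the recentering of a distance code v around
 * a reference m: even codes land above m (m + v / 2), odd codes below it
 * (m - (v + 1) / 2), and once one side is exhausted (v > 2 * m) the code is
 * the value itself. E.g. for m = 2, v = 0..5 maps to 2, 1, 3, 0, 4, 5. */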
static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    if (v > 2 * m)
        return v;
    if (v & 1)
        return m - ((v + 1) >> 1);
    return m + (v >> 1);
}
// differential forward probability updates
static int update_prob(VP56RangeCoder *c, int p)
{
    static const int inv_map_table[255] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253, 253,
    };
    int d;
    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */
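    /* Worked example, using inv_map_table[] above: the cheapest branch below
     * (one stop bit plus 4 bits) codes d = 0..15, reaching the first 16 of
     * the 20 coarse, widely-spaced entries at the head of the table
     * (7, 20, 33, ..., 254). E.g. d = 2 selects inv_map_table[2] = 33; for
     * p = 128 this yields 1 + inv_recenter_nonneg(33, 127) = 111, i.e. an
     * absolute step of 17 below the current probability. */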
    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65)
            d = (d << 1) - 65 + vp8_rac_get(c);
        d += 64;
        av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));
    }

    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
static int read_colorspace_details(AVCodecContext *avctx)
{
    static const enum AVColorSpace colorspaces[8] = {
        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
    };
    VP9Context *s = avctx->priv_data;
    int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12

    s->bpp_index = bits;
    s->s.h.bpp = 8 + bits * 2;
    s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
    avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
    if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
        static const enum AVPixelFormat pix_fmt_rgb[3] = {
            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
        };
        s->ss_h = s->ss_v = 0;
        avctx->color_range = AVCOL_RANGE_JPEG;
        s->pix_fmt = pix_fmt_rgb[bits];
        if (avctx->profile & 1) {
            if (get_bits1(&s->gb)) {
                av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
                return AVERROR_INVALIDDATA;
            }
        } else {
            av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
                   avctx->profile);
            return AVERROR_INVALIDDATA;
        }
    } else {
        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
            { { AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV422P },
              { AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV420P } },
            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
        };
        avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
        if (avctx->profile & 1) {
            s->ss_h = get_bits1(&s->gb);
            s->ss_v = get_bits1(&s->gb);
            s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
            if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
                av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
                       avctx->profile);
                return AVERROR_INVALIDDATA;
            } else if (get_bits1(&s->gb)) {
                av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
                       avctx->profile);
                return AVERROR_INVALIDDATA;
            }
        } else {
            s->ss_h = s->ss_v = 1;
            s->pix_fmt = pix_fmt_for_ss[bits][1][1];
        }
    }

    return 0;
}
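/* VP9 profile constraints, as parsed above: profile 0 is 8-bit 4:2:0,
 * profile 1 is 8-bit with the remaining subsamplings or RGB, profile 2 is
 * 10/12-bit 4:2:0, and profile 3 is 10/12-bit with the remaining
 * subsamplings or RGB. */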
static int decode_frame_header(AVCodecContext *avctx,
                               const uint8_t *data, int size, int *ref)
{
    VP9Context *s = avctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
    int last_invisible;
    const uint8_t *data2;

    /* general header */
    if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
        return ret;
    }
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    }
    avctx->profile  = get_bits1(&s->gb);
    avctx->profile |= get_bits1(&s->gb) << 1;
    if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
    if (avctx->profile > 3) {
        av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
        return AVERROR_INVALIDDATA;
    }
    s->s.h.profile = avctx->profile;
    if (get_bits1(&s->gb)) {
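        // "show existing frame": nothing is coded after this header; the
        // frame in reference slot *ref is output directly.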
        *ref = get_bits(&s->gb, 3);
        return 0;
    }

    s->last_keyframe  = s->s.h.keyframe;
    s->s.h.keyframe   = !get_bits1(&s->gb);

    last_invisible   = s->s.h.invisible;
    s->s.h.invisible = !get_bits1(&s->gb);
    s->s.h.errorres  = get_bits1(&s->gb);
    s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;

    if (s->s.h.keyframe) {
        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        }
        if ((ret = read_colorspace_details(avctx)) < 0)
            return ret;
        // for profile 1, here follows the subsampling bits
        s->s.h.refreshrefmask = 0xff;
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
    } else {
        s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
        s->s.h.resetctx  = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
        if (s->s.h.intraonly) {
            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            }
            if (avctx->profile >= 1) {
                if ((ret = read_colorspace_details(avctx)) < 0)
                    return ret;
            } else {
                s->ss_h = s->ss_v = 1;
                s->s.h.bpp = 8;
                s->bpp_index = 0;
                s->bytesperpixel = 1;
                s->pix_fmt = AV_PIX_FMT_YUV420P;
                avctx->colorspace = AVCOL_SPC_BT470BG;
                avctx->color_range = AVCOL_RANGE_MPEG;
            }
            s->s.h.refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
        } else {
            s->s.h.refreshrefmask = get_bits(&s->gb, 8);
            s->s.h.refidx[0]      = get_bits(&s->gb, 3);
            s->s.h.signbias[0]    = get_bits1(&s->gb) && !s->s.h.errorres;
            s->s.h.refidx[1]      = get_bits(&s->gb, 3);
            s->s.h.signbias[1]    = get_bits1(&s->gb) && !s->s.h.errorres;
            s->s.h.refidx[2]      = get_bits(&s->gb, 3);
            s->s.h.signbias[2]    = get_bits1(&s->gb) && !s->s.h.errorres;
            if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
                !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
                !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
                av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            }
            if (get_bits1(&s->gb)) {
                w = s->s.refs[s->s.h.refidx[0]].f->width;
                h = s->s.refs[s->s.h.refidx[0]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->s.refs[s->s.h.refidx[1]].f->width;
                h = s->s.refs[s->s.h.refidx[1]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->s.refs[s->s.h.refidx[2]].f->width;
                h = s->s.refs[s->s.h.refidx[2]].f->height;
            } else {
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            }
            // Note that in this code, "CUR_FRAME" is actually before we
            // have formally allocated a frame, and thus actually represents
            // the _last_ frame
            s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width  == w &&
                                         s->s.frames[CUR_FRAME].tf.f->height == h;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->s.h.highprecisionmvs = get_bits1(&s->gb);
            s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
                                                    get_bits(&s->gb, 2);
            s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
                                    s->s.h.signbias[0] != s->s.h.signbias[2];
            if (s->s.h.allowcompinter) {
                if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
                    s->s.h.fixcompref    = 2;
                    s->s.h.varcompref[0] = 0;
                    s->s.h.varcompref[1] = 1;
                } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
                    s->s.h.fixcompref    = 1;
                    s->s.h.varcompref[0] = 0;
                    s->s.h.varcompref[1] = 2;
                } else {
                    s->s.h.fixcompref    = 0;
                    s->s.h.varcompref[0] = 1;
                    s->s.h.varcompref[1] = 2;
                }
            }
        }
    }

    s->s.h.refreshctx   = s->s.h.errorres ? 0 : get_bits1(&s->gb);
    s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
    s->s.h.framectxid   = c = get_bits(&s->gb, 2);
    if (s->s.h.keyframe || s->s.h.intraonly)
        s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes
    /* loopfilter header data */
    if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
        // reset loopfilter defaults
        s->s.h.lf_delta.ref[0] = 1;
        s->s.h.lf_delta.ref[1] = 0;
        s->s.h.lf_delta.ref[2] = -1;
        s->s.h.lf_delta.ref[3] = -1;
        s->s.h.lf_delta.mode[0] = 0;
        s->s.h.lf_delta.mode[1] = 0;
        memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
    }
    s->s.h.filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
    // the old cache values since they are still valid
    if (s->s.h.filter.sharpness != sharp) {
        for (i = 1; i <= 63; i++) {
            int limit = i;

            if (sharp > 0) {
                limit >>= (sharp + 3) >> 2;
                limit = FFMIN(limit, 9 - sharp);
            }
            limit = FFMAX(limit, 1);

            s->filter_lut.lim_lut[i] = limit;
            s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
        }
    }
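    /* Worked example for the LUT above: with sharpness 4 and i = 63 the
     * limit is FFMAX(FFMIN(63 >> 1, 9 - 4), 1) = 5, so lim_lut[63] = 5 and
     * mblim_lut[63] = 2 * (63 + 2) + 5 = 135. */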
    s->s.h.filter.sharpness = sharp;
    if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
        if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);
        }
    }
    /* quantization header data */
    s->s.h.yac_qi      = get_bits(&s->gb, 8);
    s->s.h.ydc_qdelta  = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->s.h.lossless    = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
                         s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
    if (s->s.h.lossless)
        avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;
    /* segmentation header info */
    if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
                                              get_bits(&s->gb, 8) : 255;
            if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
                for (i = 0; i < 3; i++)
                    s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
                                                       get_bits(&s->gb, 8) : 255;
        }

        if (get_bits1(&s->gb)) {
            s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
                if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
                if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
            }
        }
    }
    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
            if (s->s.h.segmentation.absolute_vals)
                qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
            else
                qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
        } else {
            qyac = s->s.h.yac_qi;
        }
        qydc  = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
        qyac  = av_clip_uintp2(qyac, 8);

        s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
        s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
        s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
        s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];
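        // qmul[plane][0] is the DC quantizer and qmul[plane][1] the AC
        // quantizer, with plane 0 = luma, plane 1 = chroma.
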
        sh = s->s.h.filter.level >= 32;
        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
            if (s->s.h.segmentation.absolute_vals)
                lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
            else
                lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
        } else {
            lflvl = s->s.h.filter.level;
        }
        if (s->s.h.lf_delta.enabled) {
            s->s.h.segmentation.feat[i].lflvl[0][0] =
            s->s.h.segmentation.feat[i].lflvl[0][1] =
                av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
            for (j = 1; j < 4; j++) {
                s->s.h.segmentation.feat[i].lflvl[j][0] =
                    av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
                                             s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
                s->s.h.segmentation.feat[i].lflvl[j][1] =
                    av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
                                             s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
            }
        } else {
            memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
                   sizeof(s->s.h.segmentation.feat[i].lflvl));
        }
    }
    /* tiling info */
    if ((ret = update_size(avctx, w, h)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
               w, h, s->pix_fmt);
        return ret;
    }
    for (s->s.h.tiling.log2_tile_cols = 0;
         s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
         s->s.h.tiling.log2_tile_cols++) ;
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->s.h.tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->s.h.tiling.log2_tile_cols++;
        else
            break;
    }
    s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
    s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
    if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
        int n_range_coders;
        VP56RangeCoder *rc;

        if (s->td) {
            for (i = 0; i < s->active_tile_cols; i++) {
                av_free(s->td[i].b_base);
                av_free(s->td[i].block_base);
            }
            av_free(s->td);
        }

        s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
        vp9_free_entries(avctx);
        s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
                              s->s.h.tiling.tile_cols : 1;
        vp9_alloc_entries(avctx, s->sb_rows);
        if (avctx->active_thread_type == FF_THREAD_SLICE) {
            n_range_coders = 4; // max_tile_rows
        } else {
            n_range_coders = s->s.h.tiling.tile_cols;
        }
        s->td = av_mallocz_array(s->active_tile_cols, sizeof(VP9TileData) +
                                 n_range_coders * sizeof(VP56RangeCoder));
        if (!s->td)
            return AVERROR(ENOMEM);
        rc = (VP56RangeCoder *) &s->td[s->active_tile_cols];
        for (i = 0; i < s->active_tile_cols; i++) {
            s->td[i].s = s;
            s->td[i].c_b = rc;
            rc += n_range_coders;
        }
    }
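    /* The VP56RangeCoder array lives in the same allocation, directly after
     * the VP9TileData array: with slice threading every tile column gets one
     * coder per tile row (at most 4), otherwise td[0] gets one coder per
     * tile column. */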
    /* check reference frames */
    if (!s->s.h.keyframe && !s->s.h.intraonly) {
        for (i = 0; i < 3; i++) {
            AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
            int refw = ref->width, refh = ref->height;

            if (ref->format != avctx->pix_fmt) {
                av_log(avctx, AV_LOG_ERROR,
                       "Ref pixfmt (%s) did not match current frame (%s)",
                       av_get_pix_fmt_name(ref->format),
                       av_get_pix_fmt_name(avctx->pix_fmt));
                return AVERROR_INVALIDDATA;
            } else if (refw == w && refh == h) {
                s->mvscale[i][0] = s->mvscale[i][1] = 0;
            } else {
                if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
                    av_log(avctx, AV_LOG_ERROR,
                           "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
                           refw, refh, w, h);
                    return AVERROR_INVALIDDATA;
                }
                s->mvscale[i][0] = (refw << 14) / w;
                s->mvscale[i][1] = (refh << 14) / h;
                s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
                s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;
            }
        }
    }
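    /* mvscale[] is a 14-bit fixed-point scaling ratio between reference and
     * current frame dimensions; e.g. a reference twice as wide as the frame
     * (refw == 2 * w) gives mvscale[i][0] = 32768 and mvstep[i][0] = 32. */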
    if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
        s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
                           s->prob_ctx[3].p = ff_vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
    } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
        s->prob_ctx[c].p = ff_vp9_default_probs;
        memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
    }
    // next 16 bits is size of the rest of the header (arith-coded)
    s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
    s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;

    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    }
    ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (ret < 0)
        return ret;

    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;
    }
    for (i = 0; i < s->active_tile_cols; i++) {
        if (s->s.h.keyframe || s->s.h.intraonly) {
            memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
            memset(s->td[i].counts.eob,  0, sizeof(s->td[0].counts.eob));
        } else {
            memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));
        }
    }
    /* FIXME is it faster to not copy here, but do it down in the fw updates
     * as explicit copies if the fw update is missing (and skip the copy upon
     * eob [inter] or intra [intra] frames)? */
    s->prob.p = s->prob_ctx[c].p;
    // txfm updates
    if (s->s.h.lossless) {
        s->s.h.txfmmode = TX_4X4;
    } else {
        s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->s.h.txfmmode == 3)
            s->s.h.txfmmode += vp8_rac_get(&s->c);

        if (s->s.h.txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);
        }
    }
    // coef updates
    for (i = 0; i < 4; i++) {
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                                break;
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252))
                                    p[n] = update_prob(&s->c, r[n]);
                                else
                                    p[n] = r[n];
                            }
                            memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
                        }
        } else {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m > 3 && l == 0) // dc only has 3 pt
                                break;
                            memcpy(p, r, 3);
                            memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
                        }
        }
        if (s->s.h.txfmmode == i)
            break;
    }
    // mode updates
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->s.h.keyframe && !s->s.h.intraonly) {
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->s.h.filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);

        if (s->s.h.allowcompinter) {
            s->s.h.comppredmode = vp8_rac_get(&s->c);
            if (s->s.h.comppredmode)
                s->s.h.comppredmode += vp8_rac_get(&s->c);
            if (s->s.h.comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.comp[i] =
                            update_prob(&s->c, s->prob.p.comp[i]);
        } else {
            s->s.h.comppredmode = PRED_SINGLEREF;
        }

        if (s->s.h.comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);
            }
        }

        if (s->s.h.comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);
        }

        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            update_prob(&s->c,
                                        s->prob.p.partition[3 - i][j][k]);
        // mv fields don't use the update_prob subexp model for some reason
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
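        // Each MV probability is coded with 7 bits and rescaled as
        // (x << 1) | 1, keeping the 8-bit probability odd and non-zero.
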
        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        if (s->s.h.highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
            }
        }
    }

    return (data2 - data) + size2;
}
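/* Recursively decode the partition tree of one 64x64 superblock: at each
 * level a partition symbol (none / horizontal / vertical / split) is coded
 * with above/left partition contexts, and blocks or sub-superblocks are
 * decoded accordingly. At frame edges only the partitions that fit are
 * available, coded with a reduced alphabet or an implied split. */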
static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
                      ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    const VP9Context *s = td->s;
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
                                                             s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl;
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) { // FIXME why not <=?
        if (row + hbs < s->rows) { // FIXME why not <=?
            bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_H:
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_V:
                ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
                break;
            case PARTITION_SPLIT:
                decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(td, row, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb(td, row + hbs, col + hbs, lflvl,
                          yoff + 8 * hbs * bytesperpixel,
                          uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                break;
            default:
                av_assert0(0);
            }
        } else if (vp56_rac_get_prob_branchy(td->c, p[1])) {
            bp = PARTITION_SPLIT;
            decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
            decode_sb(td, row, col + hbs, lflvl,
                      yoff + 8 * hbs * bytesperpixel,
                      uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
        } else {
            bp = PARTITION_H;
            ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else if (row + hbs < s->rows) { // FIXME why not <=?
        if (vp56_rac_get_prob_branchy(td->c, p[2])) {
            bp = PARTITION_SPLIT;
            decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        } else {
            bp = PARTITION_V;
            ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
        }
    } else {
        bp = PARTITION_SPLIT;
        decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    td->counts.partition[bl][c][bp]++;
}
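/* Pass-2 companion of decode_sb(): the partition decisions recorded in the
 * VP9Block array during pass 1 (b->bl, b->bp) are replayed here, so no
 * bitstream symbols are re-read. */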
static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
                          ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    const VP9Context *s = td->s;
    VP9Block *b = td->b;
    ptrdiff_t hbs = 4 >> bl;
    AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int bytesperpixel = s->bytesperpixel;

    if (bl == BL_8X8) {
        av_assert2(b->bl == BL_8X8);
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
    } else if (td->b->bl == bl) {
        ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
        if (b->bp == PARTITION_H && row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
        } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
            yoff  += hbs * 8 * bytesperpixel;
            uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
            ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
        }
    } else {
        decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
        if (col + hbs < s->cols) { // FIXME why not <=?
            if (row + hbs < s->rows) {
                decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
                yoff  += hbs * 8 * y_stride;
                uvoff += hbs * 8 * uv_stride >> s->ss_v;
                decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
                decode_sb_mem(td, row + hbs, col + hbs, lflvl,
                              yoff + 8 * hbs * bytesperpixel,
                              uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
            } else {
                yoff  += hbs * 8 * bytesperpixel;
                uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
                decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
            }
        } else if (row + hbs < s->rows) {
            yoff  += hbs * 8 * y_stride;
            uvoff += hbs * 8 * uv_stride >> s->ss_v;
            decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        }
    }
}
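/* Splits n superblocks into 2^log2_n tiles, returning the tile boundaries
 * in 8-pixel block units: e.g. n = 9 sb cols with log2_n = 1 yields sb
 * ranges [0, 4) and [4, 9), i.e. block ranges [0, 32) and [32, 72). */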
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = ( idx      * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;
    *start = FFMIN(sb_start, n) << 3;
    *end   = FFMIN(sb_end,   n) << 3;
}
static void free_buffers(VP9Context *s)
{
    int i;

    av_freep(&s->intra_pred_data[0]);
    for (i = 0; i < s->active_tile_cols; i++) {
        av_freep(&s->td[i].b_base);
        av_freep(&s->td[i].block_base);
    }
}
static av_cold int vp9_decode_free(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < 3; i++) {
        if (s->s.frames[i].tf.f->buf[0])
            vp9_frame_unref(avctx, &s->s.frames[i]);
        av_frame_free(&s->s.frames[i].tf.f);
    }
    for (i = 0; i < 8; i++) {
        if (s->s.refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->s.refs[i]);
        av_frame_free(&s->s.refs[i].f);
        if (s->next_refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->next_refs[i]);
        av_frame_free(&s->next_refs[i].f);
    }

    free_buffers(s);
    vp9_free_entries(avctx);
    av_freep(&s->td);
    return 0;
}
static int decode_tiles(AVCodecContext *avctx,
                        const uint8_t *data, int size)
{
    VP9Context *s = avctx->priv_data;
    VP9TileData *td = &s->td[0];
    int row, col, tile_row, tile_col, ret;
    int bytesperpixel;
    int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    AVFrame *f;
    ptrdiff_t yoff, uvoff, ls_y, ls_uv;

    f = s->s.frames[CUR_FRAME].tf.f;
    ls_y = f->linesize[0];
    ls_uv = f->linesize[1];
    bytesperpixel = s->bytesperpixel;

    yoff = uvoff = 0;
    for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
        set_tile_offset(&tile_row_start, &tile_row_end,
                        tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);

        for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
            int64_t tile_size;

            if (tile_col == s->s.h.tiling.tile_cols - 1 &&
                tile_row == s->s.h.tiling.tile_rows - 1) {
                tile_size = size;
            } else {
                tile_size = AV_RB32(data);
                data += 4;
                size -= 4;
            }
            if (tile_size > size) {
                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                return AVERROR_INVALIDDATA;
            }
            ret = ff_vp56_init_range_decoder(&td->c_b[tile_col], data, tile_size);
            if (ret < 0)
                return ret;
            if (vp56_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker bit
                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                return AVERROR_INVALIDDATA;
            }
            data += tile_size;
            size -= tile_size;
        }
        for (row = tile_row_start; row < tile_row_end;
             row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
            VP9Filter *lflvl_ptr = s->lflvl;
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

            for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
                set_tile_offset(&tile_col_start, &tile_col_end,
                                tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
                td->tile_col_start = tile_col_start;
                if (s->pass != 2) {
                    memset(td->left_partition_ctx, 0, 8);
                    memset(td->left_skip_ctx, 0, 8);
                    if (s->s.h.keyframe || s->s.h.intraonly) {
                        memset(td->left_mode_ctx, DC_PRED, 16);
                    } else {
                        memset(td->left_mode_ctx, NEARESTMV, 8);
                    }
                    memset(td->left_y_nnz_ctx, 0, 16);
                    memset(td->left_uv_nnz_ctx, 0, 32);
                    memset(td->left_segpred_ctx, 0, 8);

                    td->c = &td->c_b[tile_col];
                }

                for (col = tile_col_start;
                     col < tile_col_end;
                     col += 8, yoff2 += 64 * bytesperpixel,
                     uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                    // FIXME integrate with lf code (i.e. zero after each
                    // use, similar to invtxfm coefficients, or similar)
                    if (s->pass != 2) {
                        memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
                    }

                    if (s->pass == 2) {
                        decode_sb_mem(td, row, col, lflvl_ptr,
                                      yoff2, uvoff2, BL_64X64);
                    } else {
                        decode_sb(td, row, col, lflvl_ptr,
                                  yoff2, uvoff2, BL_64X64);
                    }
                }
            }

            if (s->pass == 2)
                continue;
            // backup pre-loopfilter reconstruction data for intra
            // prediction of next row of sb64s
            if (row + 8 < s->rows) {
                memcpy(s->intra_pred_data[0],
                       f->data[0] + yoff + 63 * ls_y,
                       8 * s->cols * bytesperpixel);
                memcpy(s->intra_pred_data[1],
                       f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * s->cols * bytesperpixel >> s->ss_h);
                memcpy(s->intra_pred_data[2],
                       f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * s->cols * bytesperpixel >> s->ss_h);
            }

            // loopfilter one row
            if (s->s.h.filter.level) {
                yoff2 = yoff;
                uvoff2 = uvoff;
                lflvl_ptr = s->lflvl;
                for (col = 0; col < s->cols;
                     col += 8, yoff2 += 64 * bytesperpixel,
                     uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                    ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
                                         yoff2, uvoff2);
                }
            }

            // FIXME maybe we can make this more finegrained by running the
            // loopfilter per-block instead of after each sbrow
            // In fact that would also make intra pred left preparation easier?
            ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
        }
    }
    return 0;
}
#if HAVE_THREADS
static av_always_inline
int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
                    int threadnr)
{
    VP9Context *s = avctx->priv_data;
    VP9TileData *td = &s->td[jobnr];
    ptrdiff_t uvoff, yoff, ls_y, ls_uv;
    int bytesperpixel = s->bytesperpixel, row, col, tile_row;
    unsigned tile_cols_len;
    int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
    VP9Filter *lflvl_ptr_base;
    AVFrame *f;

    f = s->s.frames[CUR_FRAME].tf.f;
    ls_y = f->linesize[0];
    ls_uv = f->linesize[1];

    set_tile_offset(&tile_col_start, &tile_col_end,
                    jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
    td->tile_col_start = tile_col_start;
    uvoff = (64 * bytesperpixel >> s->ss_h) * (tile_col_start >> 3);
    yoff = (64 * bytesperpixel) * (tile_col_start >> 3);
    lflvl_ptr_base = s->lflvl + (tile_col_start >> 3);
    for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
        set_tile_offset(&tile_row_start, &tile_row_end,
                        tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);

        td->c = &td->c_b[tile_row];
        for (row = tile_row_start; row < tile_row_end;
             row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
            ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
            VP9Filter *lflvl_ptr = lflvl_ptr_base + s->sb_cols * (row >> 3);

            memset(td->left_partition_ctx, 0, 8);
            memset(td->left_skip_ctx, 0, 8);
            if (s->s.h.keyframe || s->s.h.intraonly) {
                memset(td->left_mode_ctx, DC_PRED, 16);
            } else {
                memset(td->left_mode_ctx, NEARESTMV, 8);
            }
            memset(td->left_y_nnz_ctx, 0, 16);
            memset(td->left_uv_nnz_ctx, 0, 32);
            memset(td->left_segpred_ctx, 0, 8);

            for (col = tile_col_start;
                 col < tile_col_end;
                 col += 8, yoff2 += 64 * bytesperpixel,
                 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                // FIXME integrate with lf code (i.e. zero after each
                // use, similar to invtxfm coefficients, or similar)
                memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
                decode_sb(td, row, col, lflvl_ptr,
                          yoff2, uvoff2, BL_64X64);
            }

            // backup pre-loopfilter reconstruction data for intra
            // prediction of next row of sb64s
            tile_cols_len = tile_col_end - tile_col_start;
            if (row + 8 < s->rows) {
                memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
                       f->data[0] + yoff + 63 * ls_y,
                       8 * tile_cols_len * bytesperpixel);
                memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
                       f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * tile_cols_len * bytesperpixel >> s->ss_h);
                memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
                       f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
                       8 * tile_cols_len * bytesperpixel >> s->ss_h);
            }

            vp9_report_tile_progress(s, row >> 3, 1);
        }
    }
    return 0;
}
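/* Main-thread counterpart of decode_tiles_mt() under
 * ff_slice_thread_execute_with_mainfunc(): waits until every tile column
 * has reported a superblock row, then runs the loopfilter over that row. */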
static av_always_inline
int loopfilter_proc(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    ptrdiff_t uvoff, yoff, ls_y, ls_uv;
    VP9Filter *lflvl_ptr;
    int bytesperpixel = s->bytesperpixel, col, i;
    AVFrame *f;

    f = s->s.frames[CUR_FRAME].tf.f;
    ls_y = f->linesize[0];
    ls_uv = f->linesize[1];

    for (i = 0; i < s->sb_rows; i++) {
        vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);

        if (s->s.h.filter.level) {
            yoff = (ls_y * 64) * i;
            uvoff = (ls_uv * 64 >> s->ss_v) * i;
            lflvl_ptr = s->lflvl + s->sb_cols * i;
            for (col = 0; col < s->cols;
                 col += 8, yoff += 64 * bytesperpixel,
                 uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
                ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col,
                                     yoff, uvoff);
            }
        }
    }
    return 0;
}
#endif
static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
                            int *got_frame, AVPacket *pkt)
{
    const uint8_t *data = pkt->data;
    int size = pkt->size;
    VP9Context *s = avctx->priv_data;
    int ret, i, j, ref;
    int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
                            (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
    AVFrame *f;

    if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
        return ret;
    } else if (ret == 0) {
        if (!s->s.refs[ref].f->buf[0]) {
            av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        }
        if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
            return ret;
        ((AVFrame *)frame)->pts = pkt->pts;
#if FF_API_PKT_PTS
FF_DISABLE_DEPRECATION_WARNINGS
        ((AVFrame *)frame)->pkt_pts = pkt->pts;
FF_ENABLE_DEPRECATION_WARNINGS
#endif
        ((AVFrame *)frame)->pkt_dts = pkt->dts;
        for (i = 0; i < 8; i++) {
            if (s->next_refs[i].f->buf[0])
                ff_thread_release_buffer(avctx, &s->next_refs[i]);
            if (s->s.refs[i].f->buf[0] &&
                (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
                return ret;
        }
        *got_frame = 1;
        return pkt->size;
    }
    data += ret;
    size -= ret;
    if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
        if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
            vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
        if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
            (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
            return ret;
    }
    if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
        vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
    if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
        (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
        return ret;
    if (s->s.frames[CUR_FRAME].tf.f->buf[0])
        vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME]);
    if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
        return ret;
    f = s->s.frames[CUR_FRAME].tf.f;
    f->key_frame = s->s.h.keyframe;
    f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;

    if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
        (s->s.frames[REF_FRAME_MVPAIR].tf.f->width  != s->s.frames[CUR_FRAME].tf.f->width ||
         s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
        vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
    }
    // ref frame setup
    for (i = 0; i < 8; i++) {
        if (s->next_refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->next_refs[i]);
        if (s->s.h.refreshrefmask & (1 << i)) {
            ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
        } else if (s->s.refs[i].f->buf[0]) {
            ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
        }
        if (ret < 0)
            return ret;
    }

    if (avctx->hwaccel) {
        ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
        if (ret < 0)
            return ret;
        ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
        if (ret < 0)
            return ret;
        ret = avctx->hwaccel->end_frame(avctx);
        if (ret < 0)
            return ret;
        goto finish;
    }
    // main tile decode loop
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->s.h.keyframe || s->s.h.intraonly) {
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
    } else {
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    }
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
    memset(s->above_segpred_ctx, 0, s->cols);
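    /* With frame threading, a frame whose forward probability update depends
     * on its own decoded symbols (refreshctx without parallelmode) is decoded
     * in two passes: pass 1 parses all symbols and adapts the probabilities
     * so the next frame thread can start early, pass 2 reconstructs. */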
    s->pass = s->s.frames[CUR_FRAME].uses_2pass =
        avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
    if ((ret = update_block_buffers(avctx)) < 0) {
        av_log(avctx, AV_LOG_ERROR,
               "Failed to allocate block buffers\n");
        return ret;
    }
    if (s->s.h.refreshctx && s->s.h.parallelmode) {
        int j, k, l, m;

        for (i = 0; i < 4; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++)
                            memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
                                   s->prob.coef[i][j][k][l][m], 3);
            if (s->s.h.txfmmode == i)
                break;
        }
        s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
        ff_thread_finish_setup(avctx);
    } else if (!s->s.h.refreshctx) {
        ff_thread_finish_setup(avctx);
    }
#if HAVE_THREADS
    if (avctx->active_thread_type & FF_THREAD_SLICE) {
        for (i = 0; i < s->sb_rows; i++)
            atomic_store(&s->entries[i], 0);
    }
#endif

    do {
        for (i = 0; i < s->active_tile_cols; i++) {
            s->td[i].b = s->td[i].b_base;
            s->td[i].block = s->td[i].block_base;
            s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
            s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
            s->td[i].eob = s->td[i].eob_base;
            s->td[i].uveob[0] = s->td[i].uveob_base[0];
            s->td[i].uveob[1] = s->td[i].uveob_base[1];
        }
#if HAVE_THREADS
        if (avctx->active_thread_type == FF_THREAD_SLICE) {
            int tile_row, tile_col;

            av_assert1(!s->pass);

            for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
                for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
                    int64_t tile_size;

                    if (tile_col == s->s.h.tiling.tile_cols - 1 &&
                        tile_row == s->s.h.tiling.tile_rows - 1) {
                        tile_size = size;
                    } else {
                        tile_size = AV_RB32(data);
                        data += 4;
                        size -= 4;
                    }
                    if (tile_size > size)
                        return AVERROR_INVALIDDATA;
                    ret = ff_vp56_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
                    if (ret < 0)
                        return ret;
                    if (vp56_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
                        return AVERROR_INVALIDDATA;
                    data += tile_size;
                    size -= tile_size;
                }
            }

            ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
        } else
#endif
        {
            ret = decode_tiles(avctx, data, size);
            if (ret < 0) {
                ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
                return ret;
            }
        }

        // Sum all counts fields into td[0].counts for tile threading
        if (avctx->active_thread_type == FF_THREAD_SLICE)
            for (i = 1; i < s->s.h.tiling.tile_cols; i++)
                for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
                    ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];

        if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
            ff_vp9_adapt_probs(s);
            ff_thread_finish_setup(avctx);
        }
    } while (s->pass++ == 1);
    ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
finish:
    // ref frame setup
    for (i = 0; i < 8; i++) {
        if (s->s.refs[i].f->buf[0])
            ff_thread_release_buffer(avctx, &s->s.refs[i]);
        if (s->next_refs[i].f->buf[0] &&
            (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
            return ret;
    }

    if (!s->s.h.invisible) {
        if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return pkt->size;
}
static void vp9_decode_flush(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < 3; i++)
        vp9_frame_unref(avctx, &s->s.frames[i]);
    for (i = 0; i < 8; i++)
        ff_thread_release_buffer(avctx, &s->s.refs[i]);
}
static int init_frames(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < 3; i++) {
        s->s.frames[i].tf.f = av_frame_alloc();
        if (!s->s.frames[i].tf.f) {
            vp9_decode_free(avctx);
            av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
            return AVERROR(ENOMEM);
        }
    }
    for (i = 0; i < 8; i++) {
        s->s.refs[i].f    = av_frame_alloc();
        s->next_refs[i].f = av_frame_alloc();
        if (!s->s.refs[i].f || !s->next_refs[i].f) {
            vp9_decode_free(avctx);
            av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
            return AVERROR(ENOMEM);
        }
    }

    return 0;
}
static av_cold int vp9_decode_init(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;

    avctx->internal->allocate_progress = 1;
    s->last_bpp = 0;
    s->s.h.filter.sharpness = -1;

    return init_frames(avctx);
}
#if HAVE_THREADS
static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
{
    return init_frames(avctx);
}
static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    int i, ret;
    VP9Context *s = dst->priv_data, *ssrc = src->priv_data;

    for (i = 0; i < 3; i++) {
        if (s->s.frames[i].tf.f->buf[0])
            vp9_frame_unref(dst, &s->s.frames[i]);
        if (ssrc->s.frames[i].tf.f->buf[0]) {
            if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
                return ret;
        }
    }
    for (i = 0; i < 8; i++) {
        if (s->s.refs[i].f->buf[0])
            ff_thread_release_buffer(dst, &s->s.refs[i]);
        if (ssrc->next_refs[i].f->buf[0]) {
            if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
                return ret;
        }
    }

    s->s.h.invisible = ssrc->s.h.invisible;
    s->s.h.keyframe = ssrc->s.h.keyframe;
    s->s.h.intraonly = ssrc->s.h.intraonly;
    s->ss_v = ssrc->ss_v;
    s->ss_h = ssrc->ss_h;
    s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
    s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
    s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
    s->bytesperpixel = ssrc->bytesperpixel;
    s->gf_fmt = ssrc->gf_fmt;
    s->w = ssrc->w;
    s->h = ssrc->h;
    s->s.h.bpp = ssrc->s.h.bpp;
    s->bpp_index = ssrc->bpp_index;
    s->pix_fmt = ssrc->pix_fmt;
    memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
    memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
    memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
           sizeof(s->s.h.segmentation.feat));

    return 0;
}
#endif
AVCodec ff_vp9_decoder = {
    .name                  = "vp9",
    .long_name             = NULL_IF_CONFIG_SMALL("Google VP9"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP9,
    .priv_data_size        = sizeof(VP9Context),
    .init                  = vp9_decode_init,
    .close                 = vp9_decode_free,
    .decode                = vp9_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
    .caps_internal         = FF_CODEC_CAP_SLICE_THREAD_HAS_MF,
    .flush                 = vp9_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
    .profiles              = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
    .bsfs                  = "vp9_superframe_split",
    .hw_configs            = (const AVCodecHWConfigInternal*[]) {
#if CONFIG_VP9_DXVA2_HWACCEL
                               HWACCEL_DXVA2(vp9),
#endif
#if CONFIG_VP9_D3D11VA_HWACCEL
                               HWACCEL_D3D11VA(vp9),
#endif
#if CONFIG_VP9_D3D11VA2_HWACCEL
                               HWACCEL_D3D11VA2(vp9),
#endif
#if CONFIG_VP9_NVDEC_HWACCEL
                               HWACCEL_NVDEC(vp9),
#endif
#if CONFIG_VP9_VAAPI_HWACCEL
                               HWACCEL_VAAPI(vp9),
#endif
                               NULL
                           },
};