2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
35 #include "libavutil/avassert.h"
36 #include "libavutil/pixdesc.h"
38 #define VP9_SYNCCODE 0x498342
41 static void vp9_free_entries(AVCodecContext *avctx) {
42 VP9Context *s = avctx->priv_data;
44 if (avctx->active_thread_type & FF_THREAD_SLICE) {
45 pthread_mutex_destroy(&s->progress_mutex);
46 pthread_cond_destroy(&s->progress_cond);
47 av_freep(&s->entries);
51 static int vp9_alloc_entries(AVCodecContext *avctx, int n) {
52 VP9Context *s = avctx->priv_data;
55 if (avctx->active_thread_type & FF_THREAD_SLICE) {
57 av_freep(&s->entries);
59 s->entries = av_malloc_array(n, sizeof(atomic_int));
62 av_freep(&s->entries);
63 return AVERROR(ENOMEM);
66 for (i = 0; i < n; i++)
67 atomic_init(&s->entries[i], 0);
69 pthread_mutex_init(&s->progress_mutex, NULL);
70 pthread_cond_init(&s->progress_cond, NULL);
75 static void vp9_report_tile_progress(VP9Context *s, int field, int n) {
76 pthread_mutex_lock(&s->progress_mutex);
77 atomic_fetch_add_explicit(&s->entries[field], n, memory_order_release);
78 pthread_cond_signal(&s->progress_cond);
79 pthread_mutex_unlock(&s->progress_mutex);
82 static void vp9_await_tile_progress(VP9Context *s, int field, int n) {
83 if (atomic_load_explicit(&s->entries[field], memory_order_acquire) >= n)
86 pthread_mutex_lock(&s->progress_mutex);
87 while (atomic_load_explicit(&s->entries[field], memory_order_relaxed) != n)
88 pthread_cond_wait(&s->progress_cond, &s->progress_mutex);
89 pthread_mutex_unlock(&s->progress_mutex);
// No-threads stub: progress tracking is a no-op.
// NOTE(review): presumably the #else branch of a HAVE_THREADS guard around
// the pthread implementations above — confirm against the full file.
static void vp9_free_entries(AVCodecContext *avctx) {}
// No-threads counterpart of the threaded vp9_alloc_entries(); always succeeds.
static int vp9_alloc_entries(AVCodecContext *avctx, int n) { return 0; }
96 static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
98 ff_thread_release_buffer(avctx, &f->tf);
99 av_buffer_unref(&f->extradata);
100 av_buffer_unref(&f->hwaccel_priv_buf);
101 f->segmentation_map = NULL;
102 f->hwaccel_picture_private = NULL;
105 static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
107 VP9Context *s = avctx->priv_data;
110 ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
114 sz = 64 * s->sb_cols * s->sb_rows;
115 f->extradata = av_buffer_allocz(sz * (1 + sizeof(VP9mvrefPair)));
120 f->segmentation_map = f->extradata->data;
121 f->mv = (VP9mvrefPair *) (f->extradata->data + sz);
123 if (avctx->hwaccel) {
124 const AVHWAccel *hwaccel = avctx->hwaccel;
125 av_assert0(!f->hwaccel_picture_private);
126 if (hwaccel->frame_priv_data_size) {
127 f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
128 if (!f->hwaccel_priv_buf)
130 f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
137 vp9_frame_unref(avctx, f);
138 return AVERROR(ENOMEM);
141 static int vp9_frame_ref(AVCodecContext *avctx, VP9Frame *dst, VP9Frame *src)
145 ret = ff_thread_ref_frame(&dst->tf, &src->tf);
149 dst->extradata = av_buffer_ref(src->extradata);
153 dst->segmentation_map = src->segmentation_map;
155 dst->uses_2pass = src->uses_2pass;
157 if (src->hwaccel_picture_private) {
158 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
159 if (!dst->hwaccel_priv_buf)
161 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
167 vp9_frame_unref(avctx, dst);
168 return AVERROR(ENOMEM);
/**
 * (Re)configure the decoder for a new coded size / pixel format.
 *
 * Negotiates the output pixel format (offering hwaccel formats matching
 * the sw format), then — if the block layout changed — recomputes the
 * superblock/block grid and reallocates the single slab that backs all
 * per-column "above" context arrays, the intra prediction edge buffers and
 * the loop-filter level array. Re-inits the DSP contexts on bpp change.
 *
 * NOTE(review): several lines appear lost to extraction in this block
 * (error `return`s, `#endif`s, `if (!p)`); code is kept as-is.
 */
static int update_size(AVCodecContext *avctx, int w, int h)
// number of hwaccel formats we may offer in addition to the sw format
#define HWACCEL_MAX (CONFIG_VP9_DXVA2_HWACCEL + \
                     CONFIG_VP9_D3D11VA_HWACCEL * 2 + \
                     CONFIG_VP9_NVDEC_HWACCEL + \
                     CONFIG_VP9_VAAPI_HWACCEL)
    enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmtp = pix_fmts;
    VP9Context *s = avctx->priv_data;
    int bytesperpixel = s->bytesperpixel, ret, cols, rows;

    av_assert0(w > 0 && h > 0);

    // renegotiate the format only when size or sw format actually changed
    if (!(s->pix_fmt == s->gf_fmt && w == s->w && h == s->h)) {
        if ((ret = ff_set_dimensions(avctx, w, h)) < 0)

        // build the candidate list: hwaccels first, sw format as fallback
        switch (s->pix_fmt) {
        case AV_PIX_FMT_YUV420P:
        case AV_PIX_FMT_YUV420P10:
#if CONFIG_VP9_DXVA2_HWACCEL
            *fmtp++ = AV_PIX_FMT_DXVA2_VLD;
#if CONFIG_VP9_D3D11VA_HWACCEL
            *fmtp++ = AV_PIX_FMT_D3D11VA_VLD;
            *fmtp++ = AV_PIX_FMT_D3D11;
#if CONFIG_VP9_NVDEC_HWACCEL
            *fmtp++ = AV_PIX_FMT_CUDA;
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;
        case AV_PIX_FMT_YUV420P12:
#if CONFIG_VP9_NVDEC_HWACCEL
            *fmtp++ = AV_PIX_FMT_CUDA;
#if CONFIG_VP9_VAAPI_HWACCEL
            *fmtp++ = AV_PIX_FMT_VAAPI;

        *fmtp++ = s->pix_fmt;
        *fmtp = AV_PIX_FMT_NONE;

        ret = ff_thread_get_format(avctx, pix_fmts);

        avctx->pix_fmt = ret;
        s->gf_fmt = s->pix_fmt;

    // nothing else to do if the block layout is unchanged
    if (s->intra_pred_data[0] && cols == s->cols && rows == s->rows && s->pix_fmt == s->last_fmt)

    s->last_fmt  = s->pix_fmt;
    s->sb_cols   = (w + 63) >> 6;   // 64x64 superblock grid
    s->sb_rows   = (h + 63) >> 6;
    s->cols      = (w + 7) >> 3;    // 8x8 block grid
    s->rows      = (h + 7) >> 3;
    // slice threading needs one lflvl row per sb row; otherwise one is reused
    lflvl_len    = avctx->active_thread_type == FF_THREAD_SLICE ? s->sb_rows : 1;

// carve a typed array of n-per-sb_col elements out of the slab at p
#define assign(var, type, n) var = (type) p; p += s->sb_cols * (n) * sizeof(*var)
    av_freep(&s->intra_pred_data[0]);
    // FIXME we slightly over-allocate here for subsampled chroma, but a little
    // bit of padding shouldn't affect performance...
    p = av_malloc(s->sb_cols * (128 + 192 * bytesperpixel +
                                lflvl_len * sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
        return AVERROR(ENOMEM);
    assign(s->intra_pred_data[0],  uint8_t *,             64 * bytesperpixel);
    assign(s->intra_pred_data[1],  uint8_t *,             64 * bytesperpixel);
    assign(s->intra_pred_data[2],  uint8_t *,             64 * bytesperpixel);
    assign(s->above_y_nnz_ctx,     uint8_t *,             16);
    assign(s->above_mode_ctx,      uint8_t *,             16);
    assign(s->above_mv_ctx,        VP56mv(*)[2],          16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *,             16);
    assign(s->above_uv_nnz_ctx[1], uint8_t *,             16);
    assign(s->above_partition_ctx, uint8_t *,              8);
    assign(s->above_skip_ctx,      uint8_t *,              8);
    assign(s->above_txfm_ctx,      uint8_t *,              8);
    assign(s->above_segpred_ctx,   uint8_t *,              8);
    assign(s->above_intra_ctx,     uint8_t *,              8);
    assign(s->above_comp_ctx,      uint8_t *,              8);
    assign(s->above_ref_ctx,       uint8_t *,              8);
    assign(s->above_filter_ctx,    uint8_t *,              8);
    assign(s->lflvl,               VP9Filter *,            lflvl_len);

    // block buffers are sized per tile column; force reallocation
    for (i = 0; i < s->active_tile_cols; i++) {
        av_freep(&s->td[i].b_base);
        av_freep(&s->td[i].block_base);

    // re-init DSP only when the bit depth actually changed
    if (s->s.h.bpp != s->last_bpp) {
        ff_vp9dsp_init(&s->dsp, s->s.h.bpp, avctx->flags & AV_CODEC_FLAG_BITEXACT);
        ff_videodsp_init(&s->vdsp, s->s.h.bpp);
        s->last_bpp = s->s.h.bpp;
/**
 * (Re)allocate the per-tile block/coefficient scratch buffers.
 *
 * In 2-pass (frame-threaded) mode a single buffer large enough for the
 * whole frame (sbs superblocks) is allocated on td[0]; otherwise each
 * active tile column gets a one-superblock buffer. The block_base slab is
 * laid out as: luma coefs | u coefs | v coefs | luma eobs | u eobs | v eobs.
 * No-op when the existing allocation already matches the current pass mode.
 *
 * NOTE(review): extraction gaps — `int i;`, an `else`, closing braces and
 * `return 0;` appear to be missing; code kept as-is.
 */
static int update_block_buffers(AVCodecContext *avctx)
    VP9Context *s = avctx->priv_data;
    int chroma_blocks, chroma_eobs, bytesperpixel = s->bytesperpixel;
    VP9TileData *td = &s->td[0];

    // fast path: allocation already matches the current pass mode
    if (td->b_base && td->block_base && s->block_alloc_using_2pass == s->s.frames[CUR_FRAME].uses_2pass)

    av_free(td->block_base);
    // chroma counts shrink with each subsampled dimension
    chroma_blocks = 64 * 64 >> (s->ss_h + s->ss_v);
    chroma_eobs   = 16 * 16 >> (s->ss_h + s->ss_v);
    if (s->s.frames[CUR_FRAME].uses_2pass) {
        int sbs = s->sb_cols * s->sb_rows;

        // whole-frame buffers for the 2-pass decode
        td->b_base = av_malloc_array(s->cols * s->rows, sizeof(VP9Block));
        td->block_base = av_mallocz(((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                    16 * 16 + 2 * chroma_eobs) * sbs);
        if (!td->b_base || !td->block_base)
            return AVERROR(ENOMEM);
        td->uvblock_base[0] = td->block_base + sbs * 64 * 64 * bytesperpixel;
        td->uvblock_base[1] = td->uvblock_base[0] + sbs * chroma_blocks * bytesperpixel;
        td->eob_base = (uint8_t *) (td->uvblock_base[1] + sbs * chroma_blocks * bytesperpixel);
        td->uveob_base[0] = td->eob_base + 16 * 16 * sbs;
        td->uveob_base[1] = td->uveob_base[0] + chroma_eobs * sbs;

        // per-tile-column one-superblock buffers
        for (i = 1; i < s->active_tile_cols; i++) {
            if (s->td[i].b_base && s->td[i].block_base) {
                av_free(s->td[i].b_base);
                av_free(s->td[i].block_base);

        for (i = 0; i < s->active_tile_cols; i++) {
            s->td[i].b_base = av_malloc(sizeof(VP9Block));
            s->td[i].block_base = av_mallocz((64 * 64 + 2 * chroma_blocks) * bytesperpixel * sizeof(int16_t) +
                                       16 * 16 + 2 * chroma_eobs);
            if (!s->td[i].b_base || !s->td[i].block_base)
                return AVERROR(ENOMEM);
            s->td[i].uvblock_base[0] = s->td[i].block_base + 64 * 64 * bytesperpixel;
            s->td[i].uvblock_base[1] = s->td[i].uvblock_base[0] + chroma_blocks * bytesperpixel;
            s->td[i].eob_base = (uint8_t *) (s->td[i].uvblock_base[1] + chroma_blocks * bytesperpixel);
            s->td[i].uveob_base[0] = s->td[i].eob_base + 16 * 16;
            s->td[i].uveob_base[1] = s->td[i].uveob_base[0] + chroma_eobs;

    // remember which layout we allocated for
    s->block_alloc_using_2pass = s->s.frames[CUR_FRAME].uses_2pass;
337 // The sign bit is at the end, not the start, of a bit sequence
338 static av_always_inline int get_sbits_inv(GetBitContext *gb, int n)
340 int v = get_bits(gb, n);
341 return get_bits1(gb) ? -v : v;
344 static av_always_inline int inv_recenter_nonneg(int v, int m)
349 return m - ((v + 1) >> 1);
// differential forward probability updates
/**
 * Decode a differentially-coded probability update: read a VLC-coded
 * offset d, map it through inv_map_table[] and re-center it around the
 * current probability p. Returns the new probability in [1, 255].
 *
 * NOTE(review): extraction gaps — the `int d;` declaration, the table's
 * closing `};` (values 252..254), an `if (d >= 65)` guard and `d += 64;`
 * appear to be missing; code kept as-is.
 */
static int update_prob(VP56RangeCoder *c, int p)
    static const int inv_map_table[255] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    // subexponential VLC: 4/4/5/7-bit buckets of increasing base offset
    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
        d = vp8_rac_get_uint(c, 7);
            d = (d << 1) - 65 + vp8_rac_get(c);
        av_assert2(d < FF_ARRAY_ELEMS(inv_map_table));

    // re-center around whichever half of the range p lies in
    return p <= 128 ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1) :
                    255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
/**
 * Parse the color config from the uncompressed header: bit depth (profiles
 * 2/3 only), colorspace, color range and chroma subsampling, and derive
 * s->pix_fmt / s->bpp / s->bytesperpixel from them.
 *
 * RGB (SRGB colorspace) is only valid in odd profiles and is always
 * full-range 4:4:4; 4:2:0 is rejected in odd profiles.
 *
 * @return 0 on success, AVERROR_INVALIDDATA on reserved-bit/invalid combos
 *         (NOTE(review): some `return` lines appear lost to extraction)
 */
static int read_colorspace_details(AVCodecContext *avctx)
    static const enum AVColorSpace colorspaces[8] = {
        AVCOL_SPC_UNSPECIFIED, AVCOL_SPC_BT470BG, AVCOL_SPC_BT709, AVCOL_SPC_SMPTE170M,
        AVCOL_SPC_SMPTE240M, AVCOL_SPC_BT2020_NCL, AVCOL_SPC_RESERVED, AVCOL_SPC_RGB,
    VP9Context *s = avctx->priv_data;
    // profiles 0/1 are always 8-bit; profiles 2/3 signal 10 vs 12 bit
    int bits = avctx->profile <= 1 ? 0 : 1 + get_bits1(&s->gb); // 0:8, 1:10, 2:12

    s->s.h.bpp = 8 + bits * 2;
    s->bytesperpixel = (7 + s->s.h.bpp) >> 3;
    avctx->colorspace = colorspaces[get_bits(&s->gb, 3)];
    if (avctx->colorspace == AVCOL_SPC_RGB) { // RGB = profile 1
        static const enum AVPixelFormat pix_fmt_rgb[3] = {
            AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP10, AV_PIX_FMT_GBRP12
        // RGB is always 4:4:4 full range
        s->ss_h = s->ss_v = 0;
        avctx->color_range = AVCOL_RANGE_JPEG;
        s->pix_fmt = pix_fmt_rgb[bits];
        if (avctx->profile & 1) {
            if (get_bits1(&s->gb)) {
                av_log(avctx, AV_LOG_ERROR, "Reserved bit set in RGB\n");
                return AVERROR_INVALIDDATA;
            av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile %d\n",
            return AVERROR_INVALIDDATA;
        // [bit depth][subsampling vertical][subsampling horizontal]
        static const enum AVPixelFormat pix_fmt_for_ss[3][2 /* v */][2 /* h */] = {
            { { AV_PIX_FMT_YUV444P,   AV_PIX_FMT_YUV422P },
              { AV_PIX_FMT_YUV440P,   AV_PIX_FMT_YUV420P } },
            { { AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV422P10 },
              { AV_PIX_FMT_YUV440P10, AV_PIX_FMT_YUV420P10 } },
            { { AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV422P12 },
              { AV_PIX_FMT_YUV440P12, AV_PIX_FMT_YUV420P12 } }
        avctx->color_range = get_bits1(&s->gb) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
        if (avctx->profile & 1) {
            // odd profiles signal subsampling explicitly
            s->ss_h = get_bits1(&s->gb);
            s->ss_v = get_bits1(&s->gb);
            s->pix_fmt = pix_fmt_for_ss[bits][s->ss_v][s->ss_h];
            if (s->pix_fmt == AV_PIX_FMT_YUV420P) {
                av_log(avctx, AV_LOG_ERROR, "YUV 4:2:0 not supported in profile %d\n",
                return AVERROR_INVALIDDATA;
            } else if (get_bits1(&s->gb)) {
                av_log(avctx, AV_LOG_ERROR, "Profile %d color details reserved bit set\n",
                return AVERROR_INVALIDDATA;
            // even profiles are always 4:2:0
            s->ss_h = s->ss_v = 1;
            s->pix_fmt = pix_fmt_for_ss[bits][1][1];
/**
 * Parse a complete VP9 frame header: the uncompressed part (frame marker,
 * profile, frame type, size, loop filter, quantizer, segmentation, tiling)
 * with a plain bit reader, then the arithmetically-coded compressed part
 * (probability updates) with the range coder.
 *
 * @param data  start of the frame payload
 * @param size  payload size in bytes
 * @param ref   set to the show-existing-frame reference index when that
 *              short-circuit path is taken
 * @return number of header bytes consumed on success, negative AVERROR on
 *         error
 *
 * NOTE(review): this block has many extraction gaps (missing `return`s,
 * `break;`s, `else` branches and closing braces); code kept byte-identical.
 */
static int decode_frame_header(AVCodecContext *avctx,
                               const uint8_t *data, int size, int *ref)
    VP9Context *s = avctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
    const uint8_t *data2;

    /* general header */
    if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    // profile is 2 bits plus, for profile 3, one reserved bit
    avctx->profile  = get_bits1(&s->gb);
    avctx->profile |= get_bits1(&s->gb) << 1;
    if (avctx->profile == 3) avctx->profile += get_bits1(&s->gb);
    if (avctx->profile > 3) {
        av_log(avctx, AV_LOG_ERROR, "Profile %d is not yet supported\n", avctx->profile);
        return AVERROR_INVALIDDATA;
    s->s.h.profile = avctx->profile;
    // show-existing-frame: no decode, just output the indicated reference
    if (get_bits1(&s->gb)) {
        *ref = get_bits(&s->gb, 3);

    s->last_keyframe  = s->s.h.keyframe;
    s->s.h.keyframe   = !get_bits1(&s->gb);

    last_invisible   = s->s.h.invisible;
    s->s.h.invisible = !get_bits1(&s->gb);
    s->s.h.errorres  = get_bits1(&s->gb);
    s->s.h.use_last_frame_mvs = !s->s.h.errorres && !last_invisible;

    if (s->s.h.keyframe) {
        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        if ((ret = read_colorspace_details(avctx)) < 0)
        // for profile 1, here follows the subsampling bits
        s->s.h.refreshrefmask = 0xff;  // keyframes refresh all references
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
        s->s.h.intraonly = s->s.h.invisible ? get_bits1(&s->gb) : 0;
        s->s.h.resetctx  = s->s.h.errorres ? 0 : get_bits(&s->gb, 2);
        if (s->s.h.intraonly) {
            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            if (avctx->profile >= 1) {
                if ((ret = read_colorspace_details(avctx)) < 0)
                // profile 0 intra-only frames are always 8-bit 4:2:0 BT.601
                s->ss_h = s->ss_v = 1;
                s->bytesperpixel = 1;
                s->pix_fmt = AV_PIX_FMT_YUV420P;
                avctx->colorspace = AVCOL_SPC_BT470BG;
                avctx->color_range = AVCOL_RANGE_MPEG;
            s->s.h.refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            /* inter frame: reference indices, sign biases and size */
            s->s.h.refreshrefmask = get_bits(&s->gb, 8);
            s->s.h.refidx[0]      = get_bits(&s->gb, 3);
            s->s.h.signbias[0]    = get_bits1(&s->gb) && !s->s.h.errorres;
            s->s.h.refidx[1]      = get_bits(&s->gb, 3);
            s->s.h.signbias[1]    = get_bits1(&s->gb) && !s->s.h.errorres;
            s->s.h.refidx[2]      = get_bits(&s->gb, 3);
            s->s.h.signbias[2]    = get_bits1(&s->gb) && !s->s.h.errorres;
            if (!s->s.refs[s->s.h.refidx[0]].f->buf[0] ||
                !s->s.refs[s->s.h.refidx[1]].f->buf[0] ||
                !s->s.refs[s->s.h.refidx[2]].f->buf[0]) {
                av_log(avctx, AV_LOG_ERROR, "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            // frame size may be copied from one of the references
            if (get_bits1(&s->gb)) {
                w = s->s.refs[s->s.h.refidx[0]].f->width;
                h = s->s.refs[s->s.h.refidx[0]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->s.refs[s->s.h.refidx[1]].f->width;
                h = s->s.refs[s->s.h.refidx[1]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->s.refs[s->s.h.refidx[2]].f->width;
                h = s->s.refs[s->s.h.refidx[2]].f->height;
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            // Note that in this code, "CUR_FRAME" is actually before we
            // have formally allocated a frame, and thus actually represents
            s->s.h.use_last_frame_mvs &= s->s.frames[CUR_FRAME].tf.f->width == w &&
                                         s->s.frames[CUR_FRAME].tf.f->height == h;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->s.h.highprecisionmvs = get_bits1(&s->gb);
            s->s.h.filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
            // compound prediction only allowed with mixed sign biases;
            // the odd-one-out bias becomes the fixed compound reference
            s->s.h.allowcompinter = s->s.h.signbias[0] != s->s.h.signbias[1] ||
                                    s->s.h.signbias[0] != s->s.h.signbias[2];
            if (s->s.h.allowcompinter) {
                if (s->s.h.signbias[0] == s->s.h.signbias[1]) {
                    s->s.h.fixcompref    = 2;
                    s->s.h.varcompref[0] = 0;
                    s->s.h.varcompref[1] = 1;
                } else if (s->s.h.signbias[0] == s->s.h.signbias[2]) {
                    s->s.h.fixcompref    = 1;
                    s->s.h.varcompref[0] = 0;
                    s->s.h.varcompref[1] = 2;
                    s->s.h.fixcompref    = 0;
                    s->s.h.varcompref[0] = 1;
                    s->s.h.varcompref[1] = 2;
    s->s.h.refreshctx   = s->s.h.errorres ? 0 : get_bits1(&s->gb);
    s->s.h.parallelmode = s->s.h.errorres ? 1 : get_bits1(&s->gb);
    s->s.h.framectxid   = c = get_bits(&s->gb, 2);
    if (s->s.h.keyframe || s->s.h.intraonly)
        s->s.h.framectxid = 0; // BUG: libvpx ignores this field in keyframes

    /* loopfilter header data */
    if (s->s.h.keyframe || s->s.h.errorres || s->s.h.intraonly) {
        // reset loopfilter defaults
        s->s.h.lf_delta.ref[0] = 1;
        s->s.h.lf_delta.ref[1] = 0;
        s->s.h.lf_delta.ref[2] = -1;
        s->s.h.lf_delta.ref[3] = -1;
        s->s.h.lf_delta.mode[0] = 0;
        s->s.h.lf_delta.mode[1] = 0;
        memset(s->s.h.segmentation.feat, 0, sizeof(s->s.h.segmentation.feat));
    s->s.h.filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    // if sharpness changed, reinit lim/mblim LUTs. if it didn't change, keep
    // the old cache values since they are still valid
    if (s->s.h.filter.sharpness != sharp) {
        for (i = 1; i <= 63; i++) {
                limit >>= (sharp + 3) >> 2;
                limit = FFMIN(limit, 9 - sharp);
            limit = FFMAX(limit, 1);

            s->filter_lut.lim_lut[i] = limit;
            s->filter_lut.mblim_lut[i] = 2 * (i + 2) + limit;
    s->s.h.filter.sharpness = sharp;
    if ((s->s.h.lf_delta.enabled = get_bits1(&s->gb))) {
        if ((s->s.h.lf_delta.updated = get_bits1(&s->gb))) {
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->s.h.lf_delta.ref[i] = get_sbits_inv(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->s.h.lf_delta.mode[i] = get_sbits_inv(&s->gb, 6);

    /* quantization header data */
    s->s.h.yac_qi      = get_bits(&s->gb, 8);
    s->s.h.ydc_qdelta  = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->s.h.uvdc_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->s.h.uvac_qdelta = get_bits1(&s->gb) ? get_sbits_inv(&s->gb, 4) : 0;
    s->s.h.lossless    = s->s.h.yac_qi == 0 && s->s.h.ydc_qdelta == 0 &&
                         s->s.h.uvdc_qdelta == 0 && s->s.h.uvac_qdelta == 0;
        avctx->properties |= FF_CODEC_PROPERTY_LOSSLESS;

    /* segmentation header info */
    if ((s->s.h.segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->s.h.segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->s.h.segmentation.prob[i] = get_bits1(&s->gb) ?
                                get_bits(&s->gb, 8) : 255;
            if ((s->s.h.segmentation.temporal = get_bits1(&s->gb)))
                for (i = 0; i < 3; i++)
                    s->s.h.segmentation.pred_prob[i] = get_bits1(&s->gb) ?
                                         get_bits(&s->gb, 8) : 255;

        if (get_bits1(&s->gb)) {
            s->s.h.segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->s.h.segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->s.h.segmentation.feat[i].q_val = get_sbits_inv(&s->gb, 8);
                if ((s->s.h.segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->s.h.segmentation.feat[i].lf_val = get_sbits_inv(&s->gb, 6);
                if ((s->s.h.segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->s.h.segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->s.h.segmentation.feat[i].skip_enabled = get_bits1(&s->gb);

    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->s.h.segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].q_enabled) {
            if (s->s.h.segmentation.absolute_vals)
                qyac = av_clip_uintp2(s->s.h.segmentation.feat[i].q_val, 8);
                qyac = av_clip_uintp2(s->s.h.yac_qi + s->s.h.segmentation.feat[i].q_val, 8);
            qyac = s->s.h.yac_qi;

        qydc  = av_clip_uintp2(qyac + s->s.h.ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->s.h.uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->s.h.uvac_qdelta, 8);
        qyac  = av_clip_uintp2(qyac, 8);

        s->s.h.segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[s->bpp_index][qydc];
        s->s.h.segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[s->bpp_index][qyac];
        s->s.h.segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[s->bpp_index][quvdc];
        s->s.h.segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[s->bpp_index][quvac];

        sh = s->s.h.filter.level >= 32;
        if (s->s.h.segmentation.enabled && s->s.h.segmentation.feat[i].lf_enabled) {
            if (s->s.h.segmentation.absolute_vals)
                lflvl = av_clip_uintp2(s->s.h.segmentation.feat[i].lf_val, 6);
                lflvl = av_clip_uintp2(s->s.h.filter.level + s->s.h.segmentation.feat[i].lf_val, 6);
            lflvl = s->s.h.filter.level;

        if (s->s.h.lf_delta.enabled) {
            s->s.h.segmentation.feat[i].lflvl[0][0] =
            s->s.h.segmentation.feat[i].lflvl[0][1] =
                av_clip_uintp2(lflvl + (s->s.h.lf_delta.ref[0] * (1 << sh)), 6);
            for (j = 1; j < 4; j++) {
                s->s.h.segmentation.feat[i].lflvl[j][0] =
                    av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
                                             s->s.h.lf_delta.mode[0]) * (1 << sh)), 6);
                s->s.h.segmentation.feat[i].lflvl[j][1] =
                    av_clip_uintp2(lflvl + ((s->s.h.lf_delta.ref[j] +
                                             s->s.h.lf_delta.mode[1]) * (1 << sh)), 6);
            memset(s->s.h.segmentation.feat[i].lflvl, lflvl,
                   sizeof(s->s.h.segmentation.feat[i].lflvl));

    /* tiling info */
    if ((ret = update_size(avctx, w, h)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to initialize decoder for %dx%d @ %d\n",
    // tile columns: coded as increments above the minimum log2 count
    for (s->s.h.tiling.log2_tile_cols = 0;
         s->sb_cols > (64 << s->s.h.tiling.log2_tile_cols);
         s->s.h.tiling.log2_tile_cols++) ;
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->s.h.tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->s.h.tiling.log2_tile_cols++;
    s->s.h.tiling.log2_tile_rows = decode012(&s->gb);
    s->s.h.tiling.tile_rows = 1 << s->s.h.tiling.log2_tile_rows;
    if (s->s.h.tiling.tile_cols != (1 << s->s.h.tiling.log2_tile_cols)) {
        // tile layout changed: drop per-tile data and reallocate
        for (i = 0; i < s->active_tile_cols; i++) {
            av_free(s->td[i].b_base);
            av_free(s->td[i].block_base);
        s->s.h.tiling.tile_cols = 1 << s->s.h.tiling.log2_tile_cols;
        vp9_free_entries(avctx);
        // one VP9TileData per tile column with slice threading, else one
        s->active_tile_cols = avctx->active_thread_type == FF_THREAD_SLICE ?
                              s->s.h.tiling.tile_cols : 1;
        vp9_alloc_entries(avctx, s->sb_rows);
        if (avctx->active_thread_type == FF_THREAD_SLICE) {
            n_range_coders = 4; // max_tile_rows
            n_range_coders = s->s.h.tiling.tile_cols;
        // the range coders live in the same allocation, after the td array
        s->td = av_mallocz_array(s->active_tile_cols, sizeof(VP9TileData) +
                                 n_range_coders * sizeof(VP56RangeCoder));
            return AVERROR(ENOMEM);
        rc = (VP56RangeCoder *) &s->td[s->active_tile_cols];
        for (i = 0; i < s->active_tile_cols; i++) {
            rc += n_range_coders;

    /* check reference frames */
    if (!s->s.h.keyframe && !s->s.h.intraonly) {
        for (i = 0; i < 3; i++) {
            AVFrame *ref = s->s.refs[s->s.h.refidx[i]].f;
            int refw = ref->width, refh = ref->height;

            if (ref->format != avctx->pix_fmt) {
                av_log(avctx, AV_LOG_ERROR,
                       "Ref pixfmt (%s) did not match current frame (%s)",
                       av_get_pix_fmt_name(ref->format),
                       av_get_pix_fmt_name(avctx->pix_fmt));
                return AVERROR_INVALIDDATA;
            } else if (refw == w && refh == h) {
                s->mvscale[i][0] = s->mvscale[i][1] = 0;
                // scaled prediction: ref may be at most 2x smaller / 16x larger
                if (w * 2 < refw || h * 2 < refh || w > 16 * refw || h > 16 * refh) {
                    av_log(avctx, AV_LOG_ERROR,
                           "Invalid ref frame dimensions %dx%d for frame size %dx%d\n",
                    return AVERROR_INVALIDDATA;
                s->mvscale[i][0] = (refw << 14) / w;  // 14-bit fixed point
                s->mvscale[i][1] = (refh << 14) / h;
                s->mvstep[i][0] = 16 * s->mvscale[i][0] >> 14;
                s->mvstep[i][1] = 16 * s->mvscale[i][1] >> 14;

    // context reset: all four contexts on keyframe/errorres, one on resetctx==2
    if (s->s.h.keyframe || s->s.h.errorres || (s->s.h.intraonly && s->s.h.resetctx == 3)) {
        s->prob_ctx[0].p = s->prob_ctx[1].p = s->prob_ctx[2].p =
                           s->prob_ctx[3].p = ff_vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
    } else if (s->s.h.intraonly && s->s.h.resetctx == 2) {
        s->prob_ctx[c].p = ff_vp9_default_probs;
        memcpy(s->prob_ctx[c].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));

    // next 16 bits is size of the rest of the header (arith-coded)
    s->s.h.compressed_header_size = size2 = get_bits(&s->gb, 16);
    s->s.h.uncompressed_header_size = (get_bits_count(&s->gb) + 7) / 8;

    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    ret = ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;

    // reset symbol counters (coef/eob only survive on key/intra frames)
    for (i = 0; i < s->active_tile_cols; i++) {
        if (s->s.h.keyframe || s->s.h.intraonly) {
            memset(s->td[i].counts.coef, 0, sizeof(s->td[0].counts.coef));
            memset(s->td[i].counts.eob,  0, sizeof(s->td[0].counts.eob));
            memset(&s->td[i].counts, 0, sizeof(s->td[0].counts));

    /* FIXME is it faster to not copy here, but do it down in the fw updates
     * as explicit copies if the fw update is missing (and skip the copy upon
    s->prob.p = s->prob_ctx[c].p;

    // txfm updates
    if (s->s.h.lossless) {
        s->s.h.txfmmode = TX_4X4;
        s->s.h.txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->s.h.txfmmode == 3)
            s->s.h.txfmmode += vp8_rac_get(&s->c);

        if (s->s.h.txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);

    // coef updates, per tx size up to the active txfmmode
    for (i = 0; i < 4; i++) {
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252))
                                    p[n] = update_prob(&s->c, r[n]);
                            // probs 3..10 derived from the pareto model
                            memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m > 3 && l == 0) // dc only has 3 pt
                            memcpy(&p[3], ff_vp9_model_pareto8[p[2]], 8);
        if (s->s.h.txfmmode == i)

    // mode updates
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->s.h.keyframe && !s->s.h.intraonly) {
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->s.h.filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);

        if (s->s.h.allowcompinter) {
            s->s.h.comppredmode = vp8_rac_get(&s->c);
            if (s->s.h.comppredmode)
                s->s.h.comppredmode += vp8_rac_get(&s->c);
            if (s->s.h.comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                            update_prob(&s->c, s->prob.p.comp[i]);
            s->s.h.comppredmode = PRED_SINGLEREF;

        if (s->s.h.comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);

        if (s->s.h.comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                                        s->prob.p.partition[3 - i][j][k]);

        // mv fields don't use the update_prob subexp model for some reason
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        // high-precision mv bits are only coded when enabled for this frame
        if (s->s.h.highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

    // total header size: uncompressed part + compressed part
    return (data2 - data) + size2;
// Recursively decode one block at partition level `bl` (BL_64X64 down to
// BL_8X8) at 8x8-block position (row, col): read the partition symbol from
// the tile's range coder and dispatch to ff_vp9_decode_block() or recursive
// decode_sb() calls for the sub-blocks.
// NOTE(review): several structural lines (a switch statement, some case
// labels, break statements and closing braces) appear to be missing from
// this extract; comments below describe only the visible code.
1052 static void decode_sb(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1053 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1055 const VP9Context *s = td->s;
// Partition context c (0..3): one bit from the above-row context, one bit
// from the left-column context, selecting the probability set for this level.
1056 int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
1057 (((td->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
// Keyframes/intra-only frames use the fixed default partition probabilities;
// inter frames use the per-frame adapted ones.
1058 const uint8_t *p = s->s.h.keyframe || s->s.h.intraonly ? ff_vp9_default_kf_partition_probs[bl][c] :
1059 s->prob.p.partition[bl][c];
1060 enum BlockPartition bp;
// hbs = half the block size at this level, in 8x8-block units (4, 2, 1, ...).
1061 ptrdiff_t hbs = 4 >> bl;
1062 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1063 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1064 int bytesperpixel = s->bytesperpixel;
// Smallest-level case (presumably guarded by a bl == BL_8X8 test on a
// missing line — TODO confirm): read the partition and decode directly.
1067 bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1068 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1069 } else if (col + hbs < s->cols) { // FIXME why not <=?
1070 if (row + hbs < s->rows) { // FIXME why not <=?
// Both halves fit inside the frame: read the full partition symbol.
1071 bp = vp8_rac_get_tree(td->c, ff_vp9_partition_tree, p);
1073 case PARTITION_NONE:
1074 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
// Horizontal split: decode top half, advance offsets by hbs rows of 8
// pixels (chroma scaled by ss_v), then decode bottom half.
1077 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1078 yoff += hbs * 8 * y_stride;
1079 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1080 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, bl, bp);
// Vertical split: decode left half, advance offsets by hbs columns of 8
// pixels (chroma scaled by ss_h), then decode right half.
1083 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
1084 yoff += hbs * 8 * bytesperpixel;
1085 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1086 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, bl, bp);
1088 case PARTITION_SPLIT:
// Recurse into the four quadrants at the next (smaller) level.
1089 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1090 decode_sb(td, row, col + hbs, lflvl,
1091 yoff + 8 * hbs * bytesperpixel,
1092 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1093 yoff += hbs * 8 * y_stride;
1094 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1095 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1096 decode_sb(td, row + hbs, col + hbs, lflvl,
1097 yoff + 8 * hbs * bytesperpixel,
1098 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// Bottom frame edge (row + hbs >= rows): a single branch on p[1] chooses
// between splitting into the two top quadrants or decoding as-is.
1103 } else if (vp56_rac_get_prob_branchy(td->c, p[1])) {
1104 bp = PARTITION_SPLIT;
1105 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1106 decode_sb(td, row, col + hbs, lflvl,
1107 yoff + 8 * hbs * bytesperpixel,
1108 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1111 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
// Right frame edge: a single branch on p[2] chooses between splitting into
// the two left quadrants or decoding as-is.
1113 } else if (row + hbs < s->rows) { // FIXME why not <=?
1114 if (vp56_rac_get_prob_branchy(td->c, p[2])) {
1115 bp = PARTITION_SPLIT;
1116 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
1117 yoff += hbs * 8 * y_stride;
1118 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1119 decode_sb(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1122 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, bl, bp);
// Bottom-right corner: forced split, only the top-left quadrant is inside.
1125 bp = PARTITION_SPLIT;
1126 decode_sb(td, row, col, lflvl, yoff, uvoff, bl + 1);
// Record the chosen partition for later probability adaptation.
1128 td->counts.partition[bl][c][bp]++;
// Second-pass variant of decode_sb(): replays the block/partition decisions
// stored in td->b during the first pass instead of reading the bitstream
// (no range-coder access here).  Used by the two-pass frame-threading mode.
// NOTE(review): some structural lines (braces, an apparent leading
// bl == BL_8X8 guard) are missing from this extract.
1131 static void decode_sb_mem(VP9TileData *td, int row, int col, VP9Filter *lflvl,
1132 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
1134 const VP9Context *s = td->s;
// b points at the block decision recorded by the first pass.
1135 VP9Block *b = td->b;
// hbs = half the block size at this level, in 8x8-block units.
1136 ptrdiff_t hbs = 4 >> bl;
1137 AVFrame *f = s->s.frames[CUR_FRAME].tf.f;
1138 ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
1139 int bytesperpixel = s->bytesperpixel;
// Smallest level: the stored block must itself be an 8x8 decision.
1142 av_assert2(b->bl == BL_8X8);
1143 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
// Stored decision was made at exactly this level: replay it, including the
// second half for H/V partitions when it lies inside the frame.
1144 } else if (td->b->bl == bl) {
1145 ff_vp9_decode_block(td, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
1146 if (b->bp == PARTITION_H && row + hbs < s->rows) {
1147 yoff += hbs * 8 * y_stride;
1148 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1149 ff_vp9_decode_block(td, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
1150 } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
1151 yoff += hbs * 8 * bytesperpixel;
1152 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1153 ff_vp9_decode_block(td, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
// Stored decision is at a deeper level: recurse into the quadrants that
// actually lie inside the frame (same edge handling as decode_sb()).
1156 decode_sb_mem(td, row, col, lflvl, yoff, uvoff, bl + 1);
1157 if (col + hbs < s->cols) { // FIXME why not <=?
1158 if (row + hbs < s->rows) {
1159 decode_sb_mem(td, row, col + hbs, lflvl, yoff + 8 * hbs * bytesperpixel,
1160 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
1161 yoff += hbs * 8 * y_stride;
1162 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1163 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
1164 decode_sb_mem(td, row + hbs, col + hbs, lflvl,
1165 yoff + 8 * hbs * bytesperpixel,
1166 uvoff + (8 * hbs * bytesperpixel >> s->ss_h), bl + 1);
// Bottom edge: only the right quadrant of the top row remains.
1168 yoff += hbs * 8 * bytesperpixel;
1169 uvoff += hbs * 8 * bytesperpixel >> s->ss_h;
1170 decode_sb_mem(td, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
// Right edge: only the bottom-left quadrant remains.
1172 } else if (row + hbs < s->rows) {
1173 yoff += hbs * 8 * y_stride;
1174 uvoff += hbs * 8 * uv_stride >> s->ss_v;
1175 decode_sb_mem(td, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
/**
 * Compute the range of 8x8-block rows/columns covered by tile `idx`.
 *
 * @param start   receives the first 8x8-block index of the tile
 * @param end     receives the one-past-last 8x8-block index of the tile
 * @param idx     tile index
 * @param log2_n  log2 of the tile count along this dimension
 * @param n       frame size along this dimension in 64x64 superblock units
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    /* tile boundaries in superblock (64x64) units */
    int sb_start = ( idx      * n) >> log2_n;
    int sb_end   = ((idx + 1) * n) >> log2_n;

    /* clamp to the frame, then scale from sb64 units to 8x8-block units */
    if (sb_start > n)
        sb_start = n;
    if (sb_end > n)
        sb_end = n;
    *start = sb_start << 3;
    *end   = sb_end   << 3;
}
1188 static void free_buffers(VP9Context *s)
1192 av_freep(&s->intra_pred_data[0]);
1193 for (i = 0; i < s->active_tile_cols; i++) {
1194 av_freep(&s->td[i].b_base);
1195 av_freep(&s->td[i].block_base);
// Codec close callback: release every internal frame, all reference-frame
// slots (current and pending "next" refs), and the tile-threading entries.
// NOTE(review): the tail of this function (after vp9_free_entries) is not
// visible in this extract.
1199 static av_cold int vp9_decode_free(AVCodecContext *avctx)
1201 VP9Context *s = avctx->priv_data;
// The three internal frames: CUR_FRAME, REF_FRAME_SEGMAP, REF_FRAME_MVPAIR.
1204 for (i = 0; i < 3; i++) {
1205 if (s->s.frames[i].tf.f->buf[0])
1206 vp9_frame_unref(avctx, &s->s.frames[i]);
// Free the AVFrame shell itself (allocated in init_frames()).
1207 av_frame_free(&s->s.frames[i].tf.f);
// The eight reference slots, plus their pending replacements in next_refs.
1209 for (i = 0; i < 8; i++) {
1210 if (s->s.refs[i].f->buf[0])
1211 ff_thread_release_buffer(avctx, &s->s.refs[i]);
1212 av_frame_free(&s->s.refs[i].f);
1213 if (s->next_refs[i].f->buf[0])
1214 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1215 av_frame_free(&s->next_refs[i].f);
// Tear down the slice-threading progress entries/mutex/cond.
1219 vp9_free_entries(avctx);
// Single-threaded tile decode loop: initializes one range decoder per tile,
// then walks the frame sbrow by sbrow, decoding each tile column's stripe,
// backing up the last pixel row for next-row intra prediction, loop
// filtering the row, and reporting progress for frame threading.
// NOTE(review): several lines (braces, some statements between the embedded
// line-number gaps) are missing from this extract.
1224 static int decode_tiles(AVCodecContext *avctx,
1225 const uint8_t *data, int size)
1227 VP9Context *s = avctx->priv_data;
// Single-threaded: only td[0] is used for all tiles.
1228 VP9TileData *td = &s->td[0];
1229 int row, col, tile_row, tile_col, ret;
1231 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1233 ptrdiff_t yoff, uvoff, ls_y, ls_uv;
1235 f = s->s.frames[CUR_FRAME].tf.f;
1236 ls_y = f->linesize[0];
1237 ls_uv =f->linesize[1];
1238 bytesperpixel = s->bytesperpixel;
// Pass 1 over the bitstream layout: set up one range decoder per tile.
1241 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1242 set_tile_offset(&tile_row_start, &tile_row_end,
1243 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
1245 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
// The last tile has no explicit size field: it spans the remaining data.
1248 if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1249 tile_row == s->s.h.tiling.tile_rows - 1) {
// All other tiles are prefixed with a 32-bit big-endian size.
1252 tile_size = AV_RB32(data);
// Reject tile sizes that overflow the packet; unblock any frame-thread
// consumers before erroring out.
1256 if (tile_size > size) {
1257 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1258 return AVERROR_INVALIDDATA;
1260 ret = ff_vp56_init_range_decoder(&td->c_b[tile_col], data, tile_size);
1263 if (vp56_rac_get_prob_branchy(&td->c_b[tile_col], 128)) { // marker bit
1264 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1265 return AVERROR_INVALIDDATA;
// Decode this tile row, one sbrow (8 8x8-block rows = 64 pixels) at a time.
1271 for (row = tile_row_start; row < tile_row_end;
1272 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1273 VP9Filter *lflvl_ptr = s->lflvl;
1274 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1276 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
1277 set_tile_offset(&tile_col_start, &tile_col_end,
1278 tile_col, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1279 td->tile_col_start = tile_col_start;
// Reset the per-tile "left edge" contexts at the start of each tile stripe.
1281 memset(td->left_partition_ctx, 0, 8);
1282 memset(td->left_skip_ctx, 0, 8);
1283 if (s->s.h.keyframe || s->s.h.intraonly) {
1284 memset(td->left_mode_ctx, DC_PRED, 16);
1286 memset(td->left_mode_ctx, NEARESTMV, 8);
1288 memset(td->left_y_nnz_ctx, 0, 16);
1289 memset(td->left_uv_nnz_ctx, 0, 32);
1290 memset(td->left_segpred_ctx, 0, 8);
// Select this tile column's range decoder.
1292 td->c = &td->c_b[tile_col];
// Walk superblocks (64x64) across this tile stripe.
1295 for (col = tile_col_start;
1297 col += 8, yoff2 += 64 * bytesperpixel,
1298 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1299 // FIXME integrate with lf code (i.e. zero after each
1300 // use, similar to invtxfm coefficients, or similar)
1302 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
// Second pass replays stored decisions; first pass reads the bitstream
// (the pass selection branch appears to be on a missing line).
1306 decode_sb_mem(td, row, col, lflvl_ptr,
1307 yoff2, uvoff2, BL_64X64);
// A truncated/overrun range coder indicates corrupt input.
1309 if (vpX_rac_is_end(td->c)) {
1310 return AVERROR_INVALIDDATA;
1312 decode_sb(td, row, col, lflvl_ptr,
1313 yoff2, uvoff2, BL_64X64);
1321 // backup pre-loopfilter reconstruction data for intra
1322 // prediction of next row of sb64s
1323 if (row + 8 < s->rows) {
1324 memcpy(s->intra_pred_data[0],
1325 f->data[0] + yoff + 63 * ls_y,
1326 8 * s->cols * bytesperpixel);
1327 memcpy(s->intra_pred_data[1],
1328 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1329 8 * s->cols * bytesperpixel >> s->ss_h);
1330 memcpy(s->intra_pred_data[2],
1331 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1332 8 * s->cols * bytesperpixel >> s->ss_h);
1335 // loopfilter one row
1336 if (s->s.h.filter.level) {
1339 lflvl_ptr = s->lflvl;
1340 for (col = 0; col < s->cols;
1341 col += 8, yoff2 += 64 * bytesperpixel,
1342 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1343 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, row, col,
1348 // FIXME maybe we can make this more finegrained by running the
1349 // loopfilter per-block instead of after each sbrow
1350 // In fact that would also make intra pred left preparation easier?
1351 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, row >> 3, 0);
// Slice-threaded tile decode worker: each job (jobnr) decodes one tile
// column across all tile rows.  The loop filter runs separately in
// loopfilter_proc(), synchronized via vp9_report_tile_progress().
// NOTE(review): some lines (braces, part of the signature continuation) are
// missing from this extract.
1358 static av_always_inline
1359 int decode_tiles_mt(AVCodecContext *avctx, void *tdata, int jobnr,
1362 VP9Context *s = avctx->priv_data;
// Each job has its own VP9TileData with private left-edge contexts.
1363 VP9TileData *td = &s->td[jobnr];
1364 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1365 int bytesperpixel = s->bytesperpixel, row, col, tile_row;
1366 unsigned tile_cols_len;
1367 int tile_row_start, tile_row_end, tile_col_start, tile_col_end;
1368 VP9Filter *lflvl_ptr_base;
1371 f = s->s.frames[CUR_FRAME].tf.f;
1372 ls_y = f->linesize[0];
1373 ls_uv =f->linesize[1];
// This job's tile column range; offsets start at the tile's left edge.
1375 set_tile_offset(&tile_col_start, &tile_col_end,
1376 jobnr, s->s.h.tiling.log2_tile_cols, s->sb_cols);
1377 td->tile_col_start = tile_col_start;
1378 uvoff = (64 * bytesperpixel >> s->ss_h)*(tile_col_start >> 3);
1379 yoff = (64 * bytesperpixel)*(tile_col_start >> 3);
1380 lflvl_ptr_base = s->lflvl+(tile_col_start >> 3);
1382 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1383 set_tile_offset(&tile_row_start, &tile_row_end,
1384 tile_row, s->s.h.tiling.log2_tile_rows, s->sb_rows);
// One pre-initialized range decoder per tile row for this column.
1386 td->c = &td->c_b[tile_row];
// Walk sbrows (64-pixel strips) within this tile row.
1387 for (row = tile_row_start; row < tile_row_end;
1388 row += 8, yoff += ls_y * 64, uvoff += ls_uv * 64 >> s->ss_v) {
1389 ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;
1390 VP9Filter *lflvl_ptr = lflvl_ptr_base+s->sb_cols*(row >> 3);
// Reset per-tile left-edge contexts at the start of each sbrow.
1392 memset(td->left_partition_ctx, 0, 8);
1393 memset(td->left_skip_ctx, 0, 8);
1394 if (s->s.h.keyframe || s->s.h.intraonly) {
1395 memset(td->left_mode_ctx, DC_PRED, 16);
1397 memset(td->left_mode_ctx, NEARESTMV, 8);
1399 memset(td->left_y_nnz_ctx, 0, 16);
1400 memset(td->left_uv_nnz_ctx, 0, 32);
1401 memset(td->left_segpred_ctx, 0, 8);
// Decode superblocks across this tile column.
1403 for (col = tile_col_start;
1405 col += 8, yoff2 += 64 * bytesperpixel,
1406 uvoff2 += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1407 // FIXME integrate with lf code (i.e. zero after each
1408 // use, similar to invtxfm coefficients, or similar)
1409 memset(lflvl_ptr->mask, 0, sizeof(lflvl_ptr->mask));
1410 decode_sb(td, row, col, lflvl_ptr,
1411 yoff2, uvoff2, BL_64X64);
1414 // backup pre-loopfilter reconstruction data for intra
1415 // prediction of next row of sb64s
1416 tile_cols_len = tile_col_end - tile_col_start;
1417 if (row + 8 < s->rows) {
// Unlike decode_tiles(), copy only this tile column's span of pixels.
1418 memcpy(s->intra_pred_data[0] + (tile_col_start * 8 * bytesperpixel),
1419 f->data[0] + yoff + 63 * ls_y,
1420 8 * tile_cols_len * bytesperpixel);
1421 memcpy(s->intra_pred_data[1] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1422 f->data[1] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1423 8 * tile_cols_len * bytesperpixel >> s->ss_h);
1424 memcpy(s->intra_pred_data[2] + (tile_col_start * 8 * bytesperpixel >> s->ss_h),
1425 f->data[2] + uvoff + ((64 >> s->ss_v) - 1) * ls_uv,
1426 8 * tile_cols_len * bytesperpixel >> s->ss_h);
// Signal the loop-filter thread that this job finished sbrow (row >> 3).
1429 vp9_report_tile_progress(s, row >> 3, 1);
// Slice-threading main function: waits until every tile-column job has
// finished a given sbrow, then loop-filters that whole row of superblocks.
// Runs concurrently with the decode_tiles_mt() workers.
1435 static av_always_inline
1436 int loopfilter_proc(AVCodecContext *avctx)
1438 VP9Context *s = avctx->priv_data;
1439 ptrdiff_t uvoff, yoff, ls_y, ls_uv;
1440 VP9Filter *lflvl_ptr;
1441 int bytesperpixel = s->bytesperpixel, col, i;
1444 f = s->s.frames[CUR_FRAME].tf.f;
1445 ls_y = f->linesize[0];
1446 ls_uv =f->linesize[1];
1448 for (i = 0; i < s->sb_rows; i++) {
// Block until all tile_cols jobs have reported completion of sbrow i.
1449 vp9_await_tile_progress(s, i, s->s.h.tiling.tile_cols);
// Filtering is skipped entirely when the frame-level filter level is 0.
1451 if (s->s.h.filter.level) {
1452 yoff = (ls_y * 64)*i;
1453 uvoff = (ls_uv * 64 >> s->ss_v)*i;
1454 lflvl_ptr = s->lflvl+s->sb_cols*i;
// Filter every 64x64 superblock in row i.
1455 for (col = 0; col < s->cols;
1456 col += 8, yoff += 64 * bytesperpixel,
1457 uvoff += 64 * bytesperpixel >> s->ss_h, lflvl_ptr++) {
1458 ff_vp9_loopfilter_sb(avctx, lflvl_ptr, i << 3, col,
// Main decode entry point for one packet: parses the frame header, manages
// the internal frame / reference-slot lifetimes, runs the tile decode
// (hwaccel, slice-threaded, or single-threaded), adapts probabilities, and
// outputs the frame unless it is marked invisible.
// NOTE(review): numerous lines (braces, error-path statements, some loop
// bodies) are missing from this extract; comments describe the visible code.
1467 static int vp9_decode_frame(AVCodecContext *avctx, void *frame,
1468 int *got_frame, AVPacket *pkt)
1470 const uint8_t *data = pkt->data;
1471 int size = pkt->size;
1472 VP9Context *s = avctx->priv_data;
// Keep the previous segmentation map if segmentation is disabled or the
// map is not being updated by this frame.
1474 int retain_segmap_ref = s->s.frames[REF_FRAME_SEGMAP].segmentation_map &&
1475 (!s->s.h.segmentation.enabled || !s->s.h.segmentation.update_map);
1478 if ((ret = decode_frame_header(avctx, data, size, &ref)) < 0) {
// ret == 0: "show existing frame" — output reference `ref` directly,
// with the packet's timestamps, and refresh the ref slots.
1480 } else if (ret == 0) {
1481 if (!s->s.refs[ref].f->buf[0]) {
1482 av_log(avctx, AV_LOG_ERROR, "Requested reference %d not available\n", ref);
1483 return AVERROR_INVALIDDATA;
1485 if ((ret = av_frame_ref(frame, s->s.refs[ref].f)) < 0)
1487 ((AVFrame *)frame)->pts = pkt->pts;
1489 FF_DISABLE_DEPRECATION_WARNINGS
1490 ((AVFrame *)frame)->pkt_pts = pkt->pts;
1491 FF_ENABLE_DEPRECATION_WARNINGS
1493 ((AVFrame *)frame)->pkt_dts = pkt->dts;
1494 for (i = 0; i < 8; i++) {
1495 if (s->next_refs[i].f->buf[0])
1496 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1497 if (s->s.refs[i].f->buf[0] &&
1498 (ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i])) < 0)
// Rotate the internal frames: the just-decoded CUR_FRAME becomes the
// segmentation-map and mv-pair reference for the next frame.
1507 if (!retain_segmap_ref || s->s.h.keyframe || s->s.h.intraonly) {
1508 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0])
1509 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
1510 if (!s->s.h.keyframe && !s->s.h.intraonly && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1511 (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_SEGMAP], &s->s.frames[CUR_FRAME])) < 0)
1514 if (s->s.frames[REF_FRAME_MVPAIR].tf.f->buf[0])
1515 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_MVPAIR]);
1516 if (!s->s.h.intraonly && !s->s.h.keyframe && !s->s.h.errorres && s->s.frames[CUR_FRAME].tf.f->buf[0] &&
1517 (ret = vp9_frame_ref(avctx, &s->s.frames[REF_FRAME_MVPAIR], &s->s.frames[CUR_FRAME])) < 0)
// Allocate a fresh buffer for the frame being decoded now.
1519 if (s->s.frames[CUR_FRAME].tf.f->buf[0])
1520 vp9_frame_unref(avctx, &s->s.frames[CUR_FRAME])
1521 if ((ret = vp9_frame_alloc(avctx, &s->s.frames[CUR_FRAME])) < 0)
1523 f = s->s.frames[CUR_FRAME].tf.f;
1524 f->key_frame = s->s.h.keyframe;
1525 f->pict_type = (s->s.h.keyframe || s->s.h.intraonly) ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
// Drop a stale segmentation-map ref if the frame size changed.
1527 if (s->s.frames[REF_FRAME_SEGMAP].tf.f->buf[0] &&
1528 (s->s.frames[REF_FRAME_MVPAIR].tf.f->width != s->s.frames[CUR_FRAME].tf.f->width ||
1529 s->s.frames[REF_FRAME_MVPAIR].tf.f->height != s->s.frames[CUR_FRAME].tf.f->height)) {
1530 vp9_frame_unref(avctx, &s->s.frames[REF_FRAME_SEGMAP]);
// Build next_refs: slots flagged in refreshrefmask point at CUR_FRAME,
// the rest keep their existing reference.
1534 for (i = 0; i < 8; i++) {
1535 if (s->next_refs[i].f->buf[0])
1536 ff_thread_release_buffer(avctx, &s->next_refs[i]);
1537 if (s->s.h.refreshrefmask & (1 << i)) {
1538 ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.frames[CUR_FRAME].tf);
1539 } else if (s->s.refs[i].f->buf[0]) {
1540 ret = ff_thread_ref_frame(&s->next_refs[i], &s->s.refs[i]);
// Hardware acceleration path: hand the whole packet to the hwaccel.
1546 if (avctx->hwaccel) {
1547 ret = avctx->hwaccel->start_frame(avctx, NULL, 0);
1550 ret = avctx->hwaccel->decode_slice(avctx, pkt->data, pkt->size);
1553 ret = avctx->hwaccel->end_frame(avctx);
1559 // main tile decode loop
// Reset the "above" (top-edge) contexts for the whole frame width.
1560 memset(s->above_partition_ctx, 0, s->cols);
1561 memset(s->above_skip_ctx, 0, s->cols);
1562 if (s->s.h.keyframe || s->s.h.intraonly) {
1563 memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
1565 memset(s->above_mode_ctx, NEARESTMV, s->cols);
1567 memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
1568 memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 16 >> s->ss_h);
1569 memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 16 >> s->ss_h);
1570 memset(s->above_segpred_ctx, 0, s->cols);
// Two-pass decoding is used with frame threading when the context will be
// refreshed and parallel mode is off (pass 0 = normal, pass 1/2 = 2-pass).
1571 s->pass = s->s.frames[CUR_FRAME].uses_2pass =
1572 avctx->active_thread_type == FF_THREAD_FRAME && s->s.h.refreshctx && !s->s.h.parallelmode;
1573 if ((ret = update_block_buffers(avctx)) < 0) {
1574 av_log(avctx, AV_LOG_ERROR,
1575 "Failed to allocate block buffers\n");
// In parallel mode the adapted-probability context is stored up front so
// dependent frame threads can proceed immediately.
1578 if (s->s.h.refreshctx && s->s.h.parallelmode) {
1581 for (i = 0; i < 4; i++) {
1582 for (j = 0; j < 2; j++)
1583 for (k = 0; k < 2; k++)
1584 for (l = 0; l < 6; l++)
1585 for (m = 0; m < 6; m++)
1586 memcpy(s->prob_ctx[s->s.h.framectxid].coef[i][j][k][l][m],
1587 s->prob.coef[i][j][k][l][m], 3);
1588 if (s->s.h.txfmmode == i)
1591 s->prob_ctx[s->s.h.framectxid].p = s->prob.p;
1592 ff_thread_finish_setup(avctx);
1593 } else if (!s->s.h.refreshctx) {
1594 ff_thread_finish_setup(avctx);
// Reset per-sbrow progress counters for the slice-threading sync.
1598 if (avctx->active_thread_type & FF_THREAD_SLICE) {
1599 for (i = 0; i < s->sb_rows; i++)
1600 atomic_store(&s->entries[i], 0);
// Rewind every tile thread's block/coefficient pointers to their bases.
1605 for (i = 0; i < s->active_tile_cols; i++) {
1606 s->td[i].b = s->td[i].b_base;
1607 s->td[i].block = s->td[i].block_base;
1608 s->td[i].uvblock[0] = s->td[i].uvblock_base[0];
1609 s->td[i].uvblock[1] = s->td[i].uvblock_base[1];
1610 s->td[i].eob = s->td[i].eob_base;
1611 s->td[i].uveob[0] = s->td[i].uveob_base[0];
1612 s->td[i].uveob[1] = s->td[i].uveob_base[1];
// Slice threading: pre-initialize all per-tile range decoders here (indexed
// [tile_col].c_b[tile_row]), then run workers + loop-filter main function.
1616 if (avctx->active_thread_type == FF_THREAD_SLICE) {
1617 int tile_row, tile_col;
1619 av_assert1(!s->pass);
1621 for (tile_row = 0; tile_row < s->s.h.tiling.tile_rows; tile_row++) {
1622 for (tile_col = 0; tile_col < s->s.h.tiling.tile_cols; tile_col++) {
// Last tile spans the remaining data; others carry a 32-bit size prefix.
1625 if (tile_col == s->s.h.tiling.tile_cols - 1 &&
1626 tile_row == s->s.h.tiling.tile_rows - 1) {
1629 tile_size = AV_RB32(data);
1633 if (tile_size > size)
1634 return AVERROR_INVALIDDATA;
1635 ret = ff_vp56_init_range_decoder(&s->td[tile_col].c_b[tile_row], data, tile_size);
1638 if (vp56_rac_get_prob_branchy(&s->td[tile_col].c_b[tile_row], 128)) // marker bit
1639 return AVERROR_INVALIDDATA;
1645 ff_slice_thread_execute_with_mainfunc(avctx, decode_tiles_mt, loopfilter_proc, s->td, NULL, s->s.h.tiling.tile_cols);
// Non-slice-threaded fallback path.
1649 ret = decode_tiles(avctx, data, size);
1651 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
1656 // Sum all counts fields into td[0].counts for tile threading
1657 if (avctx->active_thread_type == FF_THREAD_SLICE)
1658 for (i = 1; i < s->s.h.tiling.tile_cols; i++)
1659 for (j = 0; j < sizeof(s->td[i].counts) / sizeof(unsigned); j++)
1660 ((unsigned *)&s->td[0].counts)[j] += ((unsigned *)&s->td[i].counts)[j];
// Backward probability adaptation from the accumulated symbol counts.
1662 if (s->pass < 2 && s->s.h.refreshctx && !s->s.h.parallelmode) {
1663 ff_vp9_adapt_probs(s);
1664 ff_thread_finish_setup(avctx);
// Loop twice in 2-pass mode (pass 1 then 2), once otherwise.
1666 } while (s->pass++ == 1);
1667 ff_thread_report_progress(&s->s.frames[CUR_FRAME].tf, INT_MAX, 0);
// Commit next_refs into the active reference slots.
1671 for (i = 0; i < 8; i++) {
1672 if (s->s.refs[i].f->buf[0])
1673 ff_thread_release_buffer(avctx, &s->s.refs[i]);
1674 if (s->next_refs[i].f->buf[0] &&
1675 (ret = ff_thread_ref_frame(&s->s.refs[i], &s->next_refs[i])) < 0)
// Invisible (alt-ref) frames are decoded but not output.
1679 if (!s->s.h.invisible) {
1680 if ((ret = av_frame_ref(frame, s->s.frames[CUR_FRAME].tf.f)) < 0)
1688 static void vp9_decode_flush(AVCodecContext *avctx)
1690 VP9Context *s = avctx->priv_data;
1693 for (i = 0; i < 3; i++)
1694 vp9_frame_unref(avctx, &s->s.frames[i]);
1695 for (i = 0; i < 8; i++)
1696 ff_thread_release_buffer(avctx, &s->s.refs[i]);
1699 static int init_frames(AVCodecContext *avctx)
1701 VP9Context *s = avctx->priv_data;
1704 for (i = 0; i < 3; i++) {
1705 s->s.frames[i].tf.f = av_frame_alloc();
1706 if (!s->s.frames[i].tf.f) {
1707 vp9_decode_free(avctx);
1708 av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1709 return AVERROR(ENOMEM);
1712 for (i = 0; i < 8; i++) {
1713 s->s.refs[i].f = av_frame_alloc();
1714 s->next_refs[i].f = av_frame_alloc();
1715 if (!s->s.refs[i].f || !s->next_refs[i].f) {
1716 vp9_decode_free(avctx);
1717 av_log(avctx, AV_LOG_ERROR, "Failed to allocate frame buffer %d\n", i);
1718 return AVERROR(ENOMEM);
// Codec init callback: enable frame-threading progress allocation, mark the
// filter sharpness as "unset" so the first header parse re-derives the
// filter limits, then allocate the frame shells.
// NOTE(review): a gap in this extract between the two assignments may hide
// an additional initialization statement — verify against the full file.
1725 static av_cold int vp9_decode_init(AVCodecContext *avctx)
1727 VP9Context *s = avctx->priv_data;
1729 avctx->internal->allocate_progress = 1;
// -1 = sentinel meaning "no sharpness seen yet".
1731 s->s.h.filter.sharpness = -1;
1733 return init_frames(avctx);
// Frame-threading per-thread init: each worker only needs its own AVFrame
// shells; all other state is synced in vp9_decode_update_thread_context().
1737 static av_cold int vp9_decode_init_thread_copy(AVCodecContext *avctx)
1739 return init_frames(avctx);
// Frame-threading sync: copy the decoding state a dependent thread needs
// from the source context (src) into this one (dst) — frame refs, reference
// slots, header fields consulted before/while parsing the next header, and
// the adapted probability contexts.
// NOTE(review): error-path lines and closing braces are missing from this
// extract.
1742 static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
1745 VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
// Re-reference the three internal frames from the source thread.
1747 for (i = 0; i < 3; i++) {
1748 if (s->s.frames[i].tf.f->buf[0])
1749 vp9_frame_unref(dst, &s->s.frames[i]);
1750 if (ssrc->s.frames[i].tf.f->buf[0]) {
1751 if ((ret = vp9_frame_ref(dst, &s->s.frames[i], &ssrc->s.frames[i])) < 0)
// The source's *next* refs become this thread's current refs: they reflect
// the reference state after the source frame completes.
1755 for (i = 0; i < 8; i++) {
1756 if (s->s.refs[i].f->buf[0])
1757 ff_thread_release_buffer(dst, &s->s.refs[i]);
1758 if (ssrc->next_refs[i].f->buf[0]) {
1759 if ((ret = ff_thread_ref_frame(&s->s.refs[i], &ssrc->next_refs[i])) < 0)
// Scalar header/context fields needed to parse the next frame header.
1764 s->s.h.invisible = ssrc->s.h.invisible;
1765 s->s.h.keyframe = ssrc->s.h.keyframe;
1766 s->s.h.intraonly = ssrc->s.h.intraonly;
1767 s->ss_v = ssrc->ss_v;
1768 s->ss_h = ssrc->ss_h;
1769 s->s.h.segmentation.enabled = ssrc->s.h.segmentation.enabled;
1770 s->s.h.segmentation.update_map = ssrc->s.h.segmentation.update_map;
1771 s->s.h.segmentation.absolute_vals = ssrc->s.h.segmentation.absolute_vals;
1772 s->bytesperpixel = ssrc->bytesperpixel;
1773 s->gf_fmt = ssrc->gf_fmt;
1776 s->s.h.bpp = ssrc->s.h.bpp;
1777 s->bpp_index = ssrc->bpp_index;
1778 s->pix_fmt = ssrc->pix_fmt;
// Bulk-copy the adapted probability contexts and filter/segmentation deltas.
1779 memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
1780 memcpy(&s->s.h.lf_delta, &ssrc->s.h.lf_delta, sizeof(s->s.h.lf_delta));
1781 memcpy(&s->s.h.segmentation.feat, &ssrc->s.h.segmentation.feat,
1782 sizeof(s->s.h.segmentation.feat));
1788 AVCodec ff_vp9_decoder = {
1790 .long_name = NULL_IF_CONFIG_SMALL("Google VP9"),
1791 .type = AVMEDIA_TYPE_VIDEO,
1792 .id = AV_CODEC_ID_VP9,
1793 .priv_data_size = sizeof(VP9Context),
1794 .init = vp9_decode_init,
1795 .close = vp9_decode_free,
1796 .decode = vp9_decode_frame,
1797 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS | AV_CODEC_CAP_SLICE_THREADS,
1798 .caps_internal = FF_CODEC_CAP_SLICE_THREAD_HAS_MF,
1799 .flush = vp9_decode_flush,
1800 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp9_decode_init_thread_copy),
1801 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp9_decode_update_thread_context),
1802 .profiles = NULL_IF_CONFIG_SMALL(ff_vp9_profiles),
1803 .bsfs = "vp9_superframe_split",
1804 .hw_configs = (const AVCodecHWConfigInternal*[]) {
1805 #if CONFIG_VP9_DXVA2_HWACCEL
1808 #if CONFIG_VP9_D3D11VA_HWACCEL
1809 HWACCEL_D3D11VA(vp9),
1811 #if CONFIG_VP9_D3D11VA2_HWACCEL
1812 HWACCEL_D3D11VA2(vp9),
1814 #if CONFIG_VP9_NVDEC_HWACCEL
1817 #if CONFIG_VP9_VAAPI_HWACCEL