2 * VP7/VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Fiona Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/imgutils.h"
28 #include "libavutil/mem_internal.h"
34 #include "rectangle.h"
/*
 * VPX(vp7, f) names the codec-specific variant of f: when both decoders are
 * built it picks vp7_f or vp8_f from the vp7 argument; when only one decoder
 * is built it expands to that decoder's symbol and ignores vp7.
 */
43 #if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
44 #define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
45 #elif CONFIG_VP7_DECODER
46 #define VPX(vp7, f) vp7_ ## f
47 #else // CONFIG_VP8_DECODER
48 #define VPX(vp7, f) vp8_ ## f
/*
 * Free the per-thread state (condition variable, mutex and filter_strength
 * array of each of the MAX_THREADS slots) and the context-level macroblock,
 * intra-prediction, nnz and border arrays.
 */
51 static void free_buffers(VP8Context *s)
55 for (i = 0; i < MAX_THREADS; i++) {
57 pthread_cond_destroy(&s->thread_data[i].cond);
58 pthread_mutex_destroy(&s->thread_data[i].lock);
60 av_freep(&s->thread_data[i].filter_strength);
62 av_freep(&s->thread_data);
63 av_freep(&s->macroblocks_base);
64 av_freep(&s->intra4x4_pred_mode_top);
65 av_freep(&s->top_nnz);
66 av_freep(&s->top_border);
/* macroblocks is derived from macroblocks_base (set to base + 1 in
 * update_dimensions), so it must not outlive the allocation freed above. */
68 s->macroblocks = NULL;
/*
 * Obtain the buffers backing frame f: the picture buffer (requested with
 * AV_GET_BUFFER_FLAG_REF when ref is non-zero), a zero-filled segmentation map
 * with one byte per macroblock, and, when a hwaccel declaring a non-zero
 * frame_priv_data_size is active, a zero-filled hwaccel private buffer.
 */
71 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
74 if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
75 ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
77 if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
79 if (s->avctx->hwaccel) {
80 const AVHWAccel *hwaccel = s->avctx->hwaccel;
81 if (hwaccel->frame_priv_data_size) {
82 f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
83 if (!f->hwaccel_priv_buf)
85 f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
/* NOTE(review): the lines below look like the shared allocation-failure
 * path (drop the segmentation map and picture buffer, report ENOMEM); the
 * label and the jumps to it are not visible here -- confirm. */
91 av_buffer_unref(&f->seg_map);
92 ff_thread_release_buffer(s->avctx, &f->tf);
93 return AVERROR(ENOMEM);
/*
 * Drop every reference held by frame f: the segmentation map, the hwaccel
 * private buffer (also clearing the pointer into it) and the picture buffer.
 */
96 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
98 av_buffer_unref(&f->seg_map);
99 av_buffer_unref(&f->hwaccel_priv_buf);
100 f->hwaccel_picture_private = NULL;
101 ff_thread_release_buffer(s->avctx, &f->tf);
104 #if CONFIG_VP8_DECODER
/*
 * Make dst a new reference to src: dst is released first, then the picture
 * buffer, the segmentation map and (if src has one) the hwaccel private
 * buffer are re-referenced from src.
 */
105 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
109 vp8_release_frame(s, dst);
111 if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
/* Failing to reference the segmentation map undoes the picture reference
 * taken above before reporting ENOMEM. */
114 !(dst->seg_map = av_buffer_ref(src->seg_map))) {
115 vp8_release_frame(s, dst);
116 return AVERROR(ENOMEM);
118 if (src->hwaccel_picture_private) {
119 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
/* NOTE(review): unlike the seg_map failure path, this return does not call
 * vp8_release_frame(s, dst), so dst keeps its picture and seg_map
 * references on failure -- confirm this asymmetry is intended. */
120 if (!dst->hwaccel_priv_buf)
121 return AVERROR(ENOMEM);
122 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
127 #endif /* CONFIG_VP8_DECODER */
/*
 * Release every frame in s->frames and clear all framep[] slots.
 * NOTE(review): the use of free_mem is not visible in these lines; callers
 * pass 0 from vp8_decode_flush and 1 from update_dimensions.
 */
129 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
131 VP8Context *s = avctx->priv_data;
134 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
135 vp8_release_frame(s, &s->frames[i]);
136 memset(s->framep, 0, sizeof(s->framep));
/* Flush entry point: runs the shared flush implementation with free_mem = 0. */
142 static void vp8_decode_flush(AVCodecContext *avctx)
144 vp8_decode_flush_impl(avctx, 0);
/*
 * Pick an entry of s->frames that none of the CURRENT, PREVIOUS, GOLDEN or
 * GOLDEN2 framep[] slots points to. If the chosen entry still holds a picture
 * buffer it is released before use.
 */
147 static VP8Frame *vp8_find_free_buffer(VP8Context *s)
149 VP8Frame *frame = NULL;
152 // find a free buffer
/* NOTE(review): the bound is a literal 5 while vp8_decode_flush_impl uses
 * FF_ARRAY_ELEMS(s->frames) -- confirm the array really has 5 entries. */
153 for (i = 0; i < 5; i++)
154 if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
155 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
156 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
157 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
158 frame = &s->frames[i];
/* The guard around this message is not visible here; presumably it fires
 * when the scan above left frame == NULL. */
162 av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
165 if (frame->tf.f->buf[0])
166 vp8_release_frame(s, frame);
/*
 * Build the candidate pixel-format list (hwaccel formats are added only when
 * the matching CONFIG_VP8_*_HWACCEL option is enabled; the list entries are
 * not visible here) and let ff_get_format() negotiate one for s->avctx.
 */
171 static enum AVPixelFormat get_pixel_format(VP8Context *s)
173 enum AVPixelFormat pix_fmts[] = {
174 #if CONFIG_VP8_VAAPI_HWACCEL
177 #if CONFIG_VP8_NVDEC_HWACCEL
184 return ff_get_format(s->avctx, pix_fmts);
/*
 * (Re)configure the decoder for a width x height picture: flush on a size
 * change, negotiate the pixel format for VP8, derive the macroblock grid and
 * allocate the per-frame and per-thread working arrays.
 *
 * Returns a negative AVERROR on failure (EINVAL and ENOMEM are visible below).
 */
187 static av_always_inline
188 int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
190 AVCodecContext *avctx = s->avctx;
191 int i, ret, dim_reset = 0;
/* && binds tighter than ||: the macroblock-grid mismatch only counts when
 * macroblocks_base is already allocated, while a width or height mismatch
 * against avctx always triggers the flush. */
193 if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
194 height != s->avctx->height) {
195 vp8_decode_flush_impl(s->avctx, 1);
197 ret = ff_set_dimensions(s->avctx, width, height);
201 dim_reset = (s->macroblocks_base != NULL);
/* Pixel-format negotiation is skipped for VP7 and for WebP input. */
204 if ((s->pix_fmt == AV_PIX_FMT_NONE || dim_reset) &&
205 !s->actually_webp && !is_vp7) {
206 s->pix_fmt = get_pixel_format(s);
208 return AVERROR(EINVAL);
209 avctx->pix_fmt = s->pix_fmt;
/* Macroblock grid: coded dimensions rounded up to a multiple of 16. */
212 s->mb_width = (s->avctx->coded_width + 15) / 16;
213 s->mb_height = (s->avctx->coded_height + 15) / 16;
/* mb_layout is set for VP7, or for slice threading with more than one
 * thread (&& binds tighter than ||). */
215 s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
216 avctx->thread_count > 1;
217 if (!s->mb_layout) { // Frame threading and one thread
218 s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
219 sizeof(*s->macroblocks));
220 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
221 } else // Sliced threading
222 s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
223 sizeof(*s->macroblocks));
224 s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
225 s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
226 s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
/* intra4x4_pred_mode_top is only allocated, hence only required, when
 * mb_layout is 0. */
228 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
229 !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
231 return AVERROR(ENOMEM);
/* Per-thread state: one filter_strength entry per macroblock column, plus
 * a mutex and condition variable. */
234 for (i = 0; i < MAX_THREADS; i++) {
235 s->thread_data[i].filter_strength =
236 av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
237 if (!s->thread_data[i].filter_strength) {
239 return AVERROR(ENOMEM);
242 pthread_mutex_init(&s->thread_data[i].lock, NULL);
243 pthread_cond_init(&s->thread_data[i].cond, NULL);
/* macroblocks skips the first element of the allocation. */
247 s->macroblocks = s->macroblocks_base + 1;
/* VP7 wrapper: update_dimensions() with is_vp7 = IS_VP7. */
252 static int vp7_update_dimensions(VP8Context *s, int width, int height)
254 return update_dimensions(s, width, height, IS_VP7);
/* VP8 wrapper: update_dimensions() with is_vp7 = IS_VP8. */
257 static int vp8_update_dimensions(VP8Context *s, int width, int height)
259 return update_dimensions(s, width, height, IS_VP8);
/*
 * Read the segmentation part of the frame header from s->c: the two update
 * flags, then (if flagged) the per-segment quantizer and loop-filter values,
 * then (if flagged) the three segment-id tree probabilities.
 */
263 static void parse_segment_info(VP8Context *s)
265 VP56RangeCoder *c = &s->c;
268 s->segmentation.update_map = vp8_rac_get(c);
269 s->segmentation.update_feature_data = vp8_rac_get(c);
271 if (s->segmentation.update_feature_data) {
272 s->segmentation.absolute_vals = vp8_rac_get(c);
/* Four segments: signed 7-bit quantizer value, signed 6-bit filter level. */
274 for (i = 0; i < 4; i++)
275 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
277 for (i = 0; i < 4; i++)
278 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
/* Each probability is either sent explicitly (8 bits) or defaults to 255. */
280 if (s->segmentation.update_map)
281 for (i = 0; i < 3; i++)
282 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/*
 * Read loop-filter delta updates from s->c: four per-reference-frame deltas,
 * then one delta per prediction mode from MODE_I4x4 to VP8_MVMODE_SPLIT.
 * An entry is only rewritten when its update flag is set; its magnitude is a
 * 6-bit unsigned value that may then be negated (the condition guarding the
 * negation is not visible here).
 */
285 static void update_lf_deltas(VP8Context *s)
287 VP56RangeCoder *c = &s->c;
290 for (i = 0; i < 4; i++) {
291 if (vp8_rac_get(c)) {
292 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
295 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
299 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
300 if (vp8_rac_get(c)) {
301 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
304 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
/*
 * Set up the coefficient partitions that follow the frame header.
 * buf starts with a table of 3-byte little-endian sizes, one for every
 * partition except the last, followed by the partition data itself.
 */
309 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
311 const uint8_t *sizes = buf;
/* A 2-bit field gives log2 of the partition count (1, 2, 4 or 8). */
315 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
/* Skip the size table. */
317 buf += 3 * (s->num_coeff_partitions - 1);
318 buf_size -= 3 * (s->num_coeff_partitions - 1);
322 for (i = 0; i < s->num_coeff_partitions - 1; i++) {
323 int size = AV_RL24(sizes + 3 * i);
/* Declared size must fit in the remaining data. */
324 if (buf_size - size < 0)
326 s->coeff_partition_size[i] = size;
328 ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
/* The last partition has no size entry and takes whatever data remains.
 * NOTE(review): the result of this final init call is not captured on
 * this line, unlike the call inside the loop -- confirm that is intended. */
335 s->coeff_partition_size[i] = buf_size;
336 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
/*
 * Read the VP7 quantizer indices and fill s->qmat[0].
 * The 7-bit Y AC index is always sent; each of the other five indices is
 * sent only when its flag bit is set and otherwise equals the Y AC index.
 */
341 static void vp7_get_quants(VP8Context *s)
343 VP56RangeCoder *c = &s->c;
345 int yac_qi = vp8_rac_get_uint(c, 7);
346 int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
347 int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
348 int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
349 int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
350 int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
352 s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
353 s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
354 s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
355 s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
/* Chroma reuses the luma DC/AC lookup tables; the chroma DC multiplier is
 * additionally capped at 132. */
356 s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
357 s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
/*
 * Read the VP8 quantizer header (7-bit base Y AC index plus five signed 4-bit
 * deltas) and derive the dequantization multipliers of all four segments.
 */
360 static void vp8_get_quants(VP8Context *s)
362 VP56RangeCoder *c = &s->c;
365 s->quant.yac_qi = vp8_rac_get_uint(c, 7);
366 s->quant.ydc_delta = vp8_rac_get_sint(c, 4);
367 s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
368 s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
369 s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
370 s->quant.uvac_delta = vp8_rac_get_sint(c, 4);
372 for (i = 0; i < 4; i++) {
/* With segmentation the per-segment value is either absolute or an offset
 * to the frame-level index; without it every segment uses the frame index. */
373 if (s->segmentation.enabled) {
374 base_qi = s->segmentation.base_quant[i];
375 if (!s->segmentation.absolute_vals)
376 base_qi += s->quant.yac_qi;
378 base_qi = s->quant.yac_qi;
/* Every table index is clipped to 7 bits (0..127) before the lookup. */
380 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta, 7)];
381 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
382 s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
383 /* 101581>>16 is equivalent to 155/100 */
384 s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
385 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
386 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];
/* Final range limits: Y2 AC multiplier at least 8, chroma DC at most 132. */
388 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
389 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
394 * Determine which buffers golden and altref should be updated with after this frame.
395 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
397 * Intra frames update all 3 references
398 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
399 * If the update (golden|altref) flag is set, it's updated with the current frame
400 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
401 * If the flag is not set, the number read means:
403 * 1: VP56_FRAME_PREVIOUS
404 * 2: update golden with altref, or update altref with golden
/*
 * Decide which frame the reference `ref` (golden or altref) is refreshed
 * from. The visible outcomes are VP56_FRAME_CURRENT, VP56_FRAME_PREVIOUS,
 * the other of golden/altref, or VP56_FRAME_NONE; a 2-bit code read from s->c
 * selects among the latter three. The conditions and case labels that map
 * `update` and the code to each outcome are not visible here.
 */
406 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
408 VP56RangeCoder *c = &s->c;
411 return VP56_FRAME_CURRENT;
413 switch (vp8_rac_get_uint(c, 2)) {
415 return VP56_FRAME_PREVIOUS;
417 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
419 return VP56_FRAME_NONE;
/*
 * Reset the token probabilities to their defaults. s->prob->token is indexed
 * by [i][coefficient position 0..15], whereas the default table is indexed by
 * [i][band], so each position copies the defaults of the band it maps to via
 * vp8_coeff_band[].
 */
422 static void vp78_reset_probability_tables(VP8Context *s)
425 for (i = 0; i < 4; i++)
426 for (j = 0; j < 16; j++)
427 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
428 sizeof(s->prob->token[i][j]));
/*
 * Apply the token probability updates coded in the frame header.
 * For every [i][band j][k][l] entry an update flag is decoded with the fixed
 * probability vp8_token_update_probs[i][j][k][l]; when set, a new 8-bit
 * probability follows.
 */
431 static void vp78_update_probability_tables(VP8Context *s)
433 VP56RangeCoder *c = &s->c;
436 for (i = 0; i < 4; i++)
437 for (j = 0; j < 8; j++)
438 for (k = 0; k < 3; k++)
439 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
440 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
441 int prob = vp8_rac_get_uint(c, 8);
/* The update is per band; store it at every coefficient position listed
 * for band j (the list ends at the first negative entry). */
442 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
443 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
/* Number of motion-vector probabilities per component that the header update
 * loop visits; passed as mvc_size by the VP7 and VP8 header parsers. */
447 #define VP7_MVC_SIZE 17
448 #define VP8_MVC_SIZE 19
/*
 * Read the inter-frame probability updates from s->c: four 16x16 intra mode
 * probabilities, three 8x8 chroma mode probabilities, and the motion-vector
 * probabilities of both components (mvc_size entries each).
 * NOTE(review): the flags that presumably gate the first two loops are not
 * visible here.
 */
450 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
453 VP56RangeCoder *c = &s->c;
457 for (i = 0; i < 4; i++)
458 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
460 for (i = 0; i < 3; i++)
461 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
463 // 17.2 MV probability update
/* Each entry has its own update flag, decoded with vp8_mv_update_prob. */
464 for (i = 0; i < 2; i++)
465 for (j = 0; j < mvc_size; j++)
466 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
467 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
/*
 * Read the golden and altref update flags and resolve, through
 * ref_to_update(), which frame each of the two references is refreshed from.
 * Both flags are read before either ref_to_update() call consumes further
 * bits, so the bitstream order is: flag, flag, golden code, altref code.
 */
470 static void update_refs(VP8Context *s)
472 VP56RangeCoder *c = &s->c;
474 int update_golden = vp8_rac_get(c);
475 int update_altref = vp8_rac_get(c);
477 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
478 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
/*
 * Copy planes 1 and 2 from src to dst row by row, honouring each frame's own
 * linesize: height / 2 rows of width / 2 bytes per plane.
 */
481 static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
485 for (j = 1; j < 3; j++) {
486 for (i = 0; i < height / 2; i++)
487 memcpy(dst->data[j] + i * dst->linesize[j],
488 src->data[j] + i * src->linesize[j], width / 2);
/*
 * Apply the fade transform to a width x height plane, writing
 * clip_uint8(y + ((y * beta) >> 8) + alpha) for every sample.
 * NOTE(review): the declaration of y is not visible here; presumably it is
 * the source sample src2[i].
 */
492 static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
493 const uint8_t *src, ptrdiff_t src_linesize,
494 int width, int height,
498 for (j = 0; j < height; j++) {
499 const uint8_t *src2 = src + j * src_linesize;
500 uint8_t *dst2 = dst + j * dst_linesize;
501 for (i = 0; i < width; i++) {
503 dst2[i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
/*
 * Apply the VP7 fade (alpha/beta) to the previous reference frame.
 * Nothing is done for keyframes or when both alpha and beta are zero.
 */
508 static int vp7_fade_frame(VP8Context *s, int alpha, int beta)
512 if (!s->keyframe && (alpha || beta)) {
/* Operate on the full macroblock-aligned picture area. */
513 int width = s->mb_width * 16;
514 int height = s->mb_height * 16;
517 if (!s->framep[VP56_FRAME_PREVIOUS] ||
518 !s->framep[VP56_FRAME_GOLDEN]) {
519 av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
520 return AVERROR_INVALIDDATA;
524 src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
526 /* preserve the golden frame, write a new previous frame */
/* When golden and previous are the same frame, the faded picture goes to
 * a freshly allocated frame and chroma is copied over unchanged. */
527 if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
528 s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
529 if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
532 dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
534 copy_chroma(dst, src, width, height);
/* Only plane 0 is faded. */
537 fade(dst->data[0], dst->linesize[0],
538 src->data[0], src->linesize[0],
539 width, height, alpha, beta);
/*
 * Parse a VP7 frame header from buf and update the decoder state in s
 * (dimensions, probabilities, features, quantizers, loop filter, fading).
 * Returns a negative AVERROR on malformed or truncated input.
 * NOTE(review): several guarding conditions (e.g. the keyframe checks around
 * sections A, D and J) are not visible in these lines.
 */
545 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
547 VP56RangeCoder *c = &s->c;
548 int part1_size, hscale, vscale, i, j, ret;
549 int width = s->avctx->width;
550 int height = s->avctx->height;
555 return AVERROR_INVALIDDATA;
/* First byte: bit 0 clear marks a keyframe, bits 1-3 hold the profile.
 * Only profiles 0 and 1 are accepted. */
558 s->profile = (buf[0] >> 1) & 7;
559 if (s->profile > 1) {
560 avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
561 return AVERROR_INVALIDDATA;
564 s->keyframe = !(buf[0] & 1);
/* First-partition size: the 24-bit little-endian word shifted right by 4. */
566 part1_size = AV_RL24(buf) >> 4;
/* The fixed header is 4 - profile bytes (4 for profile 0, 3 for 1). */
568 if (buf_size < 4 - s->profile + part1_size) {
569 av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
570 return AVERROR_INVALIDDATA;
573 buf += 4 - s->profile;
574 buf_size -= 4 - s->profile;
576 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
578 ret = ff_vp56_init_range_decoder(c, buf, part1_size);
582 buf_size -= part1_size;
584 /* A. Dimension information (keyframes only) */
586 width = vp8_rac_get_uint(c, 12);
587 height = vp8_rac_get_uint(c, 12);
588 hscale = vp8_rac_get_uint(c, 2);
589 vscale = vp8_rac_get_uint(c, 2);
590 if (hscale || vscale)
591 avpriv_request_sample(s->avctx, "Upscaling");
/* Reset references, probabilities, segmentation, loop-filter deltas and the
 * coefficient scan order to their defaults. */
593 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
594 vp78_reset_probability_tables(s);
595 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
596 sizeof(s->prob->pred16x16));
597 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
598 sizeof(s->prob->pred8x8c));
599 for (i = 0; i < 2; i++)
600 memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
601 sizeof(vp7_mv_default_prob[i]));
602 memset(&s->segmentation, 0, sizeof(s->segmentation));
603 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
604 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
607 if (s->keyframe || s->profile > 0)
608 memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));
610 /* B. Decoding information for all four macroblock-level features */
611 for (i = 0; i < 4; i++) {
612 s->feature_enabled[i] = vp8_rac_get(c);
613 if (s->feature_enabled[i]) {
614 s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
/* Index-tree probabilities default to 255 when not sent. */
616 for (j = 0; j < 3; j++)
617 s->feature_index_prob[i][j] =
618 vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/* Feature values have a per-profile, per-feature bit width; a width of 0
 * means the feature carries no values. */
620 if (vp7_feature_value_size[s->profile][i])
621 for (j = 0; j < 4; j++)
622 s->feature_value[i][j] =
623 vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
627 s->segmentation.enabled = 0;
628 s->segmentation.update_map = 0;
629 s->lf_delta.enabled = 0;
/* A single coefficient partition made of everything after the first
 * partition. */
631 s->num_coeff_partitions = 1;
632 ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
636 if (!s->macroblocks_base || /* first frame */
637 width != s->avctx->width || height != s->avctx->height ||
638 (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
639 if ((ret = vp7_update_dimensions(s, width, height)) < 0)
643 /* C. Dequantization indices */
646 /* D. Golden frame update flag (a Flag) for interframes only */
648 s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
649 s->sign_bias[VP56_FRAME_GOLDEN] = 0;
653 s->update_probabilities = 1;
656 if (s->profile > 0) {
657 s->update_probabilities = vp8_rac_get(c);
/* When this frame's probabilities are not kept, snapshot the current set
 * into prob[1]. */
658 if (!s->update_probabilities)
659 s->prob[1] = s->prob[0];
662 s->fade_present = vp8_rac_get(c);
665 if (vpX_rac_is_end(c))
666 return AVERROR_INVALIDDATA;
667 /* E. Fading information for previous frame */
/* alpha and beta are coded as 8 bits and reinterpreted as signed. */
668 if (s->fade_present && vp8_rac_get(c)) {
669 alpha = (int8_t) vp8_rac_get_uint(c, 8);
670 beta = (int8_t) vp8_rac_get_uint(c, 8);
673 /* F. Loop filter type */
675 s->filter.simple = vp8_rac_get(c);
677 /* G. DCT coefficient ordering specification */
/* Entries 1..15 of the scan order are each coded as a 4-bit index into
 * ff_zigzag_scan. */
679 for (i = 1; i < 16; i++)
680 s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];
682 /* H. Loop filter levels */
684 s->filter.simple = vp8_rac_get(c);
685 s->filter.level = vp8_rac_get_uint(c, 6);
686 s->filter.sharpness = vp8_rac_get_uint(c, 3);
688 /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
689 vp78_update_probability_tables(s);
691 s->mbskip_enabled = 0;
693 /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
695 s->prob->intra = vp8_rac_get_uint(c, 8);
696 s->prob->last = vp8_rac_get_uint(c, 8);
697 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
700 if (vpX_rac_is_end(c))
701 return AVERROR_INVALIDDATA;
703 if ((ret = vp7_fade_frame(s, alpha, beta)) < 0)
/*
 * Parse a VP8 frame header from buf and update the decoder state in s
 * (frame type, dimensions, segmentation, loop filter, partitions, quantizers,
 * reference updates and probabilities).
 * Returns a negative AVERROR on malformed or truncated input.
 * NOTE(review): several guarding conditions (keyframe / profile checks) are
 * not visible in these lines.
 */
709 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
711 VP56RangeCoder *c = &s->c;
712 int header_size, hscale, vscale, ret;
713 int width = s->avctx->width;
714 int height = s->avctx->height;
717 av_log(s->avctx, AV_LOG_ERROR, "Insufficent data (%d) for header\n", buf_size);
718 return AVERROR_INVALIDDATA;
/* Frame tag: bit 0 clear = keyframe, bits 1-3 = profile, bit 4 clear =
 * invisible frame, remaining bits of the 24-bit word = header size. */
721 s->keyframe = !(buf[0] & 1);
722 s->profile = (buf[0]>>1) & 7;
723 s->invisible = !(buf[0] & 0x10);
724 header_size = AV_RL24(buf) >> 5;
728 s->header_partition_size = header_size;
731 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
/* Motion-compensation filter set: epel or bilinear depending on profile. */
734 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
735 sizeof(s->put_pixels_tab));
736 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
737 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
738 sizeof(s->put_pixels_tab));
/* Keyframes carry 7 extra bytes (start code and dimensions, parsed below)
 * ahead of the header partition. */
740 if (header_size > buf_size - 7 * s->keyframe) {
741 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
742 return AVERROR_INVALIDDATA;
746 if (AV_RL24(buf) != 0x2a019d) {
747 av_log(s->avctx, AV_LOG_ERROR,
748 "Invalid start code 0x%x\n", AV_RL24(buf));
749 return AVERROR_INVALIDDATA;
/* Each dimension is a 16-bit little-endian word: low 14 bits are the size,
 * top 2 bits the scaling mode. */
751 width = AV_RL16(buf + 3) & 0x3fff;
752 height = AV_RL16(buf + 5) & 0x3fff;
753 hscale = buf[4] >> 6;
754 vscale = buf[6] >> 6;
758 if (hscale || vscale)
759 avpriv_request_sample(s->avctx, "Upscaling");
/* Reset references, probabilities, segmentation and loop-filter deltas to
 * their defaults. */
761 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
762 vp78_reset_probability_tables(s);
763 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
764 sizeof(s->prob->pred16x16));
765 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
766 sizeof(s->prob->pred8x8c));
767 memcpy(s->prob->mvc, vp8_mv_default_prob,
768 sizeof(s->prob->mvc));
769 memset(&s->segmentation, 0, sizeof(s->segmentation));
770 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
773 ret = ff_vp56_init_range_decoder(c, buf, header_size);
777 buf_size -= header_size;
780 s->colorspace = vp8_rac_get(c);
782 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
783 s->fullrange = vp8_rac_get(c);
786 if ((s->segmentation.enabled = vp8_rac_get(c)))
787 parse_segment_info(s);
789 s->segmentation.update_map = 0; // FIXME: move this to some init function?
791 s->filter.simple = vp8_rac_get(c);
792 s->filter.level = vp8_rac_get_uint(c, 6);
793 s->filter.sharpness = vp8_rac_get_uint(c, 3);
795 if ((s->lf_delta.enabled = vp8_rac_get(c))) {
796 s->lf_delta.update = vp8_rac_get(c);
797 if (s->lf_delta.update)
801 if (setup_partitions(s, buf, buf_size)) {
802 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
803 return AVERROR_INVALIDDATA;
806 if (!s->macroblocks_base || /* first frame */
807 width != s->avctx->width || height != s->avctx->height ||
808 (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
809 if ((ret = vp8_update_dimensions(s, width, height)) < 0)
816 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
817 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
820 // if we aren't saving this frame's probabilities for future frames,
821 // make a copy of the current probabilities
822 if (!(s->update_probabilities = vp8_rac_get(c)))
823 s->prob[1] = s->prob[0];
/* Keyframes always update the last-frame reference; the flag is only read
 * for inter frames (short-circuit evaluation). */
825 s->update_last = s->keyframe || vp8_rac_get(c);
827 vp78_update_probability_tables(s);
829 if ((s->mbskip_enabled = vp8_rac_get(c)))
830 s->prob->mbskip = vp8_rac_get_uint(c, 8);
833 s->prob->intra = vp8_rac_get_uint(c, 8);
834 s->prob->last = vp8_rac_get_uint(c, 8);
835 s->prob->golden = vp8_rac_get_uint(c, 8);
836 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
839 // Record the entropy coder state here so that hwaccels can use it.
840 s->c.code_word = vp56_rac_renorm(&s->c);
841 s->coder_state_at_header_end.input = s->c.buffer - (-s->c.bits / 8);
842 s->coder_state_at_header_end.range = s->c.high;
843 s->coder_state_at_header_end.value = s->c.code_word >> 16;
844 s->coder_state_at_header_end.bit_count = -s->c.bits % 8;
/*
 * Clamp motion vector src into the bounds in s and store the result in dst.
 * Each bound is itself first clipped to the int16_t range before it is used
 * as a limit for the corresponding component.
 */
849 static av_always_inline
850 void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
852 dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
853 av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
854 dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
855 av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
859 * Motion vector coding, 17.1.
/*
 * Decode one motion-vector component from c using the probability set p.
 * p[0] selects between the long form (magnitude coded bit by bit with
 * p[9 + bit]) and the short form (a small tree starting at p[2]); p[1] codes
 * the sign. vp7 selects the VP7 bit width of the long form.
 */
861 static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
865 if (vp56_rac_get_prob_branchy(c, p[0])) {
/* Long form: bits 0..2 first, then the top bit (7 for VP7, 9 for VP8)
 * down to bit 4. */
868 for (i = 0; i < 3; i++)
869 x += vp56_rac_get_prob(c, p[9 + i]) << i;
870 for (i = (vp7 ? 7 : 9); i > 3; i--)
871 x += vp56_rac_get_prob(c, p[9 + i]) << i;
/* Bit 3 (probability p[12] == p[9 + 3]) is only read when some higher
 * bit is set; the statement this condition guards is not visible here. */
872 if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
876 const uint8_t *ps = p + 2;
877 bit = vp56_rac_get_prob(c, *ps);
880 bit = vp56_rac_get_prob(c, *ps);
883 x += vp56_rac_get_prob(c, *ps);
/* A sign bit is read only for a non-zero magnitude. */
886 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
/* VP7 wrapper: read_mv_component() with vp7 = 1. */
889 static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
891 return read_mv_component(c, p, 1);
/* VP8 wrapper: read_mv_component() with vp7 = 0. */
894 static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
896 return read_mv_component(c, p, 0);
/*
 * Pick the sub-block MV mode probability table from the packed left and top
 * neighbour motion vectors. One return yields the single VP7 table; the
 * others select among the vp8_submv_prob[] rows, with `!!left` (left MV
 * non-zero) choosing between adjacent rows.
 * NOTE(review): the conditions selecting each return are not visible here.
 */
899 static av_always_inline
900 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
903 return vp7_submv_prob;
906 return vp8_submv_prob[4 - !!left];
908 return vp8_submv_prob[2];
909 return vp8_submv_prob[1 - !!left];
913 * Split motion vector prediction, 16.4.
914 * @returns the number of motion vectors parsed (2, 4 or 16)
/*
 * Decode the partitioning and the per-partition motion vectors of a split-MV
 * macroblock into mb->partitioning and mb->bmv[].
 * layout selects how the macroblock above is located; is_vp7 selects the
 * VP7 variants of the probability table and MV component reader.
 */
916 static av_always_inline
917 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
918 int layout, int is_vp7)
922 VP8Macroblock *top_mb;
/* The left neighbour is always the previous array element. */
923 VP8Macroblock *left_mb = &mb[-1];
924 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
925 const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
927 VP56mv *left_mv = left_mb->bmv;
928 VP56mv *cur_mv = mb->bmv;
930 if (!layout) // layout is inlined, s->mb_layout is not
/* With layout set, the macroblock above is mb_width + 1 elements back. */
933 top_mb = &mb[-s->mb_width - 1];
934 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
935 top_mv = top_mb->bmv;
/* Partitioning tree: 4x4, 8x8, or one of the two modes starting at
 * VP8_SPLITMVMODE_16x8. */
937 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
938 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
939 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
941 part_idx = VP8_SPLITMVMODE_8x8;
943 part_idx = VP8_SPLITMVMODE_4x4;
946 num = vp8_mbsplit_count[part_idx];
947 mbsplits_cur = vp8_mbsplits[part_idx],
948 firstidx = vp8_mbfirstidx[part_idx];
949 mb->partitioning = part_idx;
951 for (n = 0; n < num; n++) {
953 uint32_t left, above;
954 const uint8_t *submv_prob;
/* The left/above context MV comes either from the neighbouring macroblock
 * (column 3 of the left MB, row 3 of the top MB) or from a sub-block of
 * the current MB; the conditions choosing between them are not visible
 * here. */
957 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
959 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
961 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
963 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
965 submv_prob = get_submv_prob(left, above, is_vp7);
/* Sub-MV mode tree; the visible outcomes are: new MV (coded relative to
 * mb->mv), zero MV, copy of the above MV, copy of the left MV. */
967 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
968 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
969 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
970 mb->bmv[n].y = mb->mv.y +
971 read_mv_component(c, s->prob->mvc[0], is_vp7);
972 mb->bmv[n].x = mb->mv.x +
973 read_mv_component(c, s->prob->mvc[1], is_vp7);
975 AV_ZERO32(&mb->bmv[n]);
978 AV_WN32A(&mb->bmv[n], above);
981 AV_WN32A(&mb->bmv[n], left);
989 * The vp7 reference decoder uses a padding macroblock column (added to right
990 * edge of the frame) to guard against illegal macroblock offsets. The
991 * algorithm has bugs that permit offsets to straddle the padding column.
992 * This function replicates those bugs.
994 * @param[out] edge_x macroblock x address
995 * @param[out] edge_y macroblock y address
997 * @return macroblock offset legal (boolean)
/* Works on a virtual grid that is one column wider than the frame (the padding
 * column described above) and addresses macroblocks by a linear index. */
999 static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
1000 int xoffset, int yoffset, int boundary,
1001 int *edge_x, int *edge_y)
1003 int vwidth = mb_width + 1;
1004 int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
/* Rejected when the index is below `boundary` or falls in the last
 * (padding) column of the virtual grid. */
1005 if (new < boundary || new % vwidth == vwidth - 1)
/* Convert the linear index back to a row/column pair. */
1007 *edge_y = new / vwidth;
1008 *edge_x = new % vwidth;
/*
 * Return the motion vector covering `subblock` of mb: for split-MV macroblocks
 * the partition that the sub-block belongs to, otherwise bmv[0].
 */
1012 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
1014 return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
/*
 * Decode the inter prediction mode and motion vector of a VP7 macroblock.
 * Candidate MVs are gathered from the VP7_MV_PRED_COUNT neighbour positions
 * in vp7_mv_pred[], scored into cnt[], and the scores select the
 * probabilities used to decode the mode.
 */
1017 static av_always_inline
1018 void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
1019 int mb_x, int mb_y, int layout)
1021 VP8Macroblock *mb_edge[12];
1022 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
1023 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1026 uint8_t cnt[3] = { 0 };
1027 VP56RangeCoder *c = &s->c;
1030 AV_ZERO32(&near_mv[0]);
1031 AV_ZERO32(&near_mv[1]);
1032 AV_ZERO32(&near_mv[2]);
1034 for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
1035 const VP7MVPred * pred = &vp7_mv_pred[i];
/* Only neighbour positions that vp7_calculate_mb_offset() accepts are
 * considered; profile 0 uses boundary = 1, other profiles 0. */
1038 if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
1039 pred->yoffset, !s->profile, &edge_x, &edge_y)) {
/* The neighbour's address depends on which macroblock array layout is in
 * use. */
1040 VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
1041 ? s->macroblocks_base + 1 + edge_x +
1042 (s->mb_width + 1) * (edge_y + 1)
1043 : s->macroblocks + edge_x +
1044 (s->mb_height - edge_y - 1) * 2;
1045 uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
/* Match the candidate against the nearest/near MVs found so far, filling
 * an empty slot when it is new; the branch bodies that pick the cnt[]
 * index are not visible here. */
1047 if (AV_RN32A(&near_mv[CNT_NEAREST])) {
1048 if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
1050 } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
1051 if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
1055 AV_WN32A(&near_mv[CNT_NEAR], mv);
1059 AV_WN32A(&near_mv[CNT_NEAREST], mv);
1068 cnt[idx] += vp7_mv_pred[i].score;
1071 mb->partitioning = VP8_SPLITMVMODE_NONE;
/* Mode tree; each level uses the probability selected by one score. */
1073 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
1074 mb->mode = VP8_MVMODE_MV;
1076 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
1078 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
/* Base MV for new/split modes: the higher-scoring of nearest and near,
 * or zero when the zero score beats it. */
1080 if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
1081 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
1083 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
1085 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
1086 mb->mode = VP8_MVMODE_SPLIT;
/* decode_splitmvs() returns the number of sub-MVs; the macroblock MV is
 * the last one decoded. */
1087 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
1089 mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
1090 mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
1091 mb->bmv[0] = mb->mv;
1094 mb->mv = near_mv[CNT_NEAR];
1095 mb->bmv[0] = mb->mv;
1098 mb->mv = near_mv[CNT_NEAREST];
1099 mb->bmv[0] = mb->mv;
1102 mb->mode = VP8_MVMODE_ZERO;
1104 mb->bmv[0] = mb->mv;
/*
 * Decode the inter prediction mode and motion vector of a VP8 macroblock.
 * The MVs of the top, left and top-left neighbours are collected into
 * near_mv[] and weighted into cnt[]; the weights select the probabilities
 * used to decode the mode, and the chosen MV is clamped to mv_bounds.
 */
1108 static av_always_inline
1109 void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
1110 int mb_x, int mb_y, int layout)
1112 VP8Macroblock *mb_edge[3] = { 0 /* top */,
1115 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1116 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1118 int cur_sign_bias = s->sign_bias[mb->ref_frame];
1119 int8_t *sign_bias = s->sign_bias;
1121 uint8_t cnt[4] = { 0 };
1122 VP56RangeCoder *c = &s->c;
/* Locate the top (index 0) and top-left (index 2) neighbours; their
 * position relative to mb depends on the macroblock array layout. */
1124 if (!layout) { // layout is inlined (s->mb_layout is not)
1125 mb_edge[0] = mb + 2;
1126 mb_edge[2] = mb + 1;
1128 mb_edge[0] = mb - s->mb_width - 1;
1129 mb_edge[2] = mb - s->mb_width - 2;
1132 AV_ZERO32(&near_mv[0]);
1133 AV_ZERO32(&near_mv[1]);
1134 AV_ZERO32(&near_mv[2]);
/* MV_EDGE_CHECK(n) handles neighbour n: a neighbour whose ref_frame is
 * VP56_FRAME_CURRENT adds its weight to cnt[CNT_ZERO]; otherwise its MV is
 * sign-adjusted when the sign biases differ, appended to near_mv[] if it
 * is new, and its weight added to that entry. The weight is 2 for n = 0
 * and n = 1, and 1 for n = 2. */
1136 /* Process MB on top, left and top-left */
1137 #define MV_EDGE_CHECK(n) \
1139 VP8Macroblock *edge = mb_edge[n]; \
1140 int edge_ref = edge->ref_frame; \
1141 if (edge_ref != VP56_FRAME_CURRENT) { \
1142 uint32_t mv = AV_RN32A(&edge->mv); \
1144 if (cur_sign_bias != sign_bias[edge_ref]) { \
1145 /* SWAR negate of the values in mv. */ \
1147 mv = ((mv & 0x7fff7fff) + \
1148 0x00010001) ^ (mv & 0x80008000); \
1150 if (!n || mv != AV_RN32A(&near_mv[idx])) \
1151 AV_WN32A(&near_mv[++idx], mv); \
1152 cnt[idx] += 1 + (n != 2); \
1154 cnt[CNT_ZERO] += 1 + (n != 2); \
1162 mb->partitioning = VP8_SPLITMVMODE_NONE;
1163 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1164 mb->mode = VP8_MVMODE_MV;
/* At this point cnt[CNT_SPLITMV] is just cnt[3], i.e. the weight of a
 * third distinct MV; it is overwritten with the split-mode context below. */
1166 /* If we have three distinct MVs, merge first and last if they're the same */
1167 if (cnt[CNT_SPLITMV] &&
1168 AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1169 cnt[CNT_NEAREST] += 1;
1171 /* Swap near and nearest if necessary */
1172 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1173 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
1174 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1177 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1178 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1179 /* Choose the best mv out of 0,0 and the nearest mv */
1180 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
/* Split-mode context: left and top split neighbours count 2 each, the
 * top-left one counts 1. */
1181 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
1182 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
1183 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1185 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1186 mb->mode = VP8_MVMODE_SPLIT;
/* decode_splitmvs() returns the number of sub-MVs; the macroblock MV is
 * the last one decoded. */
1187 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1189 mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
1190 mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
1191 mb->bmv[0] = mb->mv;
1194 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
1195 mb->bmv[0] = mb->mv;
1198 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
1199 mb->bmv[0] = mb->mv;
1202 mb->mode = VP8_MVMODE_ZERO;
1204 mb->bmv[0] = mb->mv;
/* Read the sixteen 4x4 intra prediction modes of one macroblock from range
 * coder c into mb->intra4x4_pred_mode_mb.
 * Two decoding paths are visible: a context-driven one whose probabilities
 * are selected by the modes above and to the left of each sub-block, and one
 * that uses the fixed vp8_pred4x4_prob_inter table for all 16 sub-blocks.
 * NOTE(review): the conditions choosing between the paths (presumably
 * `keyframe`) and between the two `top` sources (presumably `layout`) are not
 * visible in this listing -- confirm against the full file. */
1208 static av_always_inline
1209 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1210 int mb_x, int keyframe, int layout)
1212 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
/* mb_top is the entry s->mb_width + 1 macroblocks before mb; its 4-byte row of
 * top modes is copied into the current macroblock. */
1215 VP8Macroblock *mb_top = mb - s->mb_width - 1;
1216 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1221 uint8_t *const left = s->intra4x4_pred_mode_left;
/* `top` is either the per-macroblock array or the per-frame array, which holds
 * 4 entries per macroblock column. */
1223 top = mb->intra4x4_pred_mode_top;
1225 top = s->intra4x4_pred_mode_top + 4 * mb_x;
1226 for (y = 0; y < 4; y++) {
1227 for (x = 0; x < 4; x++) {
/* The probability set is indexed by the mode above and the mode to the left. */
1229 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
1230 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
/* The decoded mode becomes the top/left context for the following sub-blocks. */
1231 left[y] = top[x] = *intra4x4;
1237 for (i = 0; i < 16; i++)
1238 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
1239 vp8_pred4x4_prob_inter);
/* Decode the header of one macroblock from s->c: segment id, skip flag,
 * prediction mode(s), reference frame and, for inter macroblocks, the motion
 * vectors (via vp7_decode_mvs() / vp8_decode_mvs()).
 *
 * segment: in/out, segment id of this macroblock; the final value is also
 *          stored in mb->segment.
 * ref:     optional (may be NULL) pointer to a segment id that is copied when
 *          segmentation is enabled but the segment map is not being updated.
 * layout:  forwarded to decode_intra4x4_modes() and the MV decoders.
 * NOTE(review): several branch headers (e.g. the test selecting the feature
 * loop, presumably is_vp7, and the keyframe test) are not visible in this
 * listing. */
1243 static av_always_inline
1244 void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
1245 VP8Macroblock *mb, int mb_x, int mb_y,
1246 uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
1248 VP56RangeCoder *c = &s->c;
1249 static const char * const vp7_feature_name[] = { "q-index",
1251 "partial-golden-update",
/* Per-macroblock features: for every enabled feature that is flagged as
 * present, its index is read and a warning is logged. As far as visible here
 * the feature value is only reported, not applied. */
1256 for (i = 0; i < 4; i++) {
1257 if (s->feature_enabled[i]) {
1258 if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
1259 int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
1260 s->feature_index_prob[i]);
1261 av_log(s->avctx, AV_LOG_WARNING,
1262 "Feature %s present in macroblock (value 0x%x)\n",
1263 vp7_feature_name[i], s->feature_value[i][index]);
1267 } else if (s->segmentation.update_map) {
/* The segment id is two bits: the first bit selects the probability
 * (segmentid[1] or segmentid[2]) used to read the second one. */
1268 int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
1269 *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
1270 } else if (s->segmentation.enabled)
1271 *segment = ref ? *ref : *segment;
1272 mb->segment = *segment;
/* The skip flag is only coded when mbskip_enabled is set; otherwise it is 0. */
1274 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
1277 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
1278 vp8_pred16x16_prob_intra);
1280 if (mb->mode == MODE_I4x4) {
1281 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
/* Non-I4x4 mode: the 4x4 equivalent of the 16x16 mode is replicated into all
 * four bytes and written to the top and left 4x4-mode context arrays. */
1283 const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1284 : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1286 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1288 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1289 AV_WN32A(s->intra4x4_pred_mode_left, modes);
1292 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1293 vp8_pred8x8c_prob_intra);
1294 mb->ref_frame = VP56_FRAME_CURRENT;
1295 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
/* The golden/altref distinction is only read for VP8 (!is_vp7). */
1297 if (vp56_rac_get_prob_branchy(c, s->prob->last))
1299 (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
1300 : VP56_FRAME_GOLDEN;
1302 mb->ref_frame = VP56_FRAME_PREVIOUS;
/* Usage counter per reference frame; the array is indexed by ref_frame - 1. */
1303 s->ref_count[mb->ref_frame - 1]++;
1305 // motion vectors, 16.3
1307 vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1309 vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
1312 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1314 if (mb->mode == MODE_I4x4)
1315 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1317 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
/* This path marks the macroblock as predicted from the current frame, without
 * MV partitioning, and zeroes its first sub-block motion vector. */
1319 mb->ref_frame = VP56_FRAME_CURRENT;
1320 mb->partitioning = VP8_SPLITMVMODE_NONE;
1321 AV_ZERO32(&mb->bmv[0]);
1326 * @param r arithmetic bitstream reader context
1327 * @param block destination for block coefficients
1328 * @param probs probabilities to use when reading trees from the bitstream
1329 * @param i initial coeff index, 0 unless a separate DC block is coded
1330 * @param qmul array holding the dc/ac dequant factor at position 0/1
1332 * @return 0 if no coeffs were decoded
1333 * otherwise, the index of the last coeff decoded plus one
/* Token decoding loop for one 4x4 coefficient block.
 * Parameters not covered by the preceding doc text:
 *   token_prob  probabilities for the first token to read (decode_block_coeffs()
 *               passes probs[i][zero_nhood])
 *   scan        maps the coefficient index i to a position inside block[]
 *   vp7         codec selector (the wrappers pass IS_VP7 / IS_VP8)
 * The loop works on `c`, a by-value copy of *r.
 * NOTE(review): the jumps/labels of the loop, the loop header and the
 * write-back of c to *r are not visible in this listing. */
1335 static av_always_inline
1336 int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1337 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1338 int i, uint8_t *token_prob, int16_t qmul[2],
1339 const uint8_t scan[16], int vp7)
1341 VP56RangeCoder c = *r;
1346 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
1350 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1352 break; // invalid input; blocks should end with EOB
/* After a zero coefficient the next token uses context 0. */
1353 token_prob = probs[i][0];
1359 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
/* After a coefficient of magnitude 1 the next token uses context 1. */
1361 token_prob = probs[i + 1][1];
1363 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1364 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1366 coeff += vp56_rac_get_prob(&c, token_prob[5]);
1370 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1371 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1372 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1373 } else { // DCT_CAT2
1375 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1376 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1378 } else { // DCT_CAT3 and up
/* Two bits (a, b) select cat = 0..3. The category base value is
 * 3 + (8 << cat), i.e. 11, 19, 35 or 67, to which the value returned by
 * vp8_rac_get_coeff() is added. */
1379 int a = vp56_rac_get_prob(&c, token_prob[8]);
1380 int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
1381 int cat = (a << 1) + b;
1382 coeff = 3 + (8 << cat);
1383 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
/* After a coefficient larger than 1 the next token uses context 2. */
1386 token_prob = probs[i + 1][2];
/* One more bit gives the sign; the value is scaled by qmul[0] at index 0 and
 * by qmul[1] at every other index, then stored at the scan-ordered position. */
1388 block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
/* DC prediction across frames for the DC coefficient in block[0], using the
 * two-element predictor state pred[] (pred[0] is the stored DC value).
 * Both visible branches store the resulting dc into block[0] and pred[0].
 * NOTE(review): the handling of pred[1], the adjustment of dc before the test
 * and the return value are not visible in this listing. */
1395 static av_always_inline
1396 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1398 int16_t dc = block[0];
/* True when pred[0] is zero, dc is zero, or the two have opposite signs
 * (the XOR is negative, so the arithmetic shift by 31 yields non-zero). */
1406 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1407 block[0] = pred[0] = dc;
1412 block[0] = pred[0] = dc;
/* VP7 instantiation of decode_block_coeffs_internal(): forwards all arguments,
 * including the caller-supplied scan table, and passes IS_VP7 as codec
 * selector. Declared as a plain static function, not av_always_inline. */
1418 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1420 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1421 int i, uint8_t *token_prob,
1423 const uint8_t scan[16])
1425 return decode_block_coeffs_internal(r, block, probs, i,
1426 token_prob, qmul, scan, IS_VP7);
/* VP8 instantiation of decode_block_coeffs_internal(): always uses
 * ff_zigzag_scan and passes IS_VP8. It is only compiled when
 * vp8_decode_block_coeffs_internal is not already defined as a macro. */
1429 #ifndef vp8_decode_block_coeffs_internal
1430 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1432 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1433 int i, uint8_t *token_prob,
1436 return decode_block_coeffs_internal(r, block, probs, i,
1437 token_prob, qmul, ff_zigzag_scan, IS_VP8);
1442 * @param c arithmetic bitstream reader context
1443 * @param block destination for block coefficients
1444 * @param probs probabilities to use when reading trees from the bitstream
1445 * @param i initial coeff index, 0 unless a separate DC block is coded
1446 * @param zero_nhood the initial prediction context for number of surrounding
1447 * all-zero blocks (only left/top, so 0-2)
1448 * @param qmul array holding the dc/ac dequant factor at position 0/1
1449 * @param scan scan pattern (VP7 only)
1451 * @return 0 if no coeffs were decoded
1452 * otherwise, the index of the last coeff decoded plus one
/* Entry point for decoding one coefficient block: selects the token
 * probabilities from the start index i and the neighbour context zero_nhood,
 * tests the first end-of-block token itself, and otherwise dispatches to the
 * VP7 or VP8 internal decoder depending on `vp7`.
 * NOTE(review): the statement executed when the first EOB test succeeds is not
 * visible here; per the doc text the function then returns 0. */
1454 static av_always_inline
1455 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1456 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1457 int i, int zero_nhood, int16_t qmul[2],
1458 const uint8_t scan[16], int vp7)
1460 uint8_t *token_prob = probs[i][zero_nhood];
1461 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
/* `scan` is only forwarded on the VP7 path. */
1463 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1464 token_prob, qmul, scan)
1465 : vp8_decode_block_coeffs_internal(c, block, probs, i,
/* Decode all coefficient blocks of one macroblock from range coder c into
 * td->block / td->block_dc and record per-block counts in
 * td->non_zero_count_cache.
 * t_nnz / l_nnz hold one "had coefficients" flag per block position for the
 * top and left neighbours and are updated in place: indices 0..3 are used for
 * luma, 4..7 for the two chroma planes and 8 for the separate luma DC block.
 * NOTE(review): the tail of the parameter list and the statements updating
 * block_dc, luma_start, luma_ctx and nnz_total are not visible in this
 * listing. */
1469 static av_always_inline
1470 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1471 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1474 int i, x, y, luma_start = 0, luma_ctx = 3;
1475 int nnz_pred, nnz, nnz_total = 0;
1476 int segment = mb->segment;
/* A separate DC block is handled unless the macroblock is I4x4 or, for VP8
 * only, uses split motion vectors. */
1479 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1480 nnz_pred = t_nnz[8] + l_nnz[8];
1482 // decode DC values and do hadamard
1483 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1484 nnz_pred, s->qmat[segment].luma_dc_qmul,
1485 ff_zigzag_scan, is_vp7);
1486 l_nnz[8] = t_nnz[8] = !!nnz;
/* VP7 with a mode greater than MODE_I4x4: the DC value additionally goes
 * through inter_predict_dc() with the predictor state of this reference frame
 * (indexed by ref_frame - 1). */
1488 if (is_vp7 && mb->mode > MODE_I4x4) {
1489 nnz |= inter_predict_dc(td->block_dc,
1490 s->inter_dc_pred[mb->ref_frame - 1]);
/* Two variants of the DC transform exist; NOTE(review): the condition choosing
 * the _dc variant is not visible here. */
1497 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1499 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
/* 16 luma blocks; the context is the sum of the left and top flags (0..2). */
1506 for (y = 0; y < 4; y++)
1507 for (x = 0; x < 4; x++) {
1508 nnz_pred = l_nnz[y] + t_nnz[x];
1509 nnz = decode_block_coeffs(c, td->block[y][x],
1510 s->prob->token[luma_ctx],
1511 luma_start, nnz_pred,
1512 s->qmat[segment].luma_qmul,
1513 s->prob[0].scan, is_vp7);
1514 /* nnz+block_dc may be one more than the actual last index,
1515 * but we don't care */
1516 td->non_zero_count_cache[y][x] = nnz + block_dc;
1517 t_nnz[x] = l_nnz[y] = !!nnz;
1522 // TODO: what to do about dimensions? 2nd dim for luma is x,
1523 // but for chroma it's (y<<1)|x
/* Two chroma planes (i = 4, 5) with 2x2 blocks each; the flags are kept at
 * t_nnz / l_nnz indices i + 2 * x and i + 2 * y, i.e. 4..7. */
1524 for (i = 4; i < 6; i++)
1525 for (y = 0; y < 2; y++)
1526 for (x = 0; x < 2; x++) {
1527 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1528 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1529 s->prob->token[2], 0, nnz_pred,
1530 s->qmat[segment].chroma_qmul,
1531 s->prob[0].scan, is_vp7);
1532 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1533 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1537 // if there were no coded coeffs despite the macroblock not being marked skip,
1538 // we MUST not do the inner loop filter and should not do IDCT
1539 // Since skip isn't used for bitstream prediction, just manually set it.
/* Save the last pixel row of a macroblock into top_border: 16 luma bytes from
 * row 15 at offset 0, and 8 bytes each of Cb and Cr from row 7 at offsets 16
 * and 24.
 * NOTE(review): the chroma copies are presumably guarded by `simple`; the
 * condition is not visible in this listing. */
1544 static av_always_inline
1545 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1546 uint8_t *src_cb, uint8_t *src_cr,
1547 ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
1549 AV_COPY128(top_border, src_y + 15 * linesize);
1551 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1552 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
/* Transfer 8-byte groups between the saved border row (top_border) and the
 * pixel row above the macroblock, using the local XCHG macro.
 * top_border_m1 points 32 bytes before top_border and is used for the pixels
 * to the top-left; top_border + 32 is accessed for the block to the right
 * unless this is the last macroblock of the row.
 * NOTE(review): the body of XCHG and the luma pointer adjustment are not
 * visible in this listing; the third XCHG argument presumably selects between
 * swapping and copying. */
1556 static av_always_inline
1557 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1558 uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
1559 int mb_y, int mb_width, int simple, int xchg)
1561 uint8_t *top_border_m1 = top_border - 32; // for TL prediction
/* Move the chroma pointers up by one row, i.e. to the row above the block. */
1563 src_cb -= uvlinesize;
1564 src_cr -= uvlinesize;
1566 #define XCHG(a, b, xchg) \
1574 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1575 XCHG(top_border, src_y, xchg);
1576 XCHG(top_border + 8, src_y + 8, 1);
1577 if (mb_x < mb_width - 1)
1578 XCHG(top_border + 32, src_y + 16, 1);
1580 // only copy chroma for normal loop filter
1581 // or to initialize the top row to 127
1582 if (!simple || !mb_y) {
1583 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1584 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1585 XCHG(top_border + 16, src_cb, 1);
1586 XCHG(top_border + 24, src_cr, 1);
/* Choose the DC prediction variant for a macroblock at (mb_x, mb_y): in the
 * first visible return a zero mb_y yields DC_128_PRED8x8 instead of
 * TOP_DC_PRED8x8, in the second a zero mb_y yields LEFT_DC_PRED8x8 instead of
 * the unchanged mode.
 * NOTE(review): the condition selecting between the two returns is not visible
 * in this listing; presumably it tests mb_x. */
1590 static av_always_inline
1591 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1594 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1596 return mb_y ? mode : LEFT_DC_PRED8x8;
/* Replacement table for the TM (PLANE_PRED8x8) mode at frame borders. The
 * first return substitutes VERT_PRED8x8, or a constant DC mode when mb_y is
 * zero (DC_128 for VP7, DC_129 for VP8); the second substitutes HOR_PRED8x8
 * when mb_y is zero and otherwise keeps the mode.
 * NOTE(review): the condition selecting between the two returns is not visible
 * in this listing; presumably it tests mb_x. */
1599 static av_always_inline
1600 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1603 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1605 return mb_y ? mode : HOR_PRED8x8;
/* Map a 16x16 / 8x8 intra prediction mode to a border-safe replacement for the
 * macroblock at (mb_x, mb_y). Modes that need a missing neighbour row or
 * column are replaced by a constant DC mode: DC_128 for VP7; for VP8, DC_127
 * when mb_y is zero and DC_129 when mb_x is zero. DC and TM modes are
 * delegated to their dedicated helpers.
 * NOTE(review): the switch header and some case labels are not visible in this
 * listing. */
1608 static av_always_inline
1609 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1613 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1615 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1617 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1618 case PLANE_PRED8x8: /* TM */
1619 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
/* 4x4 counterpart of check_tm_pred8x8_mode(): replaces the TM mode by
 * VERT_VP8_PRED, HOR_VP8_PRED or a constant DC mode (DC_128 for VP7, DC_129
 * for VP8) depending on which of mb_x / mb_y are zero.
 * NOTE(review): the condition selecting between the two returns is not visible
 * in this listing; presumably it tests mb_x. */
1624 static av_always_inline
1625 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1628 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1630 return mb_y ? mode : HOR_VP8_PRED;
/* Map a 4x4 intra prediction mode to a border-safe replacement for the block
 * at (mb_x, mb_y); intra_predict() passes mb_x + x and mb_y + y here.
 * copy_buf is an output flag. NOTE(review): its assignments, the switch header
 * and several case labels are not visible in this listing; intra_predict()
 * passes the address of its `copy` variable and works in a temporary buffer
 * afterwards, so the flag presumably requests that. */
1634 static av_always_inline
1635 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1636 int *copy_buf, int vp7)
1640 if (!mb_x && mb_y) {
1645 case DIAG_DOWN_LEFT_PRED:
1646 case VERT_LEFT_PRED:
/* mb_y is zero: constant DC replacement (128 for VP7, 127 for VP8). */
1647 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
/* mb_x is zero: constant DC replacement (128 for VP7, 129 for VP8). */
1655 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1657 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1658 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1659 * as 16x16/8x8 DC */
1660 case DIAG_DOWN_RIGHT_PRED:
1661 case VERT_RIGHT_PRED:
/* Intra-predict one macroblock into dst[0..2] (Y, Cb, Cr).
 * Modes below MODE_I4x4 use one 16x16 luma predictor. Otherwise each of the
 * 16 luma sub-blocks is predicted on its own and its residual is added right
 * away through the vp8_idct_dc_add / vp8_idct_add routines. Chroma always uses
 * one 8x8 predictor per plane with mb->chroma_pred_mode.
 * All modes are first passed through the check_*_emuedge() helpers so that
 * blocks at the frame border never read missing neighbours.
 * NOTE(review): some declarations and branch headers are not visible in this
 * listing (e.g. `tr`, `copy`, the tests guarding the copy buffer setup). */
1670 static av_always_inline
1671 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1672 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1674 int x, y, mode, nnz;
1677 /* for the first row, we need to run xchg_mb_border to init the top edge
1678 * to 127 otherwise, skip it if we aren't going to deblock */
/* NOTE(review): because of the leading `mb_y &&`, the `|| !mb_y` alternative
 * can never be true, so this runs only for mb_y > 0 with deblocking enabled
 * and on thread 0. Kept as is to preserve behaviour. */
1679 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1680 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1681 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1682 s->filter.simple, 1);
1684 if (mb->mode < MODE_I4x4) {
1685 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1686 s->hpc.pred16x16[mode](dst[0], s->linesize);
1688 uint8_t *ptr = dst[0];
1689 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
/* Substitute pixel values for missing neighbours: 128 for VP7, 127 / 129 for
 * VP8. `lo` fills the emulated row above; `hi` is presumably used for the
 * emulated left column (those assignments are not visible here). */
1690 const uint8_t lo = is_vp7 ? 128 : 127;
1691 const uint8_t hi = is_vp7 ? 128 : 129;
1692 uint8_t tr_top[4] = { lo, lo, lo, lo };
1694 // all blocks on the right edge of the macroblock use the bottom edge of
1695 // the macroblock above for their top-right edge
1696 uint8_t *tr_right = ptr - s->linesize + 16;
1698 // if we're on the right edge of the frame, said edge is extended
1699 // from the top macroblock
1700 if (mb_y && mb_x == s->mb_width - 1) {
/* Replicate the last pixel of the row above into all four bytes of `tr`. */
1701 tr = tr_right[-1] * 0x01010101u;
1702 tr_right = (uint8_t *) &tr;
1706 AV_ZERO128(td->non_zero_count_cache);
1708 for (y = 0; y < 4; y++) {
1709 uint8_t *topright = ptr + 4 - s->linesize;
1710 for (x = 0; x < 4; x++) {
1712 ptrdiff_t linesize = s->linesize;
1713 uint8_t *dst = ptr + 4 * x;
/* Scratch area of 5 rows x 8 bytes used when neighbours must be emulated:
 * the block lives at offset 12 (row 1, column 4), the row above at offset 4,
 * the top-left pixel at offset 3 and the left column at 11, 19, 27, 35. */
1714 LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);
1716 if ((y == 0 || x == 3) && mb_y == 0) {
1719 topright = tr_right;
/* The fourth argument is the int* output flag `copy_buf` of the helper, hence
 * the address of the local `copy` is passed. */
1721 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1722 mb_y + y, &copy, is_vp7);
1724 dst = copy_dst + 12;
1728 AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1730 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1734 copy_dst[3] = ptr[4 * x - s->linesize - 1];
1743 copy_dst[11] = ptr[4 * x - 1];
1744 copy_dst[19] = ptr[4 * x + s->linesize - 1];
1745 copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1746 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1749 s->hpc.pred4x4[mode](dst, topright, linesize);
/* Copy the four predicted rows (stride 8 in the scratch area) back into the
 * frame. */
1751 AV_COPY32(ptr + 4 * x, copy_dst + 12);
1752 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1753 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1754 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
/* Add the residual of this sub-block; NOTE(review): the tests on nnz choosing
 * between the DC-only and the full IDCT are not visible here. */
1757 nnz = td->non_zero_count_cache[y][x];
1760 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1761 td->block[y][x], s->linesize);
1763 s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1764 td->block[y][x], s->linesize);
1769 ptr += 4 * s->linesize;
/* Chroma: the same (border-checked) 8x8 mode is applied to both planes. */
1774 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1775 mb_x, mb_y, is_vp7);
1776 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1777 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
/* Counterpart of the xchg_mb_border() call at the top, now with xchg == 0. */
1779 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1780 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1781 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1782 s->filter.simple, 0);
/* Lookup tables indexed by the sub-pixel fraction (0..7) of a motion vector
 * component, as computed in vp8_mc_luma() / vp8_mc_chroma(). Row 0 is also
 * used there as index into the mc_func[][] tables. */
1785 static const uint8_t subpel_idx[3][8] = {
1786 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1787 // also function pointer index
1788 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1789 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1795 * @param s VP8 decoding context
1796 * @param dst target buffer for block data at block position
1797 * @param ref reference picture buffer at origin (0, 0)
1798 * @param mv motion vector (relative to block position) to get pixel data from
1799 * @param x_off horizontal position of block from origin (0, 0)
1800 * @param y_off vertical position of block from origin (0, 0)
1801 * @param block_w width of block (16, 8 or 4)
1802 * @param block_h height of block (16, 8 or 4; may differ from block_w, e.g. for 16x8 partitions)
1803 * @param width width of src/dst plane data
1804 * @param height height of src/dst plane data
1805 * @param linesize size of a single line of plane data, including padding
1806 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/* Luma motion compensation for one block.
 * The motion vector is split into an integer part (mv >> 2, added to the
 * block position) and a fraction that is doubled and masked to 0..7 to index
 * subpel_idx[] and to serve as the mx / my filter arguments.
 * NOTE(review): the test separating the sub-pixel path from the final
 * full-pixel call (mc_func[0][0]) is not visible in this listing. */
1808 static av_always_inline
1809 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1810 ThreadFrame *ref, const VP56mv *mv,
1811 int x_off, int y_off, int block_w, int block_h,
1812 int width, int height, ptrdiff_t linesize,
1813 vp8_mc_func mc_func[3][3])
1815 uint8_t *src = ref->f->data[0];
1818 ptrdiff_t src_linesize = linesize;
1820 int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
1821 int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];
1823 x_off += mv->x >> 2;
1824 y_off += mv->y >> 2;
/* Wait for the reference frame to reach the last row this block reads,
 * including the extra rows below it; the row number is passed divided by 16. */
1827 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1828 src += y_off * linesize + x_off;
/* If the block plus its extra filter pixels is not fully inside the plane,
 * the source area is built in td->edge_emu_buffer and read from there with
 * stride EDGE_EMU_LINESIZE. */
1829 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1830 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1831 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1832 src - my_idx * linesize - mx_idx,
1833 EDGE_EMU_LINESIZE, linesize,
1834 block_w + subpel_idx[1][mx],
1835 block_h + subpel_idx[1][my],
1836 x_off - mx_idx, y_off - my_idx,
1838 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1839 src_linesize = EDGE_EMU_LINESIZE;
1841 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
/* Full-pixel case: no extra rows are awaited and the plain mc_func[0][0]
 * routine is used with zero fractions. */
1843 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1844 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1845 linesize, block_h, 0, 0);
1850 * chroma MC function
1852 * @param s VP8 decoding context
1853 * @param dst1 target buffer for block data at block position (U plane)
1854 * @param dst2 target buffer for block data at block position (V plane)
1855 * @param ref reference picture buffer at origin (0, 0)
1856 * @param mv motion vector (relative to block position) to get pixel data from
1857 * @param x_off horizontal position of block from origin (0, 0)
1858 * @param y_off vertical position of block from origin (0, 0)
1859 * @param block_w width of block (16, 8 or 4)
1860 * @param block_h height of block (may differ from block_w for non-square partitions)
1861 * @param width width of src/dst plane data
1862 * @param height height of src/dst plane data
1863 * @param linesize size of a single line of plane data, including padding
1864 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/* Chroma motion compensation for one block, applied to both chroma planes
 * (dst1 from ref plane 1, dst2 from ref plane 2) with the same vector.
 * Here the fraction is mv & 7 and the integer part is mv >> 3.
 * NOTE(review): the test separating the sub-pixel path from the final
 * full-pixel calls and the `else` between the edge-emulated and direct calls
 * are not visible in this listing. */
1866 static av_always_inline
1867 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1868 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1869 int x_off, int y_off, int block_w, int block_h,
1870 int width, int height, ptrdiff_t linesize,
1871 vp8_mc_func mc_func[3][3])
1873 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1876 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1877 int my = mv->y & 7, my_idx = subpel_idx[0][my];
1879 x_off += mv->x >> 3;
1880 y_off += mv->y >> 3;
1883 src1 += y_off * linesize + x_off;
1884 src2 += y_off * linesize + x_off;
/* Same wait as in vp8_mc_luma(), but the row number is divided by 8. */
1885 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1886 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1887 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
/* The single edge_emu_buffer is reused: it is filled and consumed for the
 * first plane before being refilled for the second one. */
1888 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1889 src1 - my_idx * linesize - mx_idx,
1890 EDGE_EMU_LINESIZE, linesize,
1891 block_w + subpel_idx[1][mx],
1892 block_h + subpel_idx[1][my],
1893 x_off - mx_idx, y_off - my_idx, width, height);
1894 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1895 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1897 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1898 src2 - my_idx * linesize - mx_idx,
1899 EDGE_EMU_LINESIZE, linesize,
1900 block_w + subpel_idx[1][mx],
1901 block_h + subpel_idx[1][my],
1902 x_off - mx_idx, y_off - my_idx, width, height);
1903 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1904 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1906 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1907 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
/* Full-pixel case. */
1910 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1911 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1912 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
/* Motion-compensate one partition of a macroblock: luma via vp8_mc_luma(),
 * then both chroma planes via vp8_mc_chroma() using the local vector `uvmv`.
 * bx_off / by_off give the partition's offset inside the macroblock.
 * NOTE(review): the derivation of uvmv, the body of the profile == 3 branch
 * and any conversion of the offsets and sizes to chroma units before the
 * chroma call are not visible in this listing. */
1916 static av_always_inline
1917 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1918 ThreadFrame *ref_frame, int x_off, int y_off,
1919 int bx_off, int by_off, int block_w, int block_h,
1920 int width, int height, VP56mv *mv)
/* put_pixels_tab[0] is used for luma unless block_w is 8, in which case
 * put_pixels_tab[1] is used. */
1925 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1926 ref_frame, mv, x_off + bx_off, y_off + by_off,
1927 block_w, block_h, width, height, s->linesize,
1928 s->put_pixels_tab[block_w == 8]);
1931 if (s->profile == 3) {
1932 /* this block only applies VP8; it is safe to check
1933 * only the profile, as VP7 profile <= 1 */
/* Chroma uses put_pixels_tab[1], or put_pixels_tab[2] when block_w is 4 at
 * this point. */
1945 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1946 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1947 &uvmv, x_off + bx_off, y_off + by_off,
1948 block_w, block_h, width, height, s->uvlinesize,
1949 s->put_pixels_tab[1 + (block_w == 4)]);
/* Prefetch helper: issues s->vdsp.prefetch() on reference-frame pixels derived
 * from mb->mv and the macroblock position, once for luma and once for chroma.
 * NOTE(review): the end of the parameter list is not visible in this listing;
 * the body additionally uses `mb_xy` and `ref`. */
1952 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1953 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1954 static av_always_inline
1955 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1958 /* Don't prefetch refs that haven't been used very often this frame. */
/* ref_count is indexed by ref - 1; the required count grows by one for every
 * 32 macroblocks already processed (mb_xy >> 5). */
1959 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1960 int x_off = mb_x << 4, y_off = mb_y << 4;
1961 int mx = (mb->mv.x >> 2) + x_off + 8;
1962 int my = (mb->mv.y >> 2) + y_off;
1963 uint8_t **src = s->framep[ref]->tf.f->data;
/* The prefetched luma rows depend on mb_x & 3 (rows 0, 4, 8 or 12 of the
 * macroblock); +64 moves the address 64 bytes ahead. */
1964 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1965 /* For threading, a ff_thread_await_progress here might be useful, but
1966 * it actually slows down the decoder. Since a bad prefetch doesn't
1967 * generate bad decoder output, we don't run it here. */
1968 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
/* Chroma: half-resolution coordinates; the stride passed is the distance
 * between the two chroma planes, with a count of 2. */
1969 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1970 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1975 * Apply motion vectors to prediction buffer, chapter 18.
/* Build the inter prediction of one macroblock into dst[] from the reference
 * frame selected by mb->ref_frame, dispatching on mb->partitioning:
 *   NONE   one 16x16 partition using mb->mv
 *   4x4    16 luma blocks with their own vectors plus 2x2 chroma blocks
 *   16x8, 8x16, 8x8   two or four partitions using bmv[0..3]
 * width / height are the plane dimensions in pixels rounded up to whole
 * macroblocks (16 * mb_width, 16 * mb_height).
 * NOTE(review): the `break` statements and the body of the profile == 3
 * branch are not visible in this listing. */
1977 static av_always_inline
1978 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1979 VP8Macroblock *mb, int mb_x, int mb_y)
1981 int x_off = mb_x << 4, y_off = mb_y << 4;
1982 int width = 16 * s->mb_width, height = 16 * s->mb_height;
1983 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1984 VP56mv *bmv = mb->bmv;
1986 switch (mb->partitioning) {
1987 case VP8_SPLITMVMODE_NONE:
1988 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1989 0, 0, 16, 16, width, height, &mb->mv);
1991 case VP8_SPLITMVMODE_4x4: {
/* Luma: one 4x4 block per vector bmv[4 * y + x]. */
1996 for (y = 0; y < 4; y++) {
1997 for (x = 0; x < 4; x++) {
1998 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1999 ref, &bmv[4 * y + x],
2000 4 * x + x_off, 4 * y + y_off, 4, 4,
2001 width, height, s->linesize,
2002 s->put_pixels_tab[2]);
/* Chroma: each 4x4 chroma block uses the sum of the four luma vectors of the
 * corresponding 2x2 group, scaled back by (sum + 2 + FF_SIGNBIT(sum)) >> 2. */
2011 for (y = 0; y < 2; y++) {
2012 for (x = 0; x < 2; x++) {
2013 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
2014 mb->bmv[2 * y * 4 + 2 * x + 1].x +
2015 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
2016 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
2017 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
2018 mb->bmv[2 * y * 4 + 2 * x + 1].y +
2019 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
2020 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
2021 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
2022 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
2023 if (s->profile == 3) {
2027 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
2028 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
2029 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
2030 width, height, s->uvlinesize,
2031 s->put_pixels_tab[2]);
/* Two stacked 16x8 partitions (offsets (0,0) and (0,8)). */
2036 case VP8_SPLITMVMODE_16x8:
2037 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2038 0, 0, 16, 8, width, height, &bmv[0]);
2039 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2040 0, 8, 16, 8, width, height, &bmv[1]);
/* Two side-by-side 8x16 partitions (offsets (0,0) and (8,0)). */
2042 case VP8_SPLITMVMODE_8x16:
2043 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2044 0, 0, 8, 16, width, height, &bmv[0]);
2045 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2046 8, 0, 8, 16, width, height, &bmv[1]);
/* Four 8x8 quadrants in raster order. */
2048 case VP8_SPLITMVMODE_8x8:
2049 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2050 0, 0, 8, 8, width, height, &bmv[0]);
2051 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2052 8, 0, 8, 8, width, height, &bmv[1]);
2053 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2054 0, 8, 8, 8, width, height, &bmv[2]);
2055 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2056 8, 8, 8, 8, width, height, &bmv[3]);
/* Add the inverse-transformed residual of a macroblock to dst[].
 * Luma is handled here only when the mode is not MODE_I4x4. Chroma is handled
 * for both planes.
 * The four per-block counts of a row (luma) or plane (chroma) are loaded as
 * one 32-bit word nnz4. If any byte has a bit other than bit 0 set, the blocks
 * are processed one by one (count 1: DC-only add, count > 1: full IDCT);
 * otherwise one combined DC-only routine (_dc_add4y / _dc_add4uv) handles the
 * whole group.
 * NOTE(review): the statements advancing nnz4 between blocks, the remaining
 * call arguments and the guards around the combined routines are not visible
 * in this listing. */
2061 static av_always_inline
2062 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
2066 if (mb->mode != MODE_I4x4) {
2067 uint8_t *y_dst = dst[0];
2068 for (y = 0; y < 4; y++) {
2069 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
2071 if (nnz4 & ~0x01010101) {
2072 for (x = 0; x < 4; x++) {
2073 if ((uint8_t) nnz4 == 1)
2074 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
2077 else if ((uint8_t) nnz4 > 1)
2078 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
2086 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
2089 y_dst += 4 * s->linesize;
/* Chroma planes: counts and blocks are stored at index 4 + ch, with the four
 * blocks of a plane addressed as (y << 1) + x. */
2093 for (ch = 0; ch < 2; ch++) {
2094 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
2096 uint8_t *ch_dst = dst[1 + ch];
2097 if (nnz4 & ~0x01010101) {
2098 for (y = 0; y < 2; y++) {
2099 for (x = 0; x < 2; x++) {
2100 if ((uint8_t) nnz4 == 1)
2101 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2102 td->block[4 + ch][(y << 1) + x],
2104 else if ((uint8_t) nnz4 > 1)
2105 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2106 td->block[4 + ch][(y << 1) + x],
2110 goto chroma_idct_end;
2112 ch_dst += 4 * s->uvlinesize;
2115 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
/* Compute the loop-filter parameters of one macroblock and store them in *f.
 *   filter_level  per-segment level (absolute, or added to the frame level)
 *                 or the frame level, plus optional per-reference-frame and
 *                 per-mode deltas, clipped with av_clip_uintp2(level, 6)
 *   inner_limit   derived from filter_level and the sharpness, at least 1
 *   inner_filter  whether the inner block edges are filtered as well */
2123 static av_always_inline
2124 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2125 VP8FilterStrength *f, int is_vp7)
2127 int interior_limit, filter_level;
2129 if (s->segmentation.enabled) {
2130 filter_level = s->segmentation.filter_level[mb->segment];
/* Without absolute_vals the segment value is a delta to the frame level. */
2131 if (!s->segmentation.absolute_vals)
2132 filter_level += s->filter.level;
2134 filter_level = s->filter.level;
2136 if (s->lf_delta.enabled) {
2137 filter_level += s->lf_delta.ref[mb->ref_frame];
2138 filter_level += s->lf_delta.mode[mb->mode];
2141 filter_level = av_clip_uintp2(filter_level, 6);
2143 interior_limit = filter_level;
/* With a non-zero sharpness the limit is shifted down by
 * (sharpness + 3) >> 2 and capped at 9 - sharpness. */
2144 if (s->filter.sharpness) {
2145 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2146 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2148 interior_limit = FFMAX(interior_limit, 1);
2150 f->filter_level = filter_level;
2151 f->inner_limit = interior_limit;
/* Inner edges are always filtered for VP7; for VP8 only if the macroblock is
 * not skipped or uses I4x4 / split-MV mode. */
2152 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2153 mb->mode == VP8_MVMODE_SPLIT;
/* Apply the normal (non-simple) loop filter to one macroblock using the
 * parameters in *f. The visible sequence is:
 *   1. macroblock-edge filter vp8_h_loop_filter16y / 8uv
 *   2. inner h_loop_filter calls (macro below), VP8 only at this point
 *   3. macroblock-edge filter vp8_v_loop_filter16y / 8uv
 *   4. inner v_loop_filter calls at luma offsets 4, 8, 12 and chroma offset 4
 *   5. inner h_loop_filter calls (macro below), VP7 only at this point
 * hev_thresh comes from a table indexed by s->keyframe and filter_level.
 * NOTE(review): the guards around steps 1, 3 and 4 (presumably mb_x, mb_y and
 * inner_filter), the early exit, and the condition selecting between the two
 * sets of edge-limit formulas (presumably is_vp7) are not visible in this
 * listing. */
2156 static av_always_inline
2157 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2158 int mb_x, int mb_y, int is_vp7)
2160 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2161 int filter_level = f->filter_level;
2162 int inner_limit = f->inner_limit;
2163 int inner_filter = f->inner_filter;
2164 ptrdiff_t linesize = s->linesize;
2165 ptrdiff_t uvlinesize = s->uvlinesize;
2166 static const uint8_t hev_thresh_lut[2][64] = {
2167 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2168 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2169 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2171 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2172 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2173 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2181 bedge_lim_y = filter_level;
2182 bedge_lim_uv = filter_level * 2;
2183 mbedge_lim = filter_level + 2;
2186 bedge_lim_uv = filter_level * 2 + inner_limit;
2187 mbedge_lim = bedge_lim_y + 4;
2190 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2193 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2194 mbedge_lim, inner_limit, hev_thresh);
2195 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2196 mbedge_lim, inner_limit, hev_thresh);
/* Inner h_loop_filter calls at luma offsets 4, 8, 12 and chroma offset 4,
 * executed only if both `cond` and inner_filter are true. The macro is
 * expanded twice with complementary conditions (!is_vp7 before the
 * v_loop_filter calls, is_vp7 after them), so it runs once per macroblock, at
 * a codec-dependent point in the sequence. */
2199 #define H_LOOP_FILTER_16Y_INNER(cond) \
2200 if (cond && inner_filter) { \
2201 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2202 bedge_lim_y, inner_limit, \
2204 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2205 bedge_lim_y, inner_limit, \
2207 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2208 bedge_lim_y, inner_limit, \
2210 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2211 uvlinesize, bedge_lim_uv, \
2212 inner_limit, hev_thresh); \
2215 H_LOOP_FILTER_16Y_INNER(!is_vp7)
2218 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2219 mbedge_lim, inner_limit, hev_thresh);
2220 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2221 mbedge_lim, inner_limit, hev_thresh);
2225 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2226 linesize, bedge_lim_y,
2227 inner_limit, hev_thresh);
2228 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2229 linesize, bedge_lim_y,
2230 inner_limit, hev_thresh);
2231 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2232 linesize, bedge_lim_y,
2233 inner_limit, hev_thresh);
2234 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2235 dst[2] + 4 * uvlinesize,
2236 uvlinesize, bedge_lim_uv,
2237 inner_limit, hev_thresh);
2240 H_LOOP_FILTER_16Y_INNER(is_vp7)
/* Apply the simple loop filter to the luma plane of one macroblock: the
 * macroblock edge with mbedge_lim and the three inner edges (offsets 4, 8, 12)
 * with bedge_lim, first with the h_ routines and then with the v_ routines.
 * The limits are bedge_lim = 2 * filter_level + inner_limit and
 * mbedge_lim = bedge_lim + 4.
 * NOTE(review): the end of the parameter list, the early exit and the guards
 * around the individual calls (presumably mb_x, mb_y and inner_filter) are
 * not visible in this listing. */
2243 static av_always_inline
2244 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2247 int mbedge_lim, bedge_lim;
2248 int filter_level = f->filter_level;
2249 int inner_limit = f->inner_limit;
2250 int inner_filter = f->inner_filter;
2251 ptrdiff_t linesize = s->linesize;
2256 bedge_lim = 2 * filter_level + inner_limit;
2257 mbedge_lim = bedge_lim + 4;
2260 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2262 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2263 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2264 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2268 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2270 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2271 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2272 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
/* Decode the modes and motion vectors of every macroblock of a frame in
 * raster order by calling decode_mb_mode() with layout == 1.
 * MARGIN (16 << 2 == 64) is how far the motion-vector bounds extend beyond the
 * frame. The bounds in s->mv_bounds are kept relative to the current
 * macroblock: they start at -MARGIN and ((count - 1) << 6) + MARGIN and are
 * lowered by 64 for every macroblock step in x and y.
 * Returns AVERROR_INVALIDDATA when vpX_rac_is_end() reports true for s->c
 * before a macroblock is decoded.
 * NOTE(review): the final return statement and the condition guarding the
 * write to the top-mode array (L1304-1305 of this listing) are not visible. */
2276 #define MARGIN (16 << 2)
2277 static av_always_inline
2278 int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2279 VP8Frame *prev_frame, int is_vp7)
2281 VP8Context *s = avctx->priv_data;
2284 s->mv_bounds.mv_min.y = -MARGIN;
2285 s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2286 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
/* Rows of the macroblock array are mb_width + 1 entries apart; the first
 * macroblock of row mb_y sits one row and one entry past macroblocks_base. */
2287 VP8Macroblock *mb = s->macroblocks_base +
2288 ((s->mb_width + 1) * (mb_y + 1) + 1);
2289 int mb_xy = mb_y * s->mb_width;
/* Reset the left 4x4-mode context to DC_PRED at the start of every row. */
2291 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2293 s->mv_bounds.mv_min.x = -MARGIN;
2294 s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2296 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2297 if (vpX_rac_is_end(&s->c)) {
2298 return AVERROR_INVALIDDATA;
/* Initialise the top-mode context of the macroblock above to DC_PRED. */
2301 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2302 DC_PRED * 0x01010101);
/* The previous frame's segment map is only consulted when it exists. */
2303 decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2304 prev_frame && prev_frame->seg_map ?
2305 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
2306 s->mv_bounds.mv_min.x -= 64;
2307 s->mv_bounds.mv_max.x -= 64;
2309 s->mv_bounds.mv_min.y -= 64;
2310 s->mv_bounds.mv_max.y -= 64;
/* VP7 instantiation of vp78_decode_mv_mb_modes() (passes IS_VP7). */
2315 static int vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2316 VP8Frame *prev_frame)
2318 return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
/* VP8 instantiation of vp78_decode_mv_mb_modes() (passes IS_VP8). */
2321 static int vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2322 VP8Frame *prev_frame)
2324 return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
/*
 * Wait until thread data `otd` has reached at least the given position.
 *
 * The target is packed as (mb_y_check << 16) | (mb_x_check & 0xFFFF). If
 * otd->thread_mb_pos is still below it, the target is published in
 * td->wait_mb_pos, otd->thread_mb_pos is tested again before
 * pthread_cond_wait() on otd->cond (all under otd->lock), and
 * td->wait_mb_pos is set back to INT_MAX before the lock is released.
 *
 * The arguments are expanded without parentheses: an argument that uses an
 * operator binding more loosely than << (for the row) or & (for the column)
 * must be parenthesized by the caller.
 */
2328 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2330 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2331 if (atomic_load(&otd->thread_mb_pos) < tmp) { \
2332 pthread_mutex_lock(&otd->lock); \
2333 atomic_store(&td->wait_mb_pos, tmp); \
2335 if (atomic_load(&otd->thread_mb_pos) >= tmp) \
2337 pthread_cond_wait(&otd->cond, &otd->lock); \
2339 atomic_store(&td->wait_mb_pos, INT_MAX); \
2340 pthread_mutex_unlock(&otd->lock); \
/*
 * Publish the position of thread data `td` and wake waiters when needed.
 *
 * Stores (mb_y << 16) | (mb_x & 0xFFFF) into td->thread_mb_pos. A broadcast
 * on td->cond (under td->lock) follows when sliced_threading is true and
 * either one of next_td/prev_td is NULL, or a neighbour different from td
 * has a wait_mb_pos that the new position has reached.
 *
 * Besides its parameters the macro reads avctx, next_td and prev_td from the
 * scope it is expanded in. The arguments are expanded without parentheses,
 * see the precedence remark on check_thread_pos().
 */
2344 #define update_pos(td, mb_y, mb_x) \
2346 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2347 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2349 int is_null = !next_td || !prev_td; \
2350 int pos_check = (is_null) ? 1 : \
2351 (next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) || \
2352 (prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos)); \
2353 atomic_store(&td->thread_mb_pos, pos); \
2354 if (sliced_threading && pos_check) { \
2355 pthread_mutex_lock(&td->lock); \
2356 pthread_cond_broadcast(&td->cond); \
2357 pthread_mutex_unlock(&td->lock); \
/* Fallback definitions: both macros ignore their arguments and expand to a
 * bare `while(0)`, so an invocation followed by ';' is an empty loop.
 * NOTE(review): presumably the variant chosen when thread support is
 * compiled out - confirm against the surrounding preprocessor conditional. */
2361 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
2362 #define update_pos(td, mb_y, mb_x) while(0)
/**
 * Decode one macroblock row (modes, coefficients, prediction, inverse
 * transform) and record per-macroblock filter strengths; the loop filter
 * itself is not run here.
 *
 * The row number is the upper 16 bits of td->thread_mb_pos of the thread
 * data selected by threadnr.
 *
 * @param avctx    codec context; avctx->priv_data is the VP8Context
 * @param tdata    not referenced by the statements below
 * @param jobnr    job index, used to locate the neighbouring thread data
 * @param threadnr index into s->thread_data for this call
 * @param is_vp7   selects the VP7 wait distances and left_nnz handling
 * @return AVERROR_INVALIDDATA when the row's range coder is exhausted
 */
2365 static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2366 int jobnr, int threadnr, int is_vp7)
2368 VP8Context *s = avctx->priv_data;
2369 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2370 int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
2371 int mb_x, mb_xy = mb_y * s->mb_width;
2372 int num_jobs = s->num_jobs;
2373 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
// Partition index is mb_y masked with (num_coeff_partitions - 1).
2374 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
// Start of this row in each plane: plane 0 advances 16 lines per row,
// planes 1 and 2 advance 8.
2377 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2378 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2379 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
// Bail out if the partition is already exhausted.
2382 if (vpX_rac_is_end(c))
2383 return AVERROR_INVALIDDATA;
// Thread data of the previous job in the ring of num_jobs.
2388 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2389 if (mb_y == s->mb_height - 1)
// Thread data of the next job in the ring of num_jobs.
2392 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
// Layout 1 addresses macroblocks through macroblocks_base, with rows of
// mb_width + 1 entries.
2393 if (s->mb_layout == 1)
2394 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2396 // Make sure the previous frame has read its segmentation map,
2397 // if we re-use the same map.
2398 if (prev_frame && s->segmentation.enabled &&
2399 !s->segmentation.update_map)
2400 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
// Index into s->macroblocks counted back from the last row, two entries per row.
2401 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2402 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2403 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
// left_nnz is cleared on every row for VP8 but only on row 0 for VP7.
2406 if (!is_vp7 || mb_y == 0)
2407 memset(td->left_nnz, 0, sizeof(td->left_nnz));
// Horizontal bounds for column 0; both are lowered by 64 per macroblock.
2409 td->mv_bounds.mv_min.x = -MARGIN;
2410 td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2412 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2413 if (vpX_rac_is_end(c))
2414 return AVERROR_INVALIDDATA;
// Wait for the previous thread to get far enough ahead: column mb_x + 1 on
// row mb_y - 1 for VP8, column mb_x + 2 on row mb_y - 2 for VP7.
2416 if (prev_td != td) {
2417 if (threadnr != 0) {
2418 check_thread_pos(td, prev_td,
2419 mb_x + (is_vp7 ? 2 : 1),
2420 mb_y - (is_vp7 ? 2 : 1));
// Same wait with the column target offset by s->mb_width + 3, the range in
// which filter_mb_row() reports its positions.
2422 check_thread_pos(td, prev_td,
2423 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2424 mb_y - (is_vp7 ? 2 : 1));
2428 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2430 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2431 dst[2] - dst[1], 2);
// Per-macroblock mode decoding; the next-to-last argument is 0 here and 1
// in vp78_decode_mv_mb_modes().
2434 decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2435 prev_frame && prev_frame->seg_map ?
2436 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2438 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2441 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
// Modes up to MODE_I4x4 go through intra prediction.
2443 if (mb->mode <= MODE_I4x4)
2444 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2446 inter_predict(s, td, dst, mb, mb_x, mb_y);
2448 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2451 idct_mb(s, td, dst, mb);
// Clear the first eight left/top nnz entries; entry 8 is handled just below.
2453 AV_ZERO64(td->left_nnz);
2454 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2456 /* Reset DC block predictors if they would exist
2457 * if the mb had coefficients */
2458 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2459 td->left_nnz[8] = 0;
2460 s->top_nnz[mb_x][8] = 0;
// Record this macroblock's filter strength; filter_mb_row() reads it back.
2464 if (s->deblock_filter)
2465 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
// With more than one job, the thread numbered num_jobs - 1 backs up this
// macroblock's border into s->top_border here; filter_mb_row() does the
// backup itself when num_jobs == 1.
2467 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2468 if (s->filter.simple)
2469 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2470 NULL, NULL, s->linesize, 0, 1);
2472 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2473 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2476 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2481 td->mv_bounds.mv_min.x -= 64;
2482 td->mv_bounds.mv_max.x -= 64;
// NOTE(review): if this test sits inside the mb_x loop above (bounded by
// mb_x < s->mb_width), mb_x can never equal s->mb_width + 1 and the first
// update_pos() is unreachable - confirm and consider simplifying.
2484 if (mb_x == s->mb_width + 1) {
2485 update_pos(td, mb_y, s->mb_width + 3);
2487 update_pos(td, mb_y, mb_x);
/* VP7 row decoder: forwards to decode_mb_row_no_filter() with is_vp7 = 1.
 * Installed as s->decode_mb_row_no_filter by vp78_decode_init(). */
2493 static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2494 int jobnr, int threadnr)
2496 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
/* VP8 row decoder: forwards to decode_mb_row_no_filter() with is_vp7 = 0.
 * Installed as s->decode_mb_row_no_filter by vp78_decode_init(). */
2499 static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2500 int jobnr, int threadnr)
2502 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
/**
 * Apply the loop filter to one macroblock row, using the filter strengths
 * stored in td->filter_strength for that row.
 *
 * The row number is the upper 16 bits of td->thread_mb_pos of the thread
 * data selected by threadnr. After each macroblock the position
 * (s->mb_width + 3) + mb_x is published through update_pos().
 *
 * @param avctx    codec context; avctx->priv_data is the VP8Context
 * @param tdata    not referenced by the statements below
 * @param jobnr    job index, used to locate the neighbouring thread data
 * @param threadnr index into s->thread_data for this call
 * @param is_vp7   forwarded to filter_mb()
 */
2505 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2506 int jobnr, int threadnr, int is_vp7)
2508 VP8Context *s = avctx->priv_data;
2509 VP8ThreadData *td = &s->thread_data[threadnr];
2510 int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs;
2511 AVFrame *curframe = s->curframe->tf.f;
2513 VP8ThreadData *prev_td, *next_td;
// Start of this row in each plane: plane 0 advances 16 lines per row,
// planes 1 and 2 advance 8.
2515 curframe->data[0] + 16 * mb_y * s->linesize,
2516 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2517 curframe->data[2] + 8 * mb_y * s->uvlinesize
// First macroblock of the row, addressed the same way as in
// decode_mb_row_no_filter().
2520 if (s->mb_layout == 1)
2521 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2523 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
// Thread data of the previous and next job in the ring of num_jobs.
2528 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2529 if (mb_y == s->mb_height - 1)
2532 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2534 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2535 VP8FilterStrength *f = &td->filter_strength[mb_x];
// Wait for the previous row's thread to reach column mb_x + 1 in the
// filter position range (offset by s->mb_width + 3) on row mb_y - 1.
2537 check_thread_pos(td, prev_td,
2538 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
// Wait for the next row's thread to reach column mb_x + 1 on row mb_y + 1,
// unless that thread data is entry 0.
2540 if (next_td != &s->thread_data[0])
2541 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
// With a single job the border backup is done here rather than in
// decode_mb_row_no_filter().
2543 if (num_jobs == 1) {
2544 if (s->filter.simple)
2545 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2546 NULL, NULL, s->linesize, 0, 1);
2548 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2549 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
// The simple filter only receives plane 0; the other filter receives all planes.
2552 if (s->filter.simple)
2553 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2555 filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2560 update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
/* VP7 row filter: forwards to filter_mb_row() with is_vp7 = 1.
 * Installed as s->filter_mb_row by vp78_decode_init(). */
2564 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2565 int jobnr, int threadnr)
2567 filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
/* VP8 row filter: forwards to filter_mb_row() with is_vp7 = 0.
 * Installed as s->filter_mb_row by vp78_decode_init(). */
2570 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2571 int jobnr, int threadnr)
2573 filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
/**
 * Job body handed to avctx->execute2(): decodes rows jobnr, jobnr + num_jobs,
 * jobnr + 2 * num_jobs, ... and, when s->deblock_filter is set, filters each
 * of them through s->filter_mb_row.
 *
 * @param avctx    codec context; avctx->priv_data is the VP8Context
 * @param tdata    forwarded to the row callbacks
 * @param jobnr    first row handled and index of this job's thread data
 * @param threadnr stored in td->thread_nr and used for the initial
 *                 vertical motion-vector bounds
 * @param is_vp7   not referenced by the statements below
 *
 * NOTE(review): td is indexed by jobnr here, while decode_mb_row_no_filter()
 * and filter_mb_row() index s->thread_data by threadnr, and the initial
 * bounds use threadnr although the first row is jobnr - confirm that the two
 * indices always agree for this job.
 */
2576 static av_always_inline
2577 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2578 int threadnr, int is_vp7)
2580 VP8Context *s = avctx->priv_data;
2581 VP8ThreadData *td = &s->thread_data[jobnr];
// Both NULL, so update_pos() expanded in this function always sees is_null.
2582 VP8ThreadData *next_td = NULL, *prev_td = NULL;
2583 VP8Frame *curframe = s->curframe;
2584 int mb_y, num_jobs = s->num_jobs;
2587 td->thread_nr = threadnr;
// Vertical bounds for the first row of this job; lowered by 64 * num_jobs
// after each processed row.
2588 td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr;
2589 td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
2590 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
// The row callbacks read the row number back from thread_mb_pos.
2591 atomic_store(&td->thread_mb_pos, mb_y << 16);
2592 ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
// Publishes row s->mb_height, column 0xFFFF: a position at or beyond any
// target another thread can wait for.
// NOTE(review): presumably taken only when the row decode failed - confirm.
2594 update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
2597 if (s->deblock_filter)
2598 s->filter_mb_row(avctx, tdata, jobnr, threadnr);
// Mark the whole row as done (column 0xFFFF).
2599 update_pos(td, mb_y, INT_MAX & 0xFFFF);
2601 td->mv_bounds.mv_min.y -= 64 * num_jobs;
2602 td->mv_bounds.mv_max.y -= 64 * num_jobs;
// Under frame threading, report the finished row on the current frame.
2604 if (avctx->active_thread_type == FF_THREAD_FRAME)
2605 ff_thread_report_progress(&curframe->tf, mb_y, 0);
/* VP7 job body for execute2(): forwards to vp78_decode_mb_row_sliced() with IS_VP7. */
2611 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2612 int jobnr, int threadnr)
2614 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
/* VP8 job body for execute2(): forwards to vp78_decode_mb_row_sliced() with IS_VP8. */
2617 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2618 int jobnr, int threadnr)
2620 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
/**
 * Decode one packet (shared VP7/VP8 implementation).
 *
 * Parses the frame header, selects and allocates the current frame, prepares
 * the reference pointers for the following frame, then either passes the
 * packet to the hardware accelerator or runs the row jobs through
 * avctx->execute2(). For a frame that is not marked invisible, a new
 * reference to the decoded frame is stored in `data`.
 *
 * @param avctx     codec context; avctx->priv_data is the VP8Context
 * @param data      destination AVFrame for av_frame_ref()
 * @param got_frame caller-provided output flag
 * @param avpkt     packet whose data/size are parsed and decoded
 * @param is_vp7    selects the VP7 or VP8 code paths
 */
2623 static av_always_inline
2624 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2625 const AVPacket *avpkt, int is_vp7)
2627 VP8Context *s = avctx->priv_data;
2628 int ret, i, referenced, num_jobs;
2629 enum AVDiscard skip_thresh;
2630 VP8Frame *av_uninit(curframe), *prev_frame;
// Parse the frame header with the codec-specific parser.
2633 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2635 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
// For VP8 outside WebP, the pixel format is chosen once, while s->pix_fmt is
// still AV_PIX_FMT_NONE; a negative result is turned into AVERROR(EINVAL).
2640 if (s->actually_webp) {
2641 // avctx->pix_fmt already set in caller.
2642 } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
2643 s->pix_fmt = get_pixel_format(s);
2644 if (s->pix_fmt < 0) {
2645 ret = AVERROR(EINVAL);
2648 avctx->pix_fmt = s->pix_fmt;
// The "current" slot still holds the frame decoded before this one.
2651 prev_frame = s->framep[VP56_FRAME_CURRENT];
// The new frame is referenced if it updates the last frame or is stored as
// golden or altref.
2653 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2654 s->update_altref == VP56_FRAME_CURRENT;
// Discard threshold: AVDISCARD_NONREF for unreferenced frames,
// AVDISCARD_NONKEY for referenced non-keyframes.
2656 skip_thresh = !referenced ? AVDISCARD_NONREF
2657 : !s->keyframe ? AVDISCARD_NONKEY
// Frame is being skipped: carry the four reference pointers over unchanged.
2660 if (avctx->skip_frame >= skip_thresh) {
2662 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
// The loop filter runs only for a non-zero filter level and when
// skip_loop_filter is below the threshold.
2665 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2667 // release no longer referenced frames
// NOTE(review): the bound 5 is hard-coded, whereas ff_vp8_decode_free() and
// vp8_init_frames() iterate with FF_ARRAY_ELEMS(s->frames) - confirm they match.
2668 for (i = 0; i < 5; i++)
2669 if (s->frames[i].tf.f->buf[0] &&
2670 &s->frames[i] != prev_frame &&
2671 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2672 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2673 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2674 vp8_release_frame(s, &s->frames[i]);
// Pick a free buffer as the new current frame.
2676 curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
// Colour properties reported to the caller.
2679 avctx->colorspace = AVCOL_SPC_BT470BG;
2681 avctx->color_range = AVCOL_RANGE_JPEG;
2683 avctx->color_range = AVCOL_RANGE_MPEG;
2685 /* Given that arithmetic probabilities are updated every frame, it's quite
2686 * likely that the values we have on a random interframe are complete
2687 * junk if we didn't start decode on a keyframe. So just don't display
2688 * anything rather than junk. */
2689 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2690 !s->framep[VP56_FRAME_GOLDEN] ||
2691 !s->framep[VP56_FRAME_GOLDEN2])) {
2692 av_log(avctx, AV_LOG_WARNING,
2693 "Discarding interframe without a prior keyframe!\n");
2694 ret = AVERROR_INVALIDDATA;
// Tag the output frame as I or P according to the keyframe flag.
2698 curframe->tf.f->key_frame = s->keyframe;
2699 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2700 : AV_PICTURE_TYPE_P;
2701 if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2704 // check if golden and altref are swapped
2705 if (s->update_altref != VP56_FRAME_NONE)
2706 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2708 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2710 if (s->update_golden != VP56_FRAME_NONE)
2711 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2713 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
// The "previous" reference for the next frame is either the frame being
// decoded now or the existing previous frame.
2716 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2718 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2720 s->next_framep[VP56_FRAME_CURRENT] = curframe;
// Only codecs that provide update_thread_context signal setup completion.
2722 if (avctx->codec->update_thread_context)
2723 ff_thread_finish_setup(avctx);
// Hardware path: the whole packet is passed to start_frame and decode_slice,
// followed by end_frame.
2725 if (avctx->hwaccel) {
2726 ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
2730 ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
2734 ret = avctx->hwaccel->end_frame(avctx);
// Software path: cache the strides of plane 0 and plane 1.
2739 s->linesize = curframe->tf.f->linesize[0];
2740 s->uvlinesize = curframe->tf.f->linesize[1];
// Reset the top non-zero contexts across the full row width.
2742 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2743 /* Zero macroblock structures for top/top-left prediction
2744 * from outside the frame. */
2746 memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2747 (s->mb_width + 1) * sizeof(*s->macroblocks));
// Keyframes in layout 0 start with every top intra-4x4 mode set to DC_PRED.
2748 if (!s->mb_layout && s->keyframe)
2749 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2751 memset(s->ref_count, 0, sizeof(s->ref_count));
// Layout 1 decodes all macroblock modes up front, before the row jobs run.
2753 if (s->mb_layout == 1) {
2754 // Make sure the previous frame has read its segmentation map,
2755 // if we re-use the same map.
2756 if (prev_frame && s->segmentation.enabled &&
2757 !s->segmentation.update_map)
2758 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2760 ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2762 ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2767 if (avctx->active_thread_type == FF_THREAD_FRAME)
// Job count is bounded by both the coefficient partition count and the
// thread count.
2770 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2771 s->num_jobs = num_jobs;
2772 s->curframe = curframe;
2773 s->prev_frame = prev_frame;
2774 s->mv_bounds.mv_min.y = -MARGIN;
2775 s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
// Reset every thread's published position and wait target.
2776 for (i = 0; i < MAX_THREADS; i++) {
2777 VP8ThreadData *td = &s->thread_data[i];
2778 atomic_init(&td->thread_mb_pos, 0);
2779 atomic_init(&td->wait_mb_pos, INT_MAX);
// Run the row jobs with the codec-specific job body.
2782 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2785 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
// Report the maximum progress value on the current frame.
2789 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
// Commit the reference pointers prepared for the next frame.
2790 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2793 // if future frames don't use the updated probabilities,
2794 // reset them to the values we saved
2795 if (!s->update_probabilities)
2796 s->prob[0] = s->prob[1];
// Frames not marked invisible are handed to the caller as a new reference.
2798 if (!s->invisible) {
2799 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
// Overwrite next_framep with the current reference set.
// NOTE(review): presumably the error-path rollback - confirm.
2806 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
/* Non-static VP8 decode entry point: forwards to vp78_decode_frame() with
 * IS_VP8. Also referenced as .decode by ff_vp8_decoder. */
2810 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2813 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2816 #if CONFIG_VP7_DECODER
/* VP7 decode entry point: forwards to vp78_decode_frame() with IS_VP7.
 * Only built when CONFIG_VP7_DECODER is set; referenced as .decode by
 * ff_vp7_decoder. */
2817 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2820 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2822 #endif /* CONFIG_VP7_DECODER */
/* Decoder teardown: calls vp8_decode_flush_impl() with its second argument
 * set to 1, then frees the AVFrame held by every entry of s->frames.
 * Used as the .close callback of both decoders and on the failure path of
 * vp78_decode_init(). */
2824 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2826 VP8Context *s = avctx->priv_data;
2832 vp8_decode_flush_impl(avctx, 1);
2833 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2834 av_frame_free(&s->frames[i].tf.f);
/* Allocate an AVFrame for every entry of s->frames.
 * Returns AVERROR(ENOMEM) at the first failed allocation; entries allocated
 * before that are left for the caller to release. */
2839 static av_cold int vp8_init_frames(VP8Context *s)
2842 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2843 s->frames[i].tf.f = av_frame_alloc();
2844 if (!s->frames[i].tf.f)
2845 return AVERROR(ENOMEM);
/**
 * Shared VP7/VP8 decoder initialisation: sets the default pixel format,
 * initialises the DSP and prediction contexts, installs the codec-specific
 * row callbacks and allocates the frame pool.
 *
 * @param avctx  codec context; avctx->priv_data is the VP8Context
 * @param is_vp7 selects which DSP init and row callbacks are installed
 */
2850 static av_always_inline
2851 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2853 VP8Context *s = avctx->priv_data;
// s->vp7 is derived from the codec id rather than from is_vp7.
2857 s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
// s->pix_fmt stays unset here; vp78_decode_frame() fills it in for VP8.
2858 s->pix_fmt = AV_PIX_FMT_NONE;
2859 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2861 ff_videodsp_init(&s->vdsp, 8);
// Common DSP init first, then the codec-specific one on the same context.
2863 ff_vp78dsp_init(&s->vp8dsp);
2864 if (CONFIG_VP7_DECODER && is_vp7) {
2865 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2866 ff_vp7dsp_init(&s->vp8dsp);
2867 s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2868 s->filter_mb_row = vp7_filter_mb_row;
2869 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2870 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2871 ff_vp8dsp_init(&s->vp8dsp);
2872 s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2873 s->filter_mb_row = vp8_filter_mb_row;
2876 /* does not change for VP8 */
2877 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
// On frame-pool allocation failure, tear down whatever was set up.
2879 if ((ret = vp8_init_frames(s)) < 0) {
2880 ff_vp8_decode_free(avctx);
2887 #if CONFIG_VP7_DECODER
/* VP7 init callback: forwards to vp78_decode_init() with IS_VP7.
 * Only built when CONFIG_VP7_DECODER is set. */
2888 static int vp7_decode_init(AVCodecContext *avctx)
2890 return vp78_decode_init(avctx, IS_VP7);
2892 #endif /* CONFIG_VP7_DECODER */
/* Non-static VP8 init callback: forwards to vp78_decode_init() with IS_VP8.
 * Also referenced as .init by ff_vp8_decoder. */
2894 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2896 return vp78_decode_init(avctx, IS_VP8);
2899 #if CONFIG_VP8_DECODER
/* Translate a pointer into s_src->frames[] into a pointer to the same slot
 * of s->frames[]; a NULL pointer stays NULL. Reads s and s_src from the
 * scope it is expanded in. */
2901 #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
/**
 * Frame-threading callback: copy the decoder state a following frame needs
 * from the source context into the destination context.
 *
 * Copied state: macroblock dimensions (when they differ and buffers already
 * exist), pixel format, probabilities, segmentation, loop-filter deltas,
 * sign biases, references to every allocated frame, and the four frame
 * pointers taken from the source's next_framep.
 *
 * @param dst destination codec context (priv_data is the VP8Context written)
 * @param src source codec context (priv_data is the VP8Context read)
 */
2903 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2904 const AVCodecContext *src)
2906 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
// Dimensions changed after buffers were allocated: adopt the new size.
2909 if (s->macroblocks_base &&
2910 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2912 s->mb_width = s_src->mb_width;
2913 s->mb_height = s_src->mb_height;
2916 s->pix_fmt = s_src->pix_fmt;
// Take prob[0] when the source keeps its updated probabilities, otherwise
// prob[1] (which vp78_decode_frame() treats as the saved values).
2917 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2918 s->segmentation = s_src->segmentation;
2919 s->lf_delta = s_src->lf_delta;
2920 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
// Reference every source frame that currently has a buffer.
2922 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2923 if (s_src->frames[i].tf.f->buf[0]) {
2924 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
// The source's next_framep becomes this context's framep, re-pointed at
// this context's own frame array.
2930 s->framep[0] = REBASE(s_src->next_framep[0]);
2931 s->framep[1] = REBASE(s_src->next_framep[1]);
2932 s->framep[2] = REBASE(s_src->next_framep[2]);
2933 s->framep[3] = REBASE(s_src->next_framep[3]);
2937 #endif /* HAVE_THREADS */
2938 #endif /* CONFIG_VP8_DECODER */
2940 #if CONFIG_VP7_DECODER
/* Codec descriptor for the VP7 decoder. It shares priv_data layout
 * (VP8Context), close and flush callbacks with the VP8 decoder, and
 * advertises only AV_CODEC_CAP_DR1: no threading capabilities and no
 * update_thread_context callback are set. */
2941 const AVCodec ff_vp7_decoder = {
2943 .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2944 .type = AVMEDIA_TYPE_VIDEO,
2945 .id = AV_CODEC_ID_VP7,
2946 .priv_data_size = sizeof(VP8Context),
2947 .init = vp7_decode_init,
2948 .close = ff_vp8_decode_free,
2949 .decode = vp7_decode_frame,
2950 .capabilities = AV_CODEC_CAP_DR1,
2951 .flush = vp8_decode_flush,
2953 #endif /* CONFIG_VP7_DECODER */
2955 #if CONFIG_VP8_DECODER
/* Codec descriptor for the VP8 decoder. Compared with the VP7 descriptor it
 * additionally advertises frame and slice threading, provides an
 * update_thread_context callback when threads are enabled, and lists
 * hardware configurations guarded by the VAAPI and NVDEC config switches. */
2956 const AVCodec ff_vp8_decoder = {
2958 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2959 .type = AVMEDIA_TYPE_VIDEO,
2960 .id = AV_CODEC_ID_VP8,
2961 .priv_data_size = sizeof(VP8Context),
2962 .init = ff_vp8_decode_init,
2963 .close = ff_vp8_decode_free,
2964 .decode = ff_vp8_decode_frame,
2965 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
2966 AV_CODEC_CAP_SLICE_THREADS,
2967 .flush = vp8_decode_flush,
2968 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2969 .hw_configs = (const AVCodecHWConfigInternal *const []) {
2970 #if CONFIG_VP8_VAAPI_HWACCEL
2973 #if CONFIG_VP8_NVDEC_HWACCEL
2978 .caps_internal = FF_CODEC_CAP_ALLOCATE_PROGRESS,
2980 #endif /* CONFIG_VP8_DECODER */