2 * VP7/VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Fiona Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/imgutils.h"
33 #include "rectangle.h"
/* Dispatch macro: expands to the vp7_- or vp8_-prefixed variant of f.
 * When both decoders are built it selects at run time on the vp7 flag;
 * when only one is configured the choice is fixed at compile time.
 * NOTE(review): the matching #endif is not visible in this excerpt —
 * confirm it is present in the full file. */
42 #if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
43 #define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
44 #elif CONFIG_VP7_DECODER
45 #define VPX(vp7, f) vp7_ ## f
46 #else // CONFIG_VP8_DECODER
47 #define VPX(vp7, f) vp8_ ## f
/* Free every context buffer allocated at dimension-setup time: per-thread
 * data (including its synchronization primitives), the macroblock array
 * and the cached top-row prediction state.
 * NOTE(review): several interior lines (braces, HAVE_THREADS guards) are
 * missing from this excerpt. */
50 static void free_buffers(VP8Context *s)
54 for (i = 0; i < MAX_THREADS; i++) {
/* presumably guarded by HAVE_THREADS in the full file — TODO confirm */
56 pthread_cond_destroy(&s->thread_data[i].cond);
57 pthread_mutex_destroy(&s->thread_data[i].lock);
59 av_freep(&s->thread_data[i].filter_strength);
61 av_freep(&s->thread_data);
62 av_freep(&s->macroblocks_base);
63 av_freep(&s->intra4x4_pred_mode_top);
64 av_freep(&s->top_nnz);
65 av_freep(&s->top_border);
/* s->macroblocks aliases into macroblocks_base; clear the dangling pointer */
67 s->macroblocks = NULL;
/* Allocate all per-frame buffers for f: the (possibly reference-flagged)
 * thread frame buffer, a zeroed segmentation map of one byte per
 * macroblock, and hwaccel private data if the active hwaccel needs it.
 * Returns 0 on success or a negative AVERROR; on failure the partially
 * allocated buffers are released again (lines 90-92).
 * NOTE(review): the error-branch goto/return lines between the
 * allocations and the cleanup at 90-92 are missing from this excerpt. */
70 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
73 if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
74 ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
76 if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
78 if (s->avctx->hwaccel) {
79 const AVHWAccel *hwaccel = s->avctx->hwaccel;
80 if (hwaccel->frame_priv_data_size) {
81 f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
82 if (!f->hwaccel_priv_buf)
84 f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
/* cleanup path: undo the earlier allocations before reporting OOM */
90 av_buffer_unref(&f->seg_map);
91 ff_thread_release_buffer(s->avctx, &f->tf);
92 return AVERROR(ENOMEM);
95 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
97 av_buffer_unref(&f->seg_map);
98 av_buffer_unref(&f->hwaccel_priv_buf);
99 f->hwaccel_picture_private = NULL;
100 ff_thread_release_buffer(s->avctx, &f->tf);
103 #if CONFIG_VP8_DECODER
/* Make dst a new reference to src: releases dst's old buffers, then
 * refs src's thread frame, segmentation map and (if present) hwaccel
 * private buffer. Returns 0 or a negative AVERROR.
 * NOTE(review): interior lines (opening brace, the seg_map condition at
 * line 112, the trailing return) are missing from this excerpt. */
104 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
108 vp8_release_frame(s, dst);
110 if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
113 !(dst->seg_map = av_buffer_ref(src->seg_map))) {
/* ref of seg_map failed: drop the already-referenced frame again */
114 vp8_release_frame(s, dst);
115 return AVERROR(ENOMEM);
117 if (src->hwaccel_picture_private) {
118 dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
119 if (!dst->hwaccel_priv_buf)
120 return AVERROR(ENOMEM);
121 dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
126 #endif /* CONFIG_VP8_DECODER */
/* Flush decoder state: release every frame in s->frames and clear the
 * framep reference table. The free_mem flag (handling not visible in
 * this excerpt) additionally frees the persistent context buffers. */
128 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
130 VP8Context *s = avctx->priv_data;
133 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
134 vp8_release_frame(s, &s->frames[i]);
135 memset(s->framep, 0, sizeof(s->framep));
141 static void vp8_decode_flush(AVCodecContext *avctx)
143 vp8_decode_flush_impl(avctx, 0);
/* Return a frame slot that is not currently referenced as CURRENT,
 * PREVIOUS, GOLDEN or GOLDEN2. If the chosen slot still holds an old
 * buffer it is released first.
 * NOTE(review): the loop break, the "no free frame" abort path after the
 * FATAL log, and the final return are missing from this excerpt. */
146 static VP8Frame *vp8_find_free_buffer(VP8Context *s)
148 VP8Frame *frame = NULL;
151 // find a free buffer
152 for (i = 0; i < 5; i++)
153 if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
154 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
155 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
156 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
157 frame = &s->frames[i];
/* should be unreachable: 5 slots vs. 4 possible references */
161 av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
164 if (frame->tf.f->buf[0])
165 vp8_release_frame(s, frame);
/* Build the candidate pixel-format list (hwaccel formats first when the
 * corresponding hwaccels are compiled in, software format last — the
 * entries themselves are missing from this excerpt) and let
 * ff_get_format() negotiate with the user callback. */
170 static enum AVPixelFormat get_pixel_format(VP8Context *s)
172 enum AVPixelFormat pix_fmts[] = {
173 #if CONFIG_VP8_VAAPI_HWACCEL
176 #if CONFIG_VP8_NVDEC_HWACCEL
183 return ff_get_format(s->avctx, pix_fmts);
/* (Re)configure the decoder for a new frame size: flush and reallocate
 * when dimensions or macroblock counts changed, negotiate the pixel
 * format for VP8 (not for VP7 or WebP-embedded streams), then allocate
 * the macroblock array, top-row caches and per-thread data.
 * Returns 0 or a negative AVERROR.
 * NOTE(review): several interior lines (error checks after
 * ff_set_dimensions/get_pixel_format, cleanup in the ENOMEM paths,
 * HAVE_THREADS guards around the pthread init calls, the final return)
 * are missing from this excerpt. */
186 static av_always_inline
187 int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
189 AVCodecContext *avctx = s->avctx;
/* reallocate only if size changed, or mb counts mismatch while buffers exist */
192 if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
193 height != s->avctx->height) {
194 vp8_decode_flush_impl(s->avctx, 1);
196 ret = ff_set_dimensions(s->avctx, width, height);
201 if (!s->actually_webp && !is_vp7) {
202 s->pix_fmt = get_pixel_format(s);
204 return AVERROR(EINVAL);
205 avctx->pix_fmt = s->pix_fmt;
208 s->mb_width = (s->avctx->coded_width + 15) / 16;
209 s->mb_height = (s->avctx->coded_height + 15) / 16;
/* VP7 and slice threading use the "full plane" macroblock layout */
211 s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
212 avctx->thread_count > 1;
213 if (!s->mb_layout) { // Frame threading and one thread
214 s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
215 sizeof(*s->macroblocks));
216 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
217 } else // Sliced threading
218 s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
219 sizeof(*s->macroblocks));
220 s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
221 s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
222 s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
224 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
225 !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
227 return AVERROR(ENOMEM);
230 for (i = 0; i < MAX_THREADS; i++) {
231 s->thread_data[i].filter_strength =
232 av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
233 if (!s->thread_data[i].filter_strength) {
235 return AVERROR(ENOMEM);
/* presumably guarded by HAVE_THREADS in the full file — TODO confirm */
238 pthread_mutex_init(&s->thread_data[i].lock, NULL);
239 pthread_cond_init(&s->thread_data[i].cond, NULL);
/* skip the left-edge guard macroblock */
243 s->macroblocks = s->macroblocks_base + 1;
248 static int vp7_update_dimensions(VP8Context *s, int width, int height)
250 return update_dimensions(s, width, height, IS_VP7);
253 static int vp8_update_dimensions(VP8Context *s, int width, int height)
255 return update_dimensions(s, width, height, IS_VP8);
259 static void parse_segment_info(VP8Context *s)
261 VP56RangeCoder *c = &s->c;
264 s->segmentation.update_map = vp8_rac_get(c);
265 s->segmentation.update_feature_data = vp8_rac_get(c);
267 if (s->segmentation.update_feature_data) {
268 s->segmentation.absolute_vals = vp8_rac_get(c);
270 for (i = 0; i < 4; i++)
271 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
273 for (i = 0; i < 4; i++)
274 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
276 if (s->segmentation.update_map)
277 for (i = 0; i < 3; i++)
278 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
281 static void update_lf_deltas(VP8Context *s)
283 VP56RangeCoder *c = &s->c;
286 for (i = 0; i < 4; i++) {
287 if (vp8_rac_get(c)) {
288 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
291 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
295 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
296 if (vp8_rac_get(c)) {
297 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
300 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
/* Parse the coefficient-partition layout: 1/2/4/8 partitions whose sizes
 * (except the last) are stored as 24-bit LE values before the data, and
 * initialize a range decoder over each. Returns 0 or a negative AVERROR.
 * NOTE(review): interior lines (the initial buf_size check, error
 * returns, the per-partition buf advance, and the trailing return) are
 * missing from this excerpt. */
305 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
307 const uint8_t *sizes = buf;
311 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
/* skip over the size table to the first partition's payload */
313 buf += 3 * (s->num_coeff_partitions - 1);
314 buf_size -= 3 * (s->num_coeff_partitions - 1);
318 for (i = 0; i < s->num_coeff_partitions - 1; i++) {
319 int size = AV_RL24(sizes + 3 * i);
320 if (buf_size - size < 0)
322 s->coeff_partition_size[i] = size;
324 ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
/* last partition gets whatever data remains */
331 s->coeff_partition_size[i] = buf_size;
332 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
337 static void vp7_get_quants(VP8Context *s)
339 VP56RangeCoder *c = &s->c;
341 int yac_qi = vp8_rac_get_uint(c, 7);
342 int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
343 int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
344 int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
345 int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
346 int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
348 s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
349 s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
350 s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
351 s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
352 s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
353 s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
356 static void vp8_get_quants(VP8Context *s)
358 VP56RangeCoder *c = &s->c;
361 s->quant.yac_qi = vp8_rac_get_uint(c, 7);
362 s->quant.ydc_delta = vp8_rac_get_sint(c, 4);
363 s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
364 s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
365 s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
366 s->quant.uvac_delta = vp8_rac_get_sint(c, 4);
368 for (i = 0; i < 4; i++) {
369 if (s->segmentation.enabled) {
370 base_qi = s->segmentation.base_quant[i];
371 if (!s->segmentation.absolute_vals)
372 base_qi += s->quant.yac_qi;
374 base_qi = s->quant.yac_qi;
376 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta, 7)];
377 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
378 s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
379 /* 101581>>16 is equivalent to 155/100 */
380 s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
381 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
382 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];
384 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
385 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
390 * Determine which buffers golden and altref should be updated with after this frame.
391 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
393 * Intra frames update all 3 references
394 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
395 * If the update (golden|altref) flag is set, it's updated with the current frame
396 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
397 * If the flag is not set, the number read means:
399 * 1: VP56_FRAME_PREVIOUS
400 * 2: update golden with altref, or update altref with golden
402 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
404 VP56RangeCoder *c = &s->c;
407 return VP56_FRAME_CURRENT;
409 switch (vp8_rac_get_uint(c, 2)) {
411 return VP56_FRAME_PREVIOUS;
413 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
415 return VP56_FRAME_NONE;
418 static void vp78_reset_probability_tables(VP8Context *s)
421 for (i = 0; i < 4; i++)
422 for (j = 0; j < 16; j++)
423 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
424 sizeof(s->prob->token[i][j]));
427 static void vp78_update_probability_tables(VP8Context *s)
429 VP56RangeCoder *c = &s->c;
432 for (i = 0; i < 4; i++)
433 for (j = 0; j < 8; j++)
434 for (k = 0; k < 3; k++)
435 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
436 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
437 int prob = vp8_rac_get_uint(c, 8);
438 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
439 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
443 #define VP7_MVC_SIZE 17
444 #define VP8_MVC_SIZE 19
446 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
449 VP56RangeCoder *c = &s->c;
453 for (i = 0; i < 4; i++)
454 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
456 for (i = 0; i < 3; i++)
457 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
459 // 17.2 MV probability update
460 for (i = 0; i < 2; i++)
461 for (j = 0; j < mvc_size; j++)
462 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
463 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
466 static void update_refs(VP8Context *s)
468 VP56RangeCoder *c = &s->c;
470 int update_golden = vp8_rac_get(c);
471 int update_altref = vp8_rac_get(c);
473 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
474 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
477 static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
481 for (j = 1; j < 3; j++) {
482 for (i = 0; i < height / 2; i++)
483 memcpy(dst->data[j] + i * dst->linesize[j],
484 src->data[j] + i * src->linesize[j], width / 2);
/**
 * Apply a VP7 fade to a plane: out = clip(y + y*beta/256 + alpha) for
 * every pixel, reading from src and writing to dst row by row.
 * (Restored the alpha/beta parameter line, the pixel load and the
 * braces missing from the truncated excerpt.)
 */
static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
                 const uint8_t *src, ptrdiff_t src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int i, j;

    for (j = 0; j < height; j++) {
        const uint8_t *src2 = src + j * src_linesize;
        uint8_t *dst2 = dst + j * dst_linesize;
        for (i = 0; i < width; i++) {
            uint8_t y = src2[i];
            dst2[i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
        }
    }
}
/* Parse the VP7 fade parameters and, on interframes with a non-zero
 * fade, produce a faded copy of the previous frame (allocating a new
 * previous-frame buffer when it is shared with the golden frame so the
 * golden reference is preserved). Returns 0 or a negative AVERROR.
 * NOTE(review): interior lines (opening brace, dst/src declarations,
 * error gotos, the trailing return) are missing from this excerpt. */
504 static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
506 int alpha = (int8_t) vp8_rac_get_uint(c, 8);
507 int beta = (int8_t) vp8_rac_get_uint(c, 8);
510 if (!s->keyframe && (alpha || beta)) {
511 int width = s->mb_width * 16;
512 int height = s->mb_height * 16;
515 if (!s->framep[VP56_FRAME_PREVIOUS] ||
516 !s->framep[VP56_FRAME_GOLDEN]) {
517 av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
518 return AVERROR_INVALIDDATA;
522 src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
524 /* preserve the golden frame, write a new previous frame */
525 if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
526 s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
527 if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
530 dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
/* chroma is copied unchanged; the fade only touches luma */
532 copy_chroma(dst, src, width, height);
535 fade(dst->data[0], dst->linesize[0],
536 src->data[0], src->linesize[0],
537 width, height, alpha, beta);
/* Parse a complete VP7 frame header (sections A-J as commented inline):
 * profile/keyframe byte, first-partition range coder, dimensions,
 * macroblock features, quantizers, reference updates, fading, loop
 * filter, scan order and probability updates. Returns 0 or a negative
 * AVERROR.
 * NOTE(review): many interior lines (opening brace, buf_size guard at
 * 550-551, keyframe-only blocks' braces, error returns after the range
 * coder inits and dimension update, interframe conditionals, the final
 * return) are missing from this excerpt. */
543 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
545 VP56RangeCoder *c = &s->c;
546 int part1_size, hscale, vscale, i, j, ret;
547 int width = s->avctx->width;
548 int height = s->avctx->height;
551 return AVERROR_INVALIDDATA;
554 s->profile = (buf[0] >> 1) & 7;
555 if (s->profile > 1) {
556 avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
557 return AVERROR_INVALIDDATA;
560 s->keyframe = !(buf[0] & 1);
562 part1_size = AV_RL24(buf) >> 4;
564 if (buf_size < 4 - s->profile + part1_size) {
565 av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
566 return AVERROR_INVALIDDATA;
/* profile 0 uses a 4-byte uncompressed header, profile 1 a 3-byte one */
569 buf += 4 - s->profile;
570 buf_size -= 4 - s->profile;
572 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
574 ret = ff_vp56_init_range_decoder(c, buf, part1_size);
578 buf_size -= part1_size;
580 /* A. Dimension information (keyframes only) */
582 width = vp8_rac_get_uint(c, 12);
583 height = vp8_rac_get_uint(c, 12);
584 hscale = vp8_rac_get_uint(c, 2);
585 vscale = vp8_rac_get_uint(c, 2);
586 if (hscale || vscale)
587 avpriv_request_sample(s->avctx, "Upscaling");
/* keyframes reset all reference/probability state to defaults */
589 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
590 vp78_reset_probability_tables(s);
591 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
592 sizeof(s->prob->pred16x16));
593 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
594 sizeof(s->prob->pred8x8c));
595 for (i = 0; i < 2; i++)
596 memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
597 sizeof(vp7_mv_default_prob[i]));
598 memset(&s->segmentation, 0, sizeof(s->segmentation));
599 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
600 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
603 if (s->keyframe || s->profile > 0)
604 memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));
606 /* B. Decoding information for all four macroblock-level features */
607 for (i = 0; i < 4; i++) {
608 s->feature_enabled[i] = vp8_rac_get(c);
609 if (s->feature_enabled[i]) {
610 s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
612 for (j = 0; j < 3; j++)
613 s->feature_index_prob[i][j] =
614 vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
616 if (vp7_feature_value_size[s->profile][i])
617 for (j = 0; j < 4; j++)
618 s->feature_value[i][j] =
619 vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
/* VP7 has no segmentation or loop-filter deltas */
623 s->segmentation.enabled = 0;
624 s->segmentation.update_map = 0;
625 s->lf_delta.enabled = 0;
627 s->num_coeff_partitions = 1;
628 ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
632 if (!s->macroblocks_base || /* first frame */
633 width != s->avctx->width || height != s->avctx->height ||
634 (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
635 if ((ret = vp7_update_dimensions(s, width, height)) < 0)
639 /* C. Dequantization indices */
642 /* D. Golden frame update flag (a Flag) for interframes only */
644 s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
645 s->sign_bias[VP56_FRAME_GOLDEN] = 0;
649 s->update_probabilities = 1;
652 if (s->profile > 0) {
653 s->update_probabilities = vp8_rac_get(c);
654 if (!s->update_probabilities)
655 s->prob[1] = s->prob[0];
658 s->fade_present = vp8_rac_get(c);
/* bail out if the range coder has already consumed all its input */
661 if (c->end <= c->buffer && c->bits >= 0)
662 return AVERROR_INVALIDDATA;
663 /* E. Fading information for previous frame */
664 if (s->fade_present && vp8_rac_get(c)) {
665 if ((ret = vp7_fade_frame(s ,c)) < 0)
669 /* F. Loop filter type */
671 s->filter.simple = vp8_rac_get(c);
673 /* G. DCT coefficient ordering specification */
675 for (i = 1; i < 16; i++)
676 s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];
678 /* H. Loop filter levels */
680 s->filter.simple = vp8_rac_get(c);
681 s->filter.level = vp8_rac_get_uint(c, 6);
682 s->filter.sharpness = vp8_rac_get_uint(c, 3);
684 /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
685 vp78_update_probability_tables(s);
687 s->mbskip_enabled = 0;
689 /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
691 s->prob->intra = vp8_rac_get_uint(c, 8);
692 s->prob->last = vp8_rac_get_uint(c, 8);
693 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
/* Parse a complete VP8 frame header: uncompressed 3/10-byte prefix
 * (keyframe start code, dimensions, scale), header-partition range
 * coder, segmentation, loop filter, partitions, quantizers, reference
 * updates and probability updates; finally snapshots the range-coder
 * state for hwaccels. Returns 0 or a negative AVERROR.
 * NOTE(review): interior lines (opening brace, several condition lines
 * and error returns, keyframe-only braces, the final return) are
 * missing from this excerpt. */
699 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
701 VP56RangeCoder *c = &s->c;
702 int header_size, hscale, vscale, ret;
703 int width = s->avctx->width;
704 int height = s->avctx->height;
707 av_log(s->avctx, AV_LOG_ERROR, "Insufficent data (%d) for header\n", buf_size);
708 return AVERROR_INVALIDDATA;
711 s->keyframe = !(buf[0] & 1);
712 s->profile = (buf[0]>>1) & 7;
713 s->invisible = !(buf[0] & 0x10);
714 header_size = AV_RL24(buf) >> 5;
718 s->header_partition_size = header_size;
721 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
/* profile 0 uses sixtap epel MC, profiles 1-3 bilinear */
724 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
725 sizeof(s->put_pixels_tab));
726 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
727 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
728 sizeof(s->put_pixels_tab));
730 if (header_size > buf_size - 7 * s->keyframe) {
731 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
732 return AVERROR_INVALIDDATA;
736 if (AV_RL24(buf) != 0x2a019d) {
737 av_log(s->avctx, AV_LOG_ERROR,
738 "Invalid start code 0x%x\n", AV_RL24(buf));
739 return AVERROR_INVALIDDATA;
741 width = AV_RL16(buf + 3) & 0x3fff;
742 height = AV_RL16(buf + 5) & 0x3fff;
743 hscale = buf[4] >> 6;
744 vscale = buf[6] >> 6;
748 if (hscale || vscale)
749 avpriv_request_sample(s->avctx, "Upscaling");
/* keyframes reset all reference/probability state to defaults */
751 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
752 vp78_reset_probability_tables(s);
753 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
754 sizeof(s->prob->pred16x16));
755 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
756 sizeof(s->prob->pred8x8c));
757 memcpy(s->prob->mvc, vp8_mv_default_prob,
758 sizeof(s->prob->mvc));
759 memset(&s->segmentation, 0, sizeof(s->segmentation));
760 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
763 ret = ff_vp56_init_range_decoder(c, buf, header_size);
767 buf_size -= header_size;
770 s->colorspace = vp8_rac_get(c);
772 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
773 s->fullrange = vp8_rac_get(c);
776 if ((s->segmentation.enabled = vp8_rac_get(c)))
777 parse_segment_info(s);
779 s->segmentation.update_map = 0; // FIXME: move this to some init function?
781 s->filter.simple = vp8_rac_get(c);
782 s->filter.level = vp8_rac_get_uint(c, 6);
783 s->filter.sharpness = vp8_rac_get_uint(c, 3);
785 if ((s->lf_delta.enabled = vp8_rac_get(c))) {
786 s->lf_delta.update = vp8_rac_get(c);
787 if (s->lf_delta.update)
791 if (setup_partitions(s, buf, buf_size)) {
792 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
793 return AVERROR_INVALIDDATA;
796 if (!s->macroblocks_base || /* first frame */
797 width != s->avctx->width || height != s->avctx->height ||
798 (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
799 if ((ret = vp8_update_dimensions(s, width, height)) < 0)
806 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
807 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
810 // if we aren't saving this frame's probabilities for future frames,
811 // make a copy of the current probabilities
812 if (!(s->update_probabilities = vp8_rac_get(c)))
813 s->prob[1] = s->prob[0];
815 s->update_last = s->keyframe || vp8_rac_get(c);
817 vp78_update_probability_tables(s);
819 if ((s->mbskip_enabled = vp8_rac_get(c)))
820 s->prob->mbskip = vp8_rac_get_uint(c, 8);
823 s->prob->intra = vp8_rac_get_uint(c, 8);
824 s->prob->last = vp8_rac_get_uint(c, 8);
825 s->prob->golden = vp8_rac_get_uint(c, 8);
826 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
829 // Record the entropy coder state here so that hwaccels can use it.
830 s->c.code_word = vp56_rac_renorm(&s->c);
831 s->coder_state_at_header_end.input = s->c.buffer - (-s->c.bits / 8);
832 s->coder_state_at_header_end.range = s->c.high;
833 s->coder_state_at_header_end.value = s->c.code_word >> 16;
834 s->coder_state_at_header_end.bit_count = -s->c.bits % 8;
839 static av_always_inline
840 void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
842 dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
843 av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
844 dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
845 av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
849 * Motion vector coding, 17.1.
851 static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
855 if (vp56_rac_get_prob_branchy(c, p[0])) {
858 for (i = 0; i < 3; i++)
859 x += vp56_rac_get_prob(c, p[9 + i]) << i;
860 for (i = (vp7 ? 7 : 9); i > 3; i--)
861 x += vp56_rac_get_prob(c, p[9 + i]) << i;
862 if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
866 const uint8_t *ps = p + 2;
867 bit = vp56_rac_get_prob(c, *ps);
870 bit = vp56_rac_get_prob(c, *ps);
873 x += vp56_rac_get_prob(c, *ps);
876 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
879 static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
881 return read_mv_component(c, p, 1);
884 static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
886 return read_mv_component(c, p, 0);
889 static av_always_inline
890 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
893 return vp7_submv_prob;
896 return vp8_submv_prob[4 - !!left];
898 return vp8_submv_prob[2];
899 return vp8_submv_prob[1 - !!left];
903 * Split motion vector prediction, 16.4.
904 * @returns the number of motion vectors parsed (2, 4 or 16)
/* Split MV prediction (spec 16.4): choose the 16x8/8x16/8x8/4x4
 * partitioning via the mbsplit tree, then for each partition decode its
 * sub-MV as NEW (read components), ZERO, ABOVE or LEFT relative to the
 * neighboring sub-MVs. Returns the number of motion vectors parsed
 * (2, 4 or 16).
 * NOTE(review): interior lines (part_idx/n/num declarations, the k
 * index computation, the left/above selection conditions, closing
 * braces and the return) are missing from this excerpt. */
906 static av_always_inline
907 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
908 int layout, int is_vp7)
912 VP8Macroblock *top_mb;
913 VP8Macroblock *left_mb = &mb[-1];
914 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
915 const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
917 VP56mv *left_mv = left_mb->bmv;
918 VP56mv *cur_mv = mb->bmv;
920 if (!layout) // layout is inlined, s->mb_layout is not
923 top_mb = &mb[-s->mb_width - 1];
924 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
925 top_mv = top_mb->bmv;
/* walk the mbsplit tree to pick the partitioning */
927 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
928 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
929 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
931 part_idx = VP8_SPLITMVMODE_8x8;
933 part_idx = VP8_SPLITMVMODE_4x4;
936 num = vp8_mbsplit_count[part_idx];
937 mbsplits_cur = vp8_mbsplits[part_idx],
938 firstidx = vp8_mbfirstidx[part_idx];
939 mb->partitioning = part_idx;
941 for (n = 0; n < num; n++) {
943 uint32_t left, above;
944 const uint8_t *submv_prob;
/* first block row takes the left/above neighbor MVs from the
 * neighboring macroblocks, later rows from this macroblock */
947 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
949 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
951 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
953 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
955 submv_prob = get_submv_prob(left, above, is_vp7);
957 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
958 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
959 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
960 mb->bmv[n].y = mb->mv.y +
961 read_mv_component(c, s->prob->mvc[0], is_vp7);
962 mb->bmv[n].x = mb->mv.x +
963 read_mv_component(c, s->prob->mvc[1], is_vp7);
965 AV_ZERO32(&mb->bmv[n]);
968 AV_WN32A(&mb->bmv[n], above);
971 AV_WN32A(&mb->bmv[n], left);
979 * The vp7 reference decoder uses a padding macroblock column (added to right
980 * edge of the frame) to guard against illegal macroblock offsets. The
981 * algorithm has bugs that permit offsets to straddle the padding column.
982 * This function replicates those bugs.
984 * @param[out] edge_x macroblock x address
985 * @param[out] edge_y macroblock y address
987 * @return macroblock offset legal (boolean)
/**
 * Replicate the VP7 reference decoder's (buggy) macroblock offset
 * computation over a virtual plane that has one padding column appended
 * to the right edge (vwidth = mb_width + 1).
 *
 * @param[out] edge_x macroblock x address of the offset position
 * @param[out] edge_y macroblock y address of the offset position
 * @return 1 if the offset is legal, 0 if it falls before the boundary
 *         or lands in the padding column
 * (Restored the opening brace and both return statements missing from
 * the truncated excerpt.)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
    return 1;
}
1002 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
1004 return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
/* VP7 motion vector decoding: score the candidate predictors from
 * vp7_mv_pred (using the buggy-by-design offset function above),
 * accumulate ZERO/NEAREST/NEAR counts, then pick the macroblock mode
 * (MV / NEAR / NEAREST / ZERO, with MV possibly split) from the
 * vp7_mode_contexts tree.
 * NOTE(review): interior lines (opening brace, idx/i declarations,
 * near_mv declaration, several else/brace lines and the clamps) are
 * missing from this excerpt. */
1007 static av_always_inline
1008 void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
1009 int mb_x, int mb_y, int layout)
1011 VP8Macroblock *mb_edge[12];
1012 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
1013 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1016 uint8_t cnt[3] = { 0 };
1017 VP56RangeCoder *c = &s->c;
1020 AV_ZERO32(&near_mv[0]);
1021 AV_ZERO32(&near_mv[1]);
1022 AV_ZERO32(&near_mv[2]);
1024 for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
1025 const VP7MVPred * pred = &vp7_mv_pred[i];
1028 if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
1029 pred->yoffset, !s->profile, &edge_x, &edge_y)) {
/* resolve the edge macroblock in whichever layout is active */
1030 VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
1031 ? s->macroblocks_base + 1 + edge_x +
1032 (s->mb_width + 1) * (edge_y + 1)
1033 : s->macroblocks + edge_x +
1034 (s->mb_height - edge_y - 1) * 2;
1035 uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
1037 if (AV_RN32A(&near_mv[CNT_NEAREST])) {
1038 if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
1040 } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
1041 if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
1045 AV_WN32A(&near_mv[CNT_NEAR], mv);
1049 AV_WN32A(&near_mv[CNT_NEAREST], mv);
1058 cnt[idx] += vp7_mv_pred[i].score;
1061 mb->partitioning = VP8_SPLITMVMODE_NONE;
1063 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
1064 mb->mode = VP8_MVMODE_MV;
1066 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
1068 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
1070 if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
1071 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
1073 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
1075 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
1076 mb->mode = VP8_MVMODE_SPLIT;
/* last sub-MV becomes the macroblock MV */
1077 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
1079 mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
1080 mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
1081 mb->bmv[0] = mb->mv;
1084 mb->mv = near_mv[CNT_NEAR];
1085 mb->bmv[0] = mb->mv;
1088 mb->mv = near_mv[CNT_NEAREST];
1089 mb->bmv[0] = mb->mv;
1092 mb->mode = VP8_MVMODE_ZERO;
1094 mb->bmv[0] = mb->mv;
/* VP8 motion vector decoding: gather the top, left and top-left
 * neighbor MVs (sign-flipped when the reference sign bias differs),
 * merge equal candidates with weighted counts, then pick the mode
 * (MV / NEAR / NEAREST / ZERO, MV possibly split) from
 * vp8_mode_contexts, clamping results to the frame's MV bounds.
 * NOTE(review): interior lines (opening brace, left-edge entry of
 * mb_edge[], idx/near_mv declarations, closing braces) are missing from
 * this excerpt. */
1098 static av_always_inline
1099 void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
1100 int mb_x, int mb_y, int layout)
1102 VP8Macroblock *mb_edge[3] = { 0 /* top */,
1105 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1106 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1108 int cur_sign_bias = s->sign_bias[mb->ref_frame];
1109 int8_t *sign_bias = s->sign_bias;
1111 uint8_t cnt[4] = { 0 };
1112 VP56RangeCoder *c = &s->c;
1114 if (!layout) { // layout is inlined (s->mb_layout is not)
1115 mb_edge[0] = mb + 2;
1116 mb_edge[2] = mb + 1;
1118 mb_edge[0] = mb - s->mb_width - 1;
1119 mb_edge[2] = mb - s->mb_width - 2;
1122 AV_ZERO32(&near_mv[0]);
1123 AV_ZERO32(&near_mv[1]);
1124 AV_ZERO32(&near_mv[2]);
1126 /* Process MB on top, left and top-left */
1127 #define MV_EDGE_CHECK(n) \
1129 VP8Macroblock *edge = mb_edge[n]; \
1130 int edge_ref = edge->ref_frame; \
1131 if (edge_ref != VP56_FRAME_CURRENT) { \
1132 uint32_t mv = AV_RN32A(&edge->mv); \
1134 if (cur_sign_bias != sign_bias[edge_ref]) { \
1135 /* SWAR negate of the values in mv. */ \
1137 mv = ((mv & 0x7fff7fff) + \
1138 0x00010001) ^ (mv & 0x80008000); \
1140 if (!n || mv != AV_RN32A(&near_mv[idx])) \
1141 AV_WN32A(&near_mv[++idx], mv); \
1142 cnt[idx] += 1 + (n != 2); \
1144 cnt[CNT_ZERO] += 1 + (n != 2); \
1152 mb->partitioning = VP8_SPLITMVMODE_NONE;
1153 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1154 mb->mode = VP8_MVMODE_MV;
1156 /* If we have three distinct MVs, merge first and last if they're the same */
1157 if (cnt[CNT_SPLITMV] &&
1158 AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1159 cnt[CNT_NEAREST] += 1;
1161 /* Swap near and nearest if necessary */
1162 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1163 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
1164 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1167 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1168 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1169 /* Choose the best mv out of 0,0 and the nearest mv */
1170 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
1171 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
1172 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
1173 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1175 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1176 mb->mode = VP8_MVMODE_SPLIT;
/* last sub-MV becomes the macroblock MV */
1177 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1179 mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
1180 mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
1181 mb->bmv[0] = mb->mv;
1184 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
1185 mb->bmv[0] = mb->mv;
1188 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
1189 mb->bmv[0] = mb->mv;
1192 mb->mode = VP8_MVMODE_ZERO;
1194 mb->bmv[0] = mb->mv;
/* Decode the sixteen 4x4 intra prediction sub-modes of a macroblock.
 * Keyframes use context from the left and top neighbor sub-modes
 * (cached per-row or in the macroblock depending on layout);
 * interframes use a single context-free probability table.
 * NOTE(review): interior lines (opening brace, the keyframe/layout
 * conditionals around lines 1204-1215, closing braces) are missing from
 * this excerpt. */
1198 static av_always_inline
1199 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1200 int mb_x, int keyframe, int layout)
1202 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
/* full-plane layout: pull the top row from the macroblock above */
1205 VP8Macroblock *mb_top = mb - s->mb_width - 1;
1206 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1211 uint8_t *const left = s->intra4x4_pred_mode_left;
1213 top = mb->intra4x4_pred_mode_top;
1215 top = s->intra4x4_pred_mode_top + 4 * mb_x;
1216 for (y = 0; y < 4; y++) {
1217 for (x = 0; x < 4; x++) {
1219 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
1220 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
1221 left[y] = top[x] = *intra4x4;
/* interframes: one shared probability table, no neighbor context */
1227 for (i = 0; i < 16; i++)
1228 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
1229 vp8_pred4x4_prob_inter);
1233 static av_always_inline
1234 void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
1235 VP8Macroblock *mb, int mb_x, int mb_y,
1236 uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
1238 VP56RangeCoder *c = &s->c;
1239 static const char * const vp7_feature_name[] = { "q-index",
1241 "partial-golden-update",
1246 for (i = 0; i < 4; i++) {
1247 if (s->feature_enabled[i]) {
1248 if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
1249 int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
1250 s->feature_index_prob[i]);
1251 av_log(s->avctx, AV_LOG_WARNING,
1252 "Feature %s present in macroblock (value 0x%x)\n",
1253 vp7_feature_name[i], s->feature_value[i][index]);
1257 } else if (s->segmentation.update_map) {
1258 int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
1259 *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
1260 } else if (s->segmentation.enabled)
1261 *segment = ref ? *ref : *segment;
1262 mb->segment = *segment;
1264 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
1267 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
1268 vp8_pred16x16_prob_intra);
1270 if (mb->mode == MODE_I4x4) {
1271 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
1273 const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1274 : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1276 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1278 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1279 AV_WN32A(s->intra4x4_pred_mode_left, modes);
1282 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1283 vp8_pred8x8c_prob_intra);
1284 mb->ref_frame = VP56_FRAME_CURRENT;
1285 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
1287 if (vp56_rac_get_prob_branchy(c, s->prob->last))
1289 (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
1290 : VP56_FRAME_GOLDEN;
1292 mb->ref_frame = VP56_FRAME_PREVIOUS;
1293 s->ref_count[mb->ref_frame - 1]++;
1295 // motion vectors, 16.3
1297 vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1299 vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
1302 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1304 if (mb->mode == MODE_I4x4)
1305 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1307 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1309 mb->ref_frame = VP56_FRAME_CURRENT;
1310 mb->partitioning = VP8_SPLITMVMODE_NONE;
1311 AV_ZERO32(&mb->bmv[0]);
1316 * @param r arithmetic bitstream reader context
1317 * @param block destination for block coefficients
1318 * @param probs probabilities to use when reading trees from the bitstream
1319 * @param i initial coeff index, 0 unless a separate DC block is coded
1320 * @param qmul array holding the dc/ac dequant factor at position 0/1
1322 * @return 0 if no coeffs were decoded
1323 * otherwise, the index of the last coeff decoded plus one
1325 static av_always_inline
1326 int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1327 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1328 int i, uint8_t *token_prob, int16_t qmul[2],
1329 const uint8_t scan[16], int vp7)
1331 VP56RangeCoder c = *r;
1336 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
1340 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1342 break; // invalid input; blocks should end with EOB
1343 token_prob = probs[i][0];
1349 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1351 token_prob = probs[i + 1][1];
1353 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1354 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1356 coeff += vp56_rac_get_prob(&c, token_prob[5]);
1360 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1361 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1362 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1363 } else { // DCT_CAT2
1365 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1366 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1368 } else { // DCT_CAT3 and up
1369 int a = vp56_rac_get_prob(&c, token_prob[8]);
1370 int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
1371 int cat = (a << 1) + b;
1372 coeff = 3 + (8 << cat);
1373 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
1376 token_prob = probs[i + 1][2];
1378 block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
1385 static av_always_inline
1386 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1388 int16_t dc = block[0];
1396 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1397 block[0] = pred[0] = dc;
1402 block[0] = pred[0] = dc;
1408 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1410 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1411 int i, uint8_t *token_prob,
1413 const uint8_t scan[16])
1415 return decode_block_coeffs_internal(r, block, probs, i,
1416 token_prob, qmul, scan, IS_VP7);
1419 #ifndef vp8_decode_block_coeffs_internal
1420 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1422 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1423 int i, uint8_t *token_prob,
1426 return decode_block_coeffs_internal(r, block, probs, i,
1427 token_prob, qmul, ff_zigzag_scan, IS_VP8);
1432 * @param c arithmetic bitstream reader context
1433 * @param block destination for block coefficients
1434 * @param probs probabilities to use when reading trees from the bitstream
1435 * @param i initial coeff index, 0 unless a separate DC block is coded
1436 * @param zero_nhood the initial prediction context for number of surrounding
1437 * all-zero blocks (only left/top, so 0-2)
1438 * @param qmul array holding the dc/ac dequant factor at position 0/1
1439 * @param scan scan pattern (VP7 only)
1441 * @return 0 if no coeffs were decoded
1442 * otherwise, the index of the last coeff decoded plus one
1444 static av_always_inline
1445 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1446 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1447 int i, int zero_nhood, int16_t qmul[2],
1448 const uint8_t scan[16], int vp7)
1450 uint8_t *token_prob = probs[i][zero_nhood];
1451 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1453 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1454 token_prob, qmul, scan)
1455 : vp8_decode_block_coeffs_internal(c, block, probs, i,
1459 static av_always_inline
1460 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1461 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1464 int i, x, y, luma_start = 0, luma_ctx = 3;
1465 int nnz_pred, nnz, nnz_total = 0;
1466 int segment = mb->segment;
1469 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1470 nnz_pred = t_nnz[8] + l_nnz[8];
1472 // decode DC values and do hadamard
1473 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1474 nnz_pred, s->qmat[segment].luma_dc_qmul,
1475 ff_zigzag_scan, is_vp7);
1476 l_nnz[8] = t_nnz[8] = !!nnz;
1478 if (is_vp7 && mb->mode > MODE_I4x4) {
1479 nnz |= inter_predict_dc(td->block_dc,
1480 s->inter_dc_pred[mb->ref_frame - 1]);
1487 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1489 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
1496 for (y = 0; y < 4; y++)
1497 for (x = 0; x < 4; x++) {
1498 nnz_pred = l_nnz[y] + t_nnz[x];
1499 nnz = decode_block_coeffs(c, td->block[y][x],
1500 s->prob->token[luma_ctx],
1501 luma_start, nnz_pred,
1502 s->qmat[segment].luma_qmul,
1503 s->prob[0].scan, is_vp7);
1504 /* nnz+block_dc may be one more than the actual last index,
1505 * but we don't care */
1506 td->non_zero_count_cache[y][x] = nnz + block_dc;
1507 t_nnz[x] = l_nnz[y] = !!nnz;
1512 // TODO: what to do about dimensions? 2nd dim for luma is x,
1513 // but for chroma it's (y<<1)|x
1514 for (i = 4; i < 6; i++)
1515 for (y = 0; y < 2; y++)
1516 for (x = 0; x < 2; x++) {
1517 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1518 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1519 s->prob->token[2], 0, nnz_pred,
1520 s->qmat[segment].chroma_qmul,
1521 s->prob[0].scan, is_vp7);
1522 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1523 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1527 // if there were no coded coeffs despite the macroblock not being marked skip,
1528 // we MUST not do the inner loop filter and should not do IDCT
1529 // Since skip isn't used for bitstream prediction, just manually set it.
1534 static av_always_inline
1535 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1536 uint8_t *src_cb, uint8_t *src_cr,
1537 ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
1539 AV_COPY128(top_border, src_y + 15 * linesize);
1541 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1542 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1546 static av_always_inline
1547 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1548 uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
1549 int mb_y, int mb_width, int simple, int xchg)
1551 uint8_t *top_border_m1 = top_border - 32; // for TL prediction
1553 src_cb -= uvlinesize;
1554 src_cr -= uvlinesize;
1556 #define XCHG(a, b, xchg) \
1564 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1565 XCHG(top_border, src_y, xchg);
1566 XCHG(top_border + 8, src_y + 8, 1);
1567 if (mb_x < mb_width - 1)
1568 XCHG(top_border + 32, src_y + 16, 1);
1570 // only copy chroma for normal loop filter
1571 // or to initialize the top row to 127
1572 if (!simple || !mb_y) {
1573 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1574 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1575 XCHG(top_border + 16, src_cb, 1);
1576 XCHG(top_border + 24, src_cr, 1);
1580 static av_always_inline
1581 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1584 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1586 return mb_y ? mode : LEFT_DC_PRED8x8;
1589 static av_always_inline
1590 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1593 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1595 return mb_y ? mode : HOR_PRED8x8;
1598 static av_always_inline
1599 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1603 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1605 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1607 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1608 case PLANE_PRED8x8: /* TM */
1609 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1614 static av_always_inline
1615 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1618 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1620 return mb_y ? mode : HOR_VP8_PRED;
1624 static av_always_inline
1625 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1626 int *copy_buf, int vp7)
1630 if (!mb_x && mb_y) {
1635 case DIAG_DOWN_LEFT_PRED:
1636 case VERT_LEFT_PRED:
1637 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1645 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1647 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1648 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1649 * as 16x16/8x8 DC */
1650 case DIAG_DOWN_RIGHT_PRED:
1651 case VERT_RIGHT_PRED:
1660 static av_always_inline
1661 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1662 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1664 int x, y, mode, nnz;
1667 /* for the first row, we need to run xchg_mb_border to init the top edge
1668 * to 127 otherwise, skip it if we aren't going to deblock */
1669 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1670 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1671 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1672 s->filter.simple, 1);
1674 if (mb->mode < MODE_I4x4) {
1675 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1676 s->hpc.pred16x16[mode](dst[0], s->linesize);
1678 uint8_t *ptr = dst[0];
1679 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1680 const uint8_t lo = is_vp7 ? 128 : 127;
1681 const uint8_t hi = is_vp7 ? 128 : 129;
1682 uint8_t tr_top[4] = { lo, lo, lo, lo };
1684 // all blocks on the right edge of the macroblock use bottom edge
1685 // the top macroblock for their topright edge
1686 uint8_t *tr_right = ptr - s->linesize + 16;
1688 // if we're on the right edge of the frame, said edge is extended
1689 // from the top macroblock
1690 if (mb_y && mb_x == s->mb_width - 1) {
1691 tr = tr_right[-1] * 0x01010101u;
1692 tr_right = (uint8_t *) &tr;
1696 AV_ZERO128(td->non_zero_count_cache);
1698 for (y = 0; y < 4; y++) {
1699 uint8_t *topright = ptr + 4 - s->linesize;
1700 for (x = 0; x < 4; x++) {
1702 ptrdiff_t linesize = s->linesize;
1703 uint8_t *dst = ptr + 4 * x;
1704 LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);
1706 if ((y == 0 || x == 3) && mb_y == 0) {
1709 topright = tr_right;
1711 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1712 mb_y + y, ©, is_vp7);
1714 dst = copy_dst + 12;
1718 AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1720 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1724 copy_dst[3] = ptr[4 * x - s->linesize - 1];
1733 copy_dst[11] = ptr[4 * x - 1];
1734 copy_dst[19] = ptr[4 * x + s->linesize - 1];
1735 copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1736 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1739 s->hpc.pred4x4[mode](dst, topright, linesize);
1741 AV_COPY32(ptr + 4 * x, copy_dst + 12);
1742 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1743 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1744 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
1747 nnz = td->non_zero_count_cache[y][x];
1750 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1751 td->block[y][x], s->linesize);
1753 s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1754 td->block[y][x], s->linesize);
1759 ptr += 4 * s->linesize;
1764 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1765 mb_x, mb_y, is_vp7);
1766 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1767 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1769 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1770 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1771 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1772 s->filter.simple, 0);
/* Per-subpel-phase pixel requirements for motion compensation; index 1-7
 * is the fractional position of the MV within a pixel. */
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
1785 * @param s VP8 decoding context
1786 * @param dst target buffer for block data at block position
1787 * @param ref reference picture buffer at origin (0, 0)
1788 * @param mv motion vector (relative to block position) to get pixel data from
1789 * @param x_off horizontal position of block from origin (0, 0)
1790 * @param y_off vertical position of block from origin (0, 0)
1791 * @param block_w width of block (16, 8 or 4)
1792 * @param block_h height of block (always same as block_w)
1793 * @param width width of src/dst plane data
1794 * @param height height of src/dst plane data
1795 * @param linesize size of a single line of plane data, including padding
1796 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1798 static av_always_inline
1799 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1800 ThreadFrame *ref, const VP56mv *mv,
1801 int x_off, int y_off, int block_w, int block_h,
1802 int width, int height, ptrdiff_t linesize,
1803 vp8_mc_func mc_func[3][3])
1805 uint8_t *src = ref->f->data[0];
1808 ptrdiff_t src_linesize = linesize;
1810 int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
1811 int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];
1813 x_off += mv->x >> 2;
1814 y_off += mv->y >> 2;
1817 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1818 src += y_off * linesize + x_off;
1819 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1820 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1821 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1822 src - my_idx * linesize - mx_idx,
1823 EDGE_EMU_LINESIZE, linesize,
1824 block_w + subpel_idx[1][mx],
1825 block_h + subpel_idx[1][my],
1826 x_off - mx_idx, y_off - my_idx,
1828 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1829 src_linesize = EDGE_EMU_LINESIZE;
1831 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1833 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1834 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1835 linesize, block_h, 0, 0);
1840 * chroma MC function
1842 * @param s VP8 decoding context
1843 * @param dst1 target buffer for block data at block position (U plane)
1844 * @param dst2 target buffer for block data at block position (V plane)
1845 * @param ref reference picture buffer at origin (0, 0)
1846 * @param mv motion vector (relative to block position) to get pixel data from
1847 * @param x_off horizontal position of block from origin (0, 0)
1848 * @param y_off vertical position of block from origin (0, 0)
1849 * @param block_w width of block (16, 8 or 4)
1850 * @param block_h height of block (always same as block_w)
1851 * @param width width of src/dst plane data
1852 * @param height height of src/dst plane data
1853 * @param linesize size of a single line of plane data, including padding
1854 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1856 static av_always_inline
1857 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1858 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1859 int x_off, int y_off, int block_w, int block_h,
1860 int width, int height, ptrdiff_t linesize,
1861 vp8_mc_func mc_func[3][3])
1863 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1866 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1867 int my = mv->y & 7, my_idx = subpel_idx[0][my];
1869 x_off += mv->x >> 3;
1870 y_off += mv->y >> 3;
1873 src1 += y_off * linesize + x_off;
1874 src2 += y_off * linesize + x_off;
1875 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1876 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1877 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1878 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1879 src1 - my_idx * linesize - mx_idx,
1880 EDGE_EMU_LINESIZE, linesize,
1881 block_w + subpel_idx[1][mx],
1882 block_h + subpel_idx[1][my],
1883 x_off - mx_idx, y_off - my_idx, width, height);
1884 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1885 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1887 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1888 src2 - my_idx * linesize - mx_idx,
1889 EDGE_EMU_LINESIZE, linesize,
1890 block_w + subpel_idx[1][mx],
1891 block_h + subpel_idx[1][my],
1892 x_off - mx_idx, y_off - my_idx, width, height);
1893 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1894 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1896 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1897 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1900 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1901 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1902 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1906 static av_always_inline
1907 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1908 ThreadFrame *ref_frame, int x_off, int y_off,
1909 int bx_off, int by_off, int block_w, int block_h,
1910 int width, int height, VP56mv *mv)
1915 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1916 ref_frame, mv, x_off + bx_off, y_off + by_off,
1917 block_w, block_h, width, height, s->linesize,
1918 s->put_pixels_tab[block_w == 8]);
1921 if (s->profile == 3) {
1922 /* this block only applies VP8; it is safe to check
1923 * only the profile, as VP7 profile <= 1 */
1935 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1936 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1937 &uvmv, x_off + bx_off, y_off + by_off,
1938 block_w, block_h, width, height, s->uvlinesize,
1939 s->put_pixels_tab[1 + (block_w == 4)]);
1942 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1943 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1944 static av_always_inline
1945 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1948 /* Don't prefetch refs that haven't been used very often this frame. */
1949 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1950 int x_off = mb_x << 4, y_off = mb_y << 4;
1951 int mx = (mb->mv.x >> 2) + x_off + 8;
1952 int my = (mb->mv.y >> 2) + y_off;
1953 uint8_t **src = s->framep[ref]->tf.f->data;
1954 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1955 /* For threading, a ff_thread_await_progress here might be useful, but
1956 * it actually slows down the decoder. Since a bad prefetch doesn't
1957 * generate bad decoder output, we don't run it here. */
1958 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
1959 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1960 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1965 * Apply motion vectors to prediction buffer, chapter 18.
1967 static av_always_inline
1968 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1969 VP8Macroblock *mb, int mb_x, int mb_y)
1971 int x_off = mb_x << 4, y_off = mb_y << 4;
1972 int width = 16 * s->mb_width, height = 16 * s->mb_height;
1973 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1974 VP56mv *bmv = mb->bmv;
1976 switch (mb->partitioning) {
1977 case VP8_SPLITMVMODE_NONE:
1978 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1979 0, 0, 16, 16, width, height, &mb->mv);
1981 case VP8_SPLITMVMODE_4x4: {
1986 for (y = 0; y < 4; y++) {
1987 for (x = 0; x < 4; x++) {
1988 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1989 ref, &bmv[4 * y + x],
1990 4 * x + x_off, 4 * y + y_off, 4, 4,
1991 width, height, s->linesize,
1992 s->put_pixels_tab[2]);
2001 for (y = 0; y < 2; y++) {
2002 for (x = 0; x < 2; x++) {
2003 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
2004 mb->bmv[2 * y * 4 + 2 * x + 1].x +
2005 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
2006 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
2007 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
2008 mb->bmv[2 * y * 4 + 2 * x + 1].y +
2009 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
2010 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
2011 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
2012 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
2013 if (s->profile == 3) {
2017 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
2018 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
2019 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
2020 width, height, s->uvlinesize,
2021 s->put_pixels_tab[2]);
2026 case VP8_SPLITMVMODE_16x8:
2027 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2028 0, 0, 16, 8, width, height, &bmv[0]);
2029 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2030 0, 8, 16, 8, width, height, &bmv[1]);
2032 case VP8_SPLITMVMODE_8x16:
2033 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2034 0, 0, 8, 16, width, height, &bmv[0]);
2035 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2036 8, 0, 8, 16, width, height, &bmv[1]);
2038 case VP8_SPLITMVMODE_8x8:
2039 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2040 0, 0, 8, 8, width, height, &bmv[0]);
2041 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2042 8, 0, 8, 8, width, height, &bmv[1]);
2043 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2044 0, 8, 8, 8, width, height, &bmv[2]);
2045 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2046 8, 8, 8, 8, width, height, &bmv[3]);
2051 static av_always_inline
2052 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
2056 if (mb->mode != MODE_I4x4) {
2057 uint8_t *y_dst = dst[0];
2058 for (y = 0; y < 4; y++) {
2059 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
2061 if (nnz4 & ~0x01010101) {
2062 for (x = 0; x < 4; x++) {
2063 if ((uint8_t) nnz4 == 1)
2064 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
2067 else if ((uint8_t) nnz4 > 1)
2068 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
2076 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
2079 y_dst += 4 * s->linesize;
2083 for (ch = 0; ch < 2; ch++) {
2084 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
2086 uint8_t *ch_dst = dst[1 + ch];
2087 if (nnz4 & ~0x01010101) {
2088 for (y = 0; y < 2; y++) {
2089 for (x = 0; x < 2; x++) {
2090 if ((uint8_t) nnz4 == 1)
2091 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2092 td->block[4 + ch][(y << 1) + x],
2094 else if ((uint8_t) nnz4 > 1)
2095 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2096 td->block[4 + ch][(y << 1) + x],
2100 goto chroma_idct_end;
2102 ch_dst += 4 * s->uvlinesize;
2105 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
2113 static av_always_inline
2114 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2115 VP8FilterStrength *f, int is_vp7)
2117 int interior_limit, filter_level;
2119 if (s->segmentation.enabled) {
2120 filter_level = s->segmentation.filter_level[mb->segment];
2121 if (!s->segmentation.absolute_vals)
2122 filter_level += s->filter.level;
2124 filter_level = s->filter.level;
2126 if (s->lf_delta.enabled) {
2127 filter_level += s->lf_delta.ref[mb->ref_frame];
2128 filter_level += s->lf_delta.mode[mb->mode];
2131 filter_level = av_clip_uintp2(filter_level, 6);
2133 interior_limit = filter_level;
2134 if (s->filter.sharpness) {
2135 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2136 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2138 interior_limit = FFMAX(interior_limit, 1);
2140 f->filter_level = filter_level;
2141 f->inner_limit = interior_limit;
2142 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2143 mb->mode == VP8_MVMODE_SPLIT;
2146 static av_always_inline
2147 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2148 int mb_x, int mb_y, int is_vp7)
2150 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2151 int filter_level = f->filter_level;
2152 int inner_limit = f->inner_limit;
2153 int inner_filter = f->inner_filter;
2154 ptrdiff_t linesize = s->linesize;
2155 ptrdiff_t uvlinesize = s->uvlinesize;
2156 static const uint8_t hev_thresh_lut[2][64] = {
2157 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2158 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2159 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2161 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2162 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2163 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2171 bedge_lim_y = filter_level;
2172 bedge_lim_uv = filter_level * 2;
2173 mbedge_lim = filter_level + 2;
2176 bedge_lim_uv = filter_level * 2 + inner_limit;
2177 mbedge_lim = bedge_lim_y + 4;
2180 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2183 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2184 mbedge_lim, inner_limit, hev_thresh);
2185 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2186 mbedge_lim, inner_limit, hev_thresh);
2189 #define H_LOOP_FILTER_16Y_INNER(cond) \
2190 if (cond && inner_filter) { \
2191 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2192 bedge_lim_y, inner_limit, \
2194 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2195 bedge_lim_y, inner_limit, \
2197 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2198 bedge_lim_y, inner_limit, \
2200 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2201 uvlinesize, bedge_lim_uv, \
2202 inner_limit, hev_thresh); \
2205 H_LOOP_FILTER_16Y_INNER(!is_vp7)
2208 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2209 mbedge_lim, inner_limit, hev_thresh);
2210 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2211 mbedge_lim, inner_limit, hev_thresh);
2215 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2216 linesize, bedge_lim_y,
2217 inner_limit, hev_thresh);
2218 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2219 linesize, bedge_lim_y,
2220 inner_limit, hev_thresh);
2221 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2222 linesize, bedge_lim_y,
2223 inner_limit, hev_thresh);
2224 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2225 dst[2] + 4 * uvlinesize,
2226 uvlinesize, bedge_lim_uv,
2227 inner_limit, hev_thresh);
2230 H_LOOP_FILTER_16Y_INNER(is_vp7)
2233 static av_always_inline
2234 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2237 int mbedge_lim, bedge_lim;
2238 int filter_level = f->filter_level;
2239 int inner_limit = f->inner_limit;
2240 int inner_filter = f->inner_filter;
2241 ptrdiff_t linesize = s->linesize;
2246 bedge_lim = 2 * filter_level + inner_limit;
2247 mbedge_lim = bedge_lim + 4;
2250 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2252 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2253 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2254 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2258 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2260 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2261 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2262 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2266 #define MARGIN (16 << 2)
2267 static av_always_inline
2268 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2269 VP8Frame *prev_frame, int is_vp7)
2271 VP8Context *s = avctx->priv_data;
2274 s->mv_bounds.mv_min.y = -MARGIN;
2275 s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2276 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2277 VP8Macroblock *mb = s->macroblocks_base +
2278 ((s->mb_width + 1) * (mb_y + 1) + 1);
2279 int mb_xy = mb_y * s->mb_width;
2281 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2283 s->mv_bounds.mv_min.x = -MARGIN;
2284 s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2285 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2287 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2288 DC_PRED * 0x01010101);
2289 decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2290 prev_frame && prev_frame->seg_map ?
2291 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
2292 s->mv_bounds.mv_min.x -= 64;
2293 s->mv_bounds.mv_max.x -= 64;
2295 s->mv_bounds.mv_min.y -= 64;
2296 s->mv_bounds.mv_max.y -= 64;
2300 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2301 VP8Frame *prev_frame)
2303 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
2306 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2307 VP8Frame *prev_frame)
2309 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
/* Block the calling slice thread (td) until thread otd has progressed to at
 * least macroblock (mb_y_check, mb_x_check).  Positions are packed as
 * (y << 16) | (x & 0xFFFF), the same encoding stored in thread_mb_pos /
 * wait_mb_pos.  The thread advertises what it is waiting for in
 * td->wait_mb_pos, sleeps on otd's condition variable, and resets
 * wait_mb_pos to INT_MAX ("waiting for nothing") once satisfied. */
2313 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2315     int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2316     if (atomic_load(&otd->thread_mb_pos) < tmp) { \
2317         pthread_mutex_lock(&otd->lock); \
2318         atomic_store(&td->wait_mb_pos, tmp); \
2320             if (atomic_load(&otd->thread_mb_pos) >= tmp) \
2322             pthread_cond_wait(&otd->cond, &otd->lock); \
2324         atomic_store(&td->wait_mb_pos, INT_MAX); \
2325         pthread_mutex_unlock(&otd->lock); \
/* Publish this thread's progress as (mb_y << 16) | (mb_x & 0xFFFF) into
 * td->thread_mb_pos and, when slice threading is active, broadcast on td's
 * condition variable if a neighbouring thread's advertised wait_mb_pos is
 * now satisfied (pos_check).  With no neighbours (is_null) the wakeup is
 * done unconditionally. */
2329 #define update_pos(td, mb_y, mb_x) \
2331     int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2332     int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2334     int is_null = !next_td || !prev_td; \
2335     int pos_check = (is_null) ? 1 : \
2336         (next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) || \
2337         (prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos)); \
2338     atomic_store(&td->thread_mb_pos, pos); \
2339     if (sliced_threading && pos_check) { \
2340         pthread_mutex_lock(&td->lock); \
2341         pthread_cond_broadcast(&td->cond); \
2342         pthread_mutex_unlock(&td->lock); \
/* Single-threaded fallbacks: no inter-thread progress tracking is needed,
 * so both macros reduce to empty statements. */
2346 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
2347 #define update_pos(td, mb_y, mb_x) while(0)
/* Decode one macroblock row — entropy decoding, intra/inter prediction and
 * IDCT — without applying the loop filter (filtering is a separate pass,
 * see filter_mb_row()).  Runs as a slice-thread job; ordering between
 * neighbouring jobs is enforced with check_thread_pos()/update_pos().
 * Returns 0 on success or AVERROR_INVALIDDATA on bitstream overread. */
2350 static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2351                                                     int jobnr, int threadnr, int is_vp7)
2353     VP8Context *s = avctx->priv_data;
2354     VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
/* The row this job handles is encoded in the top 16 bits of thread_mb_pos. */
2355     int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
2356     int mb_x, mb_xy = mb_y * s->mb_width;
2357     int num_jobs = s->num_jobs;
2358     VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
/* Coefficient partitions are assigned to rows round-robin; the mask works
 * because num_coeff_partitions is a power of two. */
2359     VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
/* Start-of-row destination pointers into the luma and two chroma planes. */
2362         curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2363         curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2364         curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
/* Bail out if the range coder has already consumed past its buffer. */
2367     if (c->end <= c->buffer && c->bits >= 0)
2368         return AVERROR_INVALIDDATA;
2373         prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2374     if (mb_y == s->mb_height - 1)
2377         next_td = &s->thread_data[(jobnr + 1) % num_jobs];
/* mb_layout == 1: modes were decoded in a separate pass into the bordered
 * macroblocks_base array; otherwise a two-row ring in s->macroblocks is
 * used and modes are decoded inline below. */
2378     if (s->mb_layout == 1)
2379         mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2381         // Make sure the previous frame has read its segmentation map,
2382         // if we re-use the same map.
2383         if (prev_frame && s->segmentation.enabled &&
2384             !s->segmentation.update_map)
2385             ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2386         mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2387         memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2388         AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
/* VP8 clears the left non-zero-coeff counts every row; VP7 only at row 0. */
2391     if (!is_vp7 || mb_y == 0)
2392         memset(td->left_nnz, 0, sizeof(td->left_nnz));
2394     td->mv_bounds.mv_min.x = -MARGIN;
2395     td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2397     for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
/* Re-check for overread before each macroblock. */
2398         if (c->end <= c->buffer && c->bits >= 0)
2399             return AVERROR_INVALIDDATA;
2400         // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2401         if (prev_td != td) {
2402             if (threadnr != 0) {
2403                 check_thread_pos(td, prev_td,
2404                                  mb_x + (is_vp7 ? 2 : 1),
2405                                  mb_y - (is_vp7 ? 2 : 1));
/* Thread 0's predecessor publishes positions offset by mb_width + 3 (the
 * convention used by the filter pass — see update_pos calls below). */
2407                 check_thread_pos(td, prev_td,
2408                                  mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2409                                  mb_y - (is_vp7 ? 2 : 1));
/* Prefetch upcoming destination pixels to warm the cache. */
2413         s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2415         s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2416                          dst[2] - dst[1], 2);
/* Inline mode decode for the non-separate-pass layout (flag arg is 0). */
2419             decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2420                            prev_frame && prev_frame->seg_map ?
2421                            prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2423         prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2426             decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
/* Intra modes are numerically <= MODE_I4x4; everything above is inter. */
2428         if (mb->mode <= MODE_I4x4)
2429             intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2431             inter_predict(s, td, dst, mb, mb_x, mb_y);
2433         prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2436             idct_mb(s, td, dst, mb);
/* Skipped-MB path: clear the nnz context instead of running the IDCT. */
2438             AV_ZERO64(td->left_nnz);
2439             AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2441             /* Reset DC block predictors if they would exist
2442              * if the mb had coefficients */
2443             if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2444                 td->left_nnz[8] = 0;
2445                 s->top_nnz[mb_x][8] = 0;
/* Pre-compute the loop-filter strength for the later filter pass. */
2449         if (s->deblock_filter)
2450             filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
/* Last job backs up the row border before the next row overwrites it. */
2452         if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2453             if (s->filter.simple)
2454                 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2455                                  NULL, NULL, s->linesize, 0, 1);
2457                 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2458                                  dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2461         prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
/* Slide the horizontal MV clamp window one macroblock to the left. */
2466         td->mv_bounds.mv_min.x -= 64;
2467         td->mv_bounds.mv_max.x -= 64;
/* Publish progress; past the row end, jump to the filter-pass offset. */
2469         if (mb_x == s->mb_width + 1) {
2470             update_pos(td, mb_y, s->mb_width + 3);
2472             update_pos(td, mb_y, mb_x);
2478 static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2479 int jobnr, int threadnr)
2481 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
2484 static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2485 int jobnr, int threadnr)
2487 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
/* Apply the in-loop deblocking filter to one already-decoded macroblock row.
 * Runs after decode_mb_row_no_filter() for the same row; filter strengths
 * were pre-computed there into td->filter_strength[]. */
2490 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2491                                            int jobnr, int threadnr, int is_vp7)
2493     VP8Context *s = avctx->priv_data;
2494     VP8ThreadData *td = &s->thread_data[threadnr];
2495     int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs;
2496     AVFrame *curframe = s->curframe->tf.f;
2498     VP8ThreadData *prev_td, *next_td;
/* Start-of-row destination pointers into luma and the two chroma planes. */
2500         curframe->data[0] + 16 * mb_y * s->linesize,
2501         curframe->data[1] + 8 * mb_y * s->uvlinesize,
2502         curframe->data[2] + 8 * mb_y * s->uvlinesize
/* Same two macroblock-layout cases as in decode_mb_row_no_filter(). */
2505     if (s->mb_layout == 1)
2506         mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2508         mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2513         prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2514     if (mb_y == s->mb_height - 1)
2517         next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2519     for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2520         VP8FilterStrength *f = &td->filter_strength[mb_x];
/* Wait for the row above to finish filtering past our right edge
 * (positions offset by mb_width + 3 are the filter pass — see update_pos
 * at the bottom of this loop). */
2522             check_thread_pos(td, prev_td,
2523                              (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
/* And for the row below to have decoded enough (un-offset positions). */
2525         if (next_td != &s->thread_data[0])
2526             check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
/* In the single-job case the border backup was not done during decoding,
 * so save it here before filtering clobbers the row. */
2528         if (num_jobs == 1) {
2529             if (s->filter.simple)
2530                 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2531                                  NULL, NULL, s->linesize, 0, 1);
2533                 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2534                                  dst[1], dst[2], s->linesize, s->uvlinesize, 0);
/* Simple filter touches luma only; the full filter also does chroma. */
2537         if (s->filter.simple)
2538             filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2540             filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
/* Publish filter-pass progress (offset by mb_width + 3, see above). */
2545         update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
2549 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2550 int jobnr, int threadnr)
2552 filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
2555 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2556 int jobnr, int threadnr)
2558 filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
/* Per-thread driver: each job decodes (and then filters) every num_jobs-th
 * macroblock row, interleaved with the other jobs.  Under frame threading,
 * progress is reported per row so a following frame thread can proceed. */
2561 static av_always_inline
2562 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2563                               int threadnr, int is_vp7)
2565     VP8Context *s = avctx->priv_data;
2566     VP8ThreadData *td = &s->thread_data[jobnr];
2567     VP8ThreadData *next_td = NULL, *prev_td = NULL;
2568     VP8Frame *curframe = s->curframe;
2569     int mb_y, num_jobs = s->num_jobs;
2572     td->thread_nr = threadnr;
/* Vertical MV clamp start accounts for this thread's first row offset
 * (64 clamp units per macroblock row). */
2573     td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr;
2574     td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
2575     for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
/* Tell the row workers which row we are on (high 16 bits). */
2576         atomic_store(&td->thread_mb_pos, mb_y << 16);
2577         ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
/* On error, mark this job as fully done so waiters are not deadlocked. */
2579             update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
2582         if (s->deblock_filter)
2583             s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2584         update_pos(td, mb_y, INT_MAX & 0xFFFF);
/* Slide the vertical clamp window past the rows handled by other jobs. */
2586         td->mv_bounds.mv_min.y -= 64 * num_jobs;
2587         td->mv_bounds.mv_max.y -= 64 * num_jobs;
2589         if (avctx->active_thread_type == FF_THREAD_FRAME)
2590             ff_thread_report_progress(&curframe->tf, mb_y, 0);
2596 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2597 int jobnr, int threadnr)
2599 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2602 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2603 int jobnr, int threadnr)
2605 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
/* Shared VP7/VP8 frame decode entry point: parses the frame header, manages
 * the reference-frame pointers (last/golden/altref), dispatches either to a
 * hwaccel or to the slice-threaded software row decoders, and outputs the
 * frame unless it is invisible or skipped. */
2608 static av_always_inline
2609 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2610                       AVPacket *avpkt, int is_vp7)
2612     VP8Context *s = avctx->priv_data;
2613     int ret, i, referenced, num_jobs;
2614     enum AVDiscard skip_thresh;
2615     VP8Frame *av_uninit(curframe), *prev_frame;
/* Codec-specific header parse; fills s-> with per-frame parameters. */
2618         ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2620         ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2625     if (s->actually_webp) {
2626         // avctx->pix_fmt already set in caller.
2627     } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
/* VP8 can select an alternative pixel format (hwaccel negotiation). */
2628         s->pix_fmt = get_pixel_format(s);
2629         if (s->pix_fmt < 0) {
2630             ret = AVERROR(EINVAL);
2633         avctx->pix_fmt = s->pix_fmt;
2636     prev_frame = s->framep[VP56_FRAME_CURRENT];
/* A frame is "referenced" if it updates last, golden or altref. */
2638     referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2639                  s->update_altref == VP56_FRAME_CURRENT;
/* Discard threshold: non-ref frames are cheapest to drop, then inter. */
2641     skip_thresh = !referenced ? AVDISCARD_NONREF
2642                   : !s->keyframe ? AVDISCARD_NONKEY
2645     if (avctx->skip_frame >= skip_thresh) {
/* Skipping: keep the current reference set unchanged. */
2647         memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2650     s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2652     // release no longer referenced frames
2653     for (i = 0; i < 5; i++)
2654         if (s->frames[i].tf.f->buf[0] &&
2655             &s->frames[i] != prev_frame &&
2656             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2657             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2658             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2659             vp8_release_frame(s, &s->frames[i]);
2661     curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
/* VP7/VP8 output is BT.470BG; range depends on the fullrange header bit. */
2664     avctx->colorspace = AVCOL_SPC_BT470BG;
2666         avctx->color_range = AVCOL_RANGE_JPEG;
2668         avctx->color_range = AVCOL_RANGE_MPEG;
2670     /* Given that arithmetic probabilities are updated every frame, it's quite
2671      * likely that the values we have on a random interframe are complete
2672      * junk if we didn't start decode on a keyframe. So just don't display
2673      * anything rather than junk. */
2674     if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2675                          !s->framep[VP56_FRAME_GOLDEN] ||
2676                          !s->framep[VP56_FRAME_GOLDEN2])) {
2677         av_log(avctx, AV_LOG_WARNING,
2678                "Discarding interframe without a prior keyframe!\n");
2679         ret = AVERROR_INVALIDDATA;
2683     curframe->tf.f->key_frame = s->keyframe;
2684     curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2685                                             : AV_PICTURE_TYPE_P;
2686     if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2689     // check if golden and altref are swapped
/* Rotate the reference set for the NEXT frame according to the header's
 * update_* fields; s->framep itself is only replaced after decoding. */
2690     if (s->update_altref != VP56_FRAME_NONE)
2691         s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2693         s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2695     if (s->update_golden != VP56_FRAME_NONE)
2696         s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2698         s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2701         s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2703         s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2705     s->next_framep[VP56_FRAME_CURRENT] = curframe;
/* Everything a following frame thread needs is now set up. */
2707     ff_thread_finish_setup(avctx);
/* Hardware-accelerated path: hand the packet to the hwaccel verbatim. */
2709     if (avctx->hwaccel) {
2710         ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
2714         ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
2718         ret = avctx->hwaccel->end_frame(avctx);
/* Software path: per-frame setup of the prediction context. */
2723         s->linesize   = curframe->tf.f->linesize[0];
2724         s->uvlinesize = curframe->tf.f->linesize[1];
2726         memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2727         /* Zero macroblock structures for top/top-left prediction
2728          * from outside the frame. */
2730             memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2731                    (s->mb_width + 1) * sizeof(*s->macroblocks));
2732         if (!s->mb_layout && s->keyframe)
2733             memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2735         memset(s->ref_count, 0, sizeof(s->ref_count));
/* mb_layout == 1: decode all modes/MVs in a dedicated pass first. */
2737         if (s->mb_layout == 1) {
2738             // Make sure the previous frame has read its segmentation map,
2739             // if we re-use the same map.
2740             if (prev_frame && s->segmentation.enabled &&
2741                 !s->segmentation.update_map)
2742                 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2744                 vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2746                 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2749         if (avctx->active_thread_type == FF_THREAD_FRAME)
/* Job count is bounded by both partitions and available threads. */
2752             num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2753         s->num_jobs   = num_jobs;
2754         s->curframe   = curframe;
2755         s->prev_frame = prev_frame;
2756         s->mv_bounds.mv_min.y = -MARGIN;
2757         s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2758         for (i = 0; i < MAX_THREADS; i++) {
2759             VP8ThreadData *td = &s->thread_data[i];
2760             atomic_init(&td->thread_mb_pos, 0);
2761             atomic_init(&td->wait_mb_pos, INT_MAX);
2764             avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2767             avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2771     ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
/* Promote the prepared next_framep set to the active reference set. */
2772     memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2775     // if future frames don't use the updated probabilities,
2776     // reset them to the values we saved
2777     if (!s->update_probabilities)
2778         s->prob[0] = s->prob[1];
/* Invisible frames update references but are never output. */
2780     if (!s->invisible) {
2781         if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
/* Error path: restore the reference set so state stays consistent. */
2788     memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2792 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2795 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
#if CONFIG_VP7_DECODER
/* VP7 frame decode entry point. */
static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                            AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */
/* Free all decoder state: releases per-thread buffers and frame buffers via
 * vp8_decode_flush_impl(free_mem=1), then frees the AVFrames themselves. */
2806 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2808     VP8Context *s = avctx->priv_data;
/* free_mem=1: also releases macroblock/thread allocations, not just frames. */
2814     vp8_decode_flush_impl(avctx, 1);
2815     for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2816         av_frame_free(&s->frames[i].tf.f);
2821 static av_cold int vp8_init_frames(VP8Context *s)
2824 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2825 s->frames[i].tf.f = av_frame_alloc();
2826 if (!s->frames[i].tf.f)
2827 return AVERROR(ENOMEM);
/* Shared VP7/VP8 decoder init: sets up DSP contexts, intra prediction,
 * the per-codec row-decode/filter callbacks and the internal frame pool. */
2832 static av_always_inline
2833 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2835     VP8Context *s = avctx->priv_data;
2839     s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
/* Actual pix_fmt may be renegotiated per-frame (hwaccel) for VP8. */
2840     s->pix_fmt = AV_PIX_FMT_NONE;
2841     avctx->pix_fmt = AV_PIX_FMT_YUV420P;
/* Needed for frame-threading progress reporting. */
2842     avctx->internal->allocate_progress = 1;
2844     ff_videodsp_init(&s->vdsp, 8);
/* Common VP7/VP8 DSP first; codec-specific init below overrides parts. */
2846     ff_vp78dsp_init(&s->vp8dsp);
2847     if (CONFIG_VP7_DECODER && is_vp7) {
2848         ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2849         ff_vp7dsp_init(&s->vp8dsp);
2850         s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2851         s->filter_mb_row           = vp7_filter_mb_row;
2852     } else if (CONFIG_VP8_DECODER && !is_vp7) {
2853         ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2854         ff_vp8dsp_init(&s->vp8dsp);
2855         s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2856         s->filter_mb_row           = vp8_filter_mb_row;
2859     /* does not change for VP8 */
2860     memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
/* On frame-pool allocation failure, undo everything allocated so far. */
2862     if ((ret = vp8_init_frames(s)) < 0) {
2863         ff_vp8_decode_free(avctx);
#if CONFIG_VP7_DECODER
/* AVCodec.init callback for the VP7 decoder. */
static int vp7_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */
2877 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2879 return vp78_decode_init(avctx, IS_VP8);
2882 #if CONFIG_VP8_DECODER
/* Frame-threading: initialise a worker thread's private context copy.
 * Only the frame pool needs allocating; decoded state is synchronised
 * later via vp8_decode_update_thread_context(). */
2884 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2886     VP8Context *s = avctx->priv_data;
/* On failure, release whatever the pool allocation managed to create. */
2891     if ((ret = vp8_init_frames(s)) < 0) {
2892         ff_vp8_decode_free(avctx);
/* Translate a frame pointer from the source context's frames[] array into
 * the equivalent slot of this context's frames[] (NULL stays NULL). */
2899 #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
/* Frame-threading: copy inter-frame decoder state (probabilities,
 * segmentation, reference frames) from the previous thread's context. */
2901 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2902                                             const AVCodecContext *src)
2904     VP8Context *s = dst->priv_data, *s_src = src->priv_data;
/* Dimensions changed: the existing macroblock arrays are stale. */
2907     if (s->macroblocks_base &&
2908         (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2910         s->mb_width  = s_src->mb_width;
2911         s->mb_height = s_src->mb_height;
2914     s->pix_fmt      = s_src->pix_fmt;
/* Take the probability set the source will actually carry forward. */
2915     s->prob[0]      = s_src->prob[!s_src->update_probabilities];
2916     s->segmentation = s_src->segmentation;
2917     s->lf_delta     = s_src->lf_delta;
2918     memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
/* Re-reference every buffer the source still holds. */
2920     for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2921         if (s_src->frames[i].tf.f->buf[0]) {
2922             int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
/* Adopt the source's post-decode reference set, rebased to our pool. */
2928     s->framep[0] = REBASE(s_src->next_framep[0]);
2929     s->framep[1] = REBASE(s_src->next_framep[1]);
2930     s->framep[2] = REBASE(s_src->next_framep[2]);
2931     s->framep[3] = REBASE(s_src->next_framep[3]);
2935 #endif /* HAVE_THREADS */
2936 #endif /* CONFIG_VP8_DECODER */
#if CONFIG_VP7_DECODER
/* VP7 decoder registration.  No threading capabilities: the VP7 code path
 * here is registered without FRAME/SLICE thread caps. */
AVCodec ff_vp7_decoder = {
    .name                  = "vp7",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP7"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP7,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = vp7_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = vp7_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1,
    .flush                 = vp8_decode_flush,
};
#endif /* CONFIG_VP7_DECODER */
#if CONFIG_VP8_DECODER
/* VP8 decoder registration: supports direct rendering, frame and slice
 * threading, plus VAAPI/NVDEC hardware acceleration when configured. */
AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
                             AV_CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
    /* NULL-terminated list of compiled-in hardware decode configurations. */
    .hw_configs            = (const AVCodecHWConfigInternal*[]) {
#if CONFIG_VP8_VAAPI_HWACCEL
                               HWACCEL_VAAPI(vp8),
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
                               HWACCEL_NVDEC(vp8),
#endif
                               NULL
                           },
};
#endif /* CONFIG_VP8_DECODER */