2 * VP7/VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Fiona Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/imgutils.h"
33 #include "rectangle.h"
/* Dispatch between the VP7 and VP8 variants of a helper, compiled out to a
 * direct reference when only one decoder is enabled.
 * NOTE: the closing #endif was dropped in this view and is restored here. */
#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
#define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
#elif CONFIG_VP7_DECODER
#define VPX(vp7, f) vp7_ ## f
#else // CONFIG_VP8_DECODER
#define VPX(vp7, f) vp8_ ## f
#endif
50 static void free_buffers(VP8Context *s)
54 for (i = 0; i < MAX_THREADS; i++) {
56 pthread_cond_destroy(&s->thread_data[i].cond);
57 pthread_mutex_destroy(&s->thread_data[i].lock);
59 av_freep(&s->thread_data[i].filter_strength);
61 av_freep(&s->thread_data);
62 av_freep(&s->macroblocks_base);
63 av_freep(&s->intra4x4_pred_mode_top);
64 av_freep(&s->top_nnz);
65 av_freep(&s->top_border);
67 s->macroblocks = NULL;
70 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
73 if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
74 ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
76 if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
78 if (s->avctx->hwaccel) {
79 const AVHWAccel *hwaccel = s->avctx->hwaccel;
80 if (hwaccel->frame_priv_data_size) {
81 f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
82 if (!f->hwaccel_priv_buf)
84 f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
90 av_buffer_unref(&f->seg_map);
91 ff_thread_release_buffer(s->avctx, &f->tf);
92 return AVERROR(ENOMEM);
95 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
97 av_buffer_unref(&f->seg_map);
98 av_buffer_unref(&f->hwaccel_priv_buf);
99 f->hwaccel_picture_private = NULL;
100 ff_thread_release_buffer(s->avctx, &f->tf);
#if CONFIG_VP8_DECODER
/**
 * Make dst a new reference to src (frame buffer, seg_map, hwaccel data).
 * Any previous contents of dst are released first.
 *
 * @return 0 on success, negative AVERROR on failure
 */
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }
    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            return AVERROR(ENOMEM);
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;
}
#endif /* CONFIG_VP8_DECODER */
128 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
130 VP8Context *s = avctx->priv_data;
133 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
134 vp8_release_frame(s, &s->frames[i]);
135 memset(s->framep, 0, sizeof(s->framep));
141 static void vp8_decode_flush(AVCodecContext *avctx)
143 vp8_decode_flush_impl(avctx, 0);
146 static VP8Frame *vp8_find_free_buffer(VP8Context *s)
148 VP8Frame *frame = NULL;
151 // find a free buffer
152 for (i = 0; i < 5; i++)
153 if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
154 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
155 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
156 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
157 frame = &s->frames[i];
161 av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
164 if (frame->tf.f->buf[0])
165 vp8_release_frame(s, frame);
170 static enum AVPixelFormat get_pixel_format(VP8Context *s)
172 enum AVPixelFormat pix_fmts[] = {
173 #if CONFIG_VP8_VAAPI_HWACCEL
176 #if CONFIG_VP8_NVDEC_HWACCEL
183 return ff_get_format(s->avctx, pix_fmts);
/* (Re)initialize decoder state for the given coded dimensions: flush and
 * free old buffers on a size change, renegotiate the pixel format (VP8,
 * non-WebP only), then allocate the macroblock array, intra-pred top row,
 * nnz/top-border caches and per-thread data.  The macroblock layout differs
 * between frame threading (one thread) and slice threading.
 * NOTE(review): several original lines (braces, error paths, declarations)
 * are missing from this view; code below is left byte-identical. */
186 static av_always_inline
187 int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
189 AVCodecContext *avctx = s->avctx;
/* Full reinit when dimensions changed, or mb geometry no longer matches
 * an existing macroblock array. */
192 if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
193 height != s->avctx->height) {
194 vp8_decode_flush_impl(s->avctx, 1);
196 ret = ff_set_dimensions(s->avctx, width, height);
/* WebP wraps VP8 and sets the pixel format itself; VP7 is always yuv420p. */
201 if (!s->actually_webp && !is_vp7) {
202 s->pix_fmt = get_pixel_format(s);
204 return AVERROR(EINVAL);
205 avctx->pix_fmt = s->pix_fmt;
208 s->mb_width = (s->avctx->coded_width + 15) / 16;
209 s->mb_height = (s->avctx->coded_height + 15) / 16;
/* mb_layout selects the sliced-threading macroblock addressing scheme. */
211 s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
212 avctx->thread_count > 1;
213 if (!s->mb_layout) { // Frame threading and one thread
214 s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
215 sizeof(*s->macroblocks));
216 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
217 } else // Sliced threading
218 s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
219 sizeof(*s->macroblocks));
220 s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
221 s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
222 s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
/* intra4x4_pred_mode_top is only allocated in the !mb_layout branch. */
224 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
225 !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
227 return AVERROR(ENOMEM);
230 for (i = 0; i < MAX_THREADS; i++) {
231 s->thread_data[i].filter_strength =
232 av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
233 if (!s->thread_data[i].filter_strength) {
235 return AVERROR(ENOMEM);
238 pthread_mutex_init(&s->thread_data[i].lock, NULL);
239 pthread_cond_init(&s->thread_data[i].cond, NULL);
/* +1: row -1 (above the first row) holds dummy neighbour macroblocks. */
243 s->macroblocks = s->macroblocks_base + 1;
248 static int vp7_update_dimensions(VP8Context *s, int width, int height)
250 return update_dimensions(s, width, height, IS_VP7);
253 static int vp8_update_dimensions(VP8Context *s, int width, int height)
255 return update_dimensions(s, width, height, IS_VP8);
259 static void parse_segment_info(VP8Context *s)
261 VP56RangeCoder *c = &s->c;
264 s->segmentation.update_map = vp8_rac_get(c);
265 s->segmentation.update_feature_data = vp8_rac_get(c);
267 if (s->segmentation.update_feature_data) {
268 s->segmentation.absolute_vals = vp8_rac_get(c);
270 for (i = 0; i < 4; i++)
271 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
273 for (i = 0; i < 4; i++)
274 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
276 if (s->segmentation.update_map)
277 for (i = 0; i < 3; i++)
278 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
281 static void update_lf_deltas(VP8Context *s)
283 VP56RangeCoder *c = &s->c;
286 for (i = 0; i < 4; i++) {
287 if (vp8_rac_get(c)) {
288 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
291 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
295 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
296 if (vp8_rac_get(c)) {
297 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
300 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
305 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
307 const uint8_t *sizes = buf;
311 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
313 buf += 3 * (s->num_coeff_partitions - 1);
314 buf_size -= 3 * (s->num_coeff_partitions - 1);
318 for (i = 0; i < s->num_coeff_partitions - 1; i++) {
319 int size = AV_RL24(sizes + 3 * i);
320 if (buf_size - size < 0)
322 s->coeff_partition_size[i] = size;
324 ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
331 s->coeff_partition_size[i] = buf_size;
332 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
337 static void vp7_get_quants(VP8Context *s)
339 VP56RangeCoder *c = &s->c;
341 int yac_qi = vp8_rac_get_uint(c, 7);
342 int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
343 int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
344 int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
345 int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
346 int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
348 s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
349 s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
350 s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
351 s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
352 s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
353 s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
356 static void vp8_get_quants(VP8Context *s)
358 VP56RangeCoder *c = &s->c;
361 s->quant.yac_qi = vp8_rac_get_uint(c, 7);
362 s->quant.ydc_delta = vp8_rac_get_sint(c, 4);
363 s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
364 s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
365 s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
366 s->quant.uvac_delta = vp8_rac_get_sint(c, 4);
368 for (i = 0; i < 4; i++) {
369 if (s->segmentation.enabled) {
370 base_qi = s->segmentation.base_quant[i];
371 if (!s->segmentation.absolute_vals)
372 base_qi += s->quant.yac_qi;
374 base_qi = s->quant.yac_qi;
376 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta, 7)];
377 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
378 s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
379 /* 101581>>16 is equivalent to 155/100 */
380 s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
381 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
382 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];
384 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
385 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
390 * Determine which buffers golden and altref should be updated with after this frame.
391 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
393 * Intra frames update all 3 references
394 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
395 * If the update (golden|altref) flag is set, it's updated with the current frame
396 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
397 * If the flag is not set, the number read means:
398 * 0: no update
399 * 1: VP56_FRAME_PREVIOUS
400 * 2: update golden with altref, or update altref with golden
402 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
404 VP56RangeCoder *c = &s->c;
407 return VP56_FRAME_CURRENT;
409 switch (vp8_rac_get_uint(c, 2)) {
411 return VP56_FRAME_PREVIOUS;
413 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
415 return VP56_FRAME_NONE;
418 static void vp78_reset_probability_tables(VP8Context *s)
421 for (i = 0; i < 4; i++)
422 for (j = 0; j < 16; j++)
423 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
424 sizeof(s->prob->token[i][j]));
427 static void vp78_update_probability_tables(VP8Context *s)
429 VP56RangeCoder *c = &s->c;
432 for (i = 0; i < 4; i++)
433 for (j = 0; j < 8; j++)
434 for (k = 0; k < 3; k++)
435 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
436 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
437 int prob = vp8_rac_get_uint(c, 8);
438 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
439 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
/* Number of entries per component in the motion-vector probability table:
 * VP7 uses 17, VP8 uses 19 (passed to the shared mvc update routine). */
443 #define VP7_MVC_SIZE 17
444 #define VP8_MVC_SIZE 19
446 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
449 VP56RangeCoder *c = &s->c;
453 for (i = 0; i < 4; i++)
454 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
456 for (i = 0; i < 3; i++)
457 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
459 // 17.2 MV probability update
460 for (i = 0; i < 2; i++)
461 for (j = 0; j < mvc_size; j++)
462 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
463 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
466 static void update_refs(VP8Context *s)
468 VP56RangeCoder *c = &s->c;
470 int update_golden = vp8_rac_get(c);
471 int update_altref = vp8_rac_get(c);
473 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
474 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
477 static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
481 for (j = 1; j < 3; j++) {
482 for (i = 0; i < height / 2; i++)
483 memcpy(dst->data[j] + i * dst->linesize[j],
484 src->data[j] + i * src->linesize[j], width / 2);
/**
 * Apply the VP7 fade: out = clip(y + y*beta/256 + alpha) per luma sample.
 *
 * @param alpha additive brightness offset (signed)
 * @param beta  multiplicative contrast delta in 1/256 units (signed)
 */
static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
                 const uint8_t *src, ptrdiff_t src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int i, j;
    for (j = 0; j < height; j++) {
        for (i = 0; i < width; i++) {
            uint8_t y = src[j * src_linesize + i];
            dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
        }
    }
}
502 static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
504 int alpha = (int8_t) vp8_rac_get_uint(c, 8);
505 int beta = (int8_t) vp8_rac_get_uint(c, 8);
508 if (!s->keyframe && (alpha || beta)) {
509 int width = s->mb_width * 16;
510 int height = s->mb_height * 16;
513 if (!s->framep[VP56_FRAME_PREVIOUS] ||
514 !s->framep[VP56_FRAME_GOLDEN]) {
515 av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
516 return AVERROR_INVALIDDATA;
520 src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
522 /* preserve the golden frame, write a new previous frame */
523 if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
524 s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
525 if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
528 dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
530 copy_chroma(dst, src, width, height);
533 fade(dst->data[0], dst->linesize[0],
534 src->data[0], src->linesize[0],
535 width, height, alpha, beta);
/* Parse a complete VP7 frame header (sections A-J): profile/keyframe bits
 * and the first-partition size from the raw bytes, then dimensions,
 * macroblock features, quantizers, reference updates, fading, loop filter,
 * scan order and probability updates from the range coder.
 * Returns 0 on success, negative AVERROR on invalid/truncated input.
 * NOTE(review): many original lines (braces, error paths, some statements)
 * are missing from this view; code below is left byte-identical. */
541 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
543 VP56RangeCoder *c = &s->c;
544 int part1_size, hscale, vscale, i, j, ret;
545 int width = s->avctx->width;
546 int height = s->avctx->height;
549 return AVERROR_INVALIDDATA;
552 s->profile = (buf[0] >> 1) & 7;
553 if (s->profile > 1) {
554 avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
555 return AVERROR_INVALIDDATA;
558 s->keyframe = !(buf[0] & 1);
560 part1_size = AV_RL24(buf) >> 4;
562 if (buf_size < 4 - s->profile + part1_size) {
563 av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
564 return AVERROR_INVALIDDATA;
/* the fixed header is 4 bytes for profile 0, 3 for profile 1 */
567 buf += 4 - s->profile;
568 buf_size -= 4 - s->profile;
570 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
572 ret = ff_vp56_init_range_decoder(c, buf, part1_size);
576 buf_size -= part1_size;
578 /* A. Dimension information (keyframes only) */
580 width = vp8_rac_get_uint(c, 12);
581 height = vp8_rac_get_uint(c, 12);
582 hscale = vp8_rac_get_uint(c, 2);
583 vscale = vp8_rac_get_uint(c, 2);
584 if (hscale || vscale)
585 avpriv_request_sample(s->avctx, "Upscaling");
/* keyframe: reset all probabilities and state to the spec defaults */
587 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
588 vp78_reset_probability_tables(s);
589 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
590 sizeof(s->prob->pred16x16));
591 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
592 sizeof(s->prob->pred8x8c));
593 for (i = 0; i < 2; i++)
594 memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
595 sizeof(vp7_mv_default_prob[i]));
596 memset(&s->segmentation, 0, sizeof(s->segmentation));
597 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
598 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
601 if (s->keyframe || s->profile > 0)
602 memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));
604 /* B. Decoding information for all four macroblock-level features */
605 for (i = 0; i < 4; i++) {
606 s->feature_enabled[i] = vp8_rac_get(c);
607 if (s->feature_enabled[i]) {
608 s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
610 for (j = 0; j < 3; j++)
611 s->feature_index_prob[i][j] =
612 vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
614 if (vp7_feature_value_size[s->profile][i])
615 for (j = 0; j < 4; j++)
616 s->feature_value[i][j] =
617 vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
/* VP7 has no segmentation / lf-delta syntax; keep them disabled */
621 s->segmentation.enabled = 0;
622 s->segmentation.update_map = 0;
623 s->lf_delta.enabled = 0;
625 s->num_coeff_partitions = 1;
626 ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
630 if (!s->macroblocks_base || /* first frame */
631 width != s->avctx->width || height != s->avctx->height ||
632 (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
633 if ((ret = vp7_update_dimensions(s, width, height)) < 0)
637 /* C. Dequantization indices */
640 /* D. Golden frame update flag (a Flag) for interframes only */
642 s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
643 s->sign_bias[VP56_FRAME_GOLDEN] = 0;
647 s->update_probabilities = 1;
650 if (s->profile > 0) {
651 s->update_probabilities = vp8_rac_get(c);
652 if (!s->update_probabilities)
653 s->prob[1] = s->prob[0];
656 s->fade_present = vp8_rac_get(c);
/* detect exhausted bitstream before reading the fading data */
659 if (c->end <= c->buffer && c->bits >= 0)
660 return AVERROR_INVALIDDATA;
661 /* E. Fading information for previous frame */
662 if (s->fade_present && vp8_rac_get(c)) {
663 if ((ret = vp7_fade_frame(s ,c)) < 0)
667 /* F. Loop filter type */
669 s->filter.simple = vp8_rac_get(c);
671 /* G. DCT coefficient ordering specification */
673 for (i = 1; i < 16; i++)
674 s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];
676 /* H. Loop filter levels */
678 s->filter.simple = vp8_rac_get(c);
679 s->filter.level = vp8_rac_get_uint(c, 6);
680 s->filter.sharpness = vp8_rac_get_uint(c, 3);
682 /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
683 vp78_update_probability_tables(s);
/* VP7 has no macroblock-skip coding */
685 s->mbskip_enabled = 0;
687 /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
689 s->prob->intra = vp8_rac_get_uint(c, 8);
690 s->prob->last = vp8_rac_get_uint(c, 8);
691 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
/* Parse a complete VP8 frame header (RFC 6386 section 9): the 3-byte
 * uncompressed chunk (keyframe/profile/show bits, first-partition size),
 * the keyframe start code and dimensions, then the compressed header:
 * segmentation, loop filter, partitions, quantizers, reference handling
 * and probability updates. Records the coder state for hwaccels at the end.
 * Returns 0 on success, negative AVERROR on invalid/truncated input.
 * NOTE(review): many original lines (braces, error paths, some statements)
 * are missing from this view; code below is left byte-identical. */
697 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
699 VP56RangeCoder *c = &s->c;
700 int header_size, hscale, vscale, ret;
701 int width = s->avctx->width;
702 int height = s->avctx->height;
705 av_log(s->avctx, AV_LOG_ERROR, "Insufficent data (%d) for header\n", buf_size);
706 return AVERROR_INVALIDDATA;
709 s->keyframe = !(buf[0] & 1);
710 s->profile = (buf[0]>>1) & 7;
711 s->invisible = !(buf[0] & 0x10);
712 header_size = AV_RL24(buf) >> 5;
716 s->header_partition_size = header_size;
719 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
/* profile 0 uses the 6-tap epel filters */
722 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
723 sizeof(s->put_pixels_tab));
724 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
725 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
726 sizeof(s->put_pixels_tab));
728 if (header_size > buf_size - 7 * s->keyframe) {
729 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
730 return AVERROR_INVALIDDATA;
/* keyframes carry a fixed start code followed by 14-bit dimensions */
734 if (AV_RL24(buf) != 0x2a019d) {
735 av_log(s->avctx, AV_LOG_ERROR,
736 "Invalid start code 0x%x\n", AV_RL24(buf));
737 return AVERROR_INVALIDDATA;
739 width = AV_RL16(buf + 3) & 0x3fff;
740 height = AV_RL16(buf + 5) & 0x3fff;
741 hscale = buf[4] >> 6;
742 vscale = buf[6] >> 6;
746 if (hscale || vscale)
747 avpriv_request_sample(s->avctx, "Upscaling");
/* keyframe: reset all probabilities and state to the spec defaults */
749 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
750 vp78_reset_probability_tables(s);
751 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
752 sizeof(s->prob->pred16x16));
753 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
754 sizeof(s->prob->pred8x8c));
755 memcpy(s->prob->mvc, vp8_mv_default_prob,
756 sizeof(s->prob->mvc));
757 memset(&s->segmentation, 0, sizeof(s->segmentation));
758 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
761 ret = ff_vp56_init_range_decoder(c, buf, header_size);
765 buf_size -= header_size;
768 s->colorspace = vp8_rac_get(c);
770 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
771 s->fullrange = vp8_rac_get(c);
774 if ((s->segmentation.enabled = vp8_rac_get(c)))
775 parse_segment_info(s);
777 s->segmentation.update_map = 0; // FIXME: move this to some init function?
779 s->filter.simple = vp8_rac_get(c);
780 s->filter.level = vp8_rac_get_uint(c, 6);
781 s->filter.sharpness = vp8_rac_get_uint(c, 3);
783 if ((s->lf_delta.enabled = vp8_rac_get(c))) {
784 s->lf_delta.update = vp8_rac_get(c);
785 if (s->lf_delta.update)
789 if (setup_partitions(s, buf, buf_size)) {
790 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
791 return AVERROR_INVALIDDATA;
794 if (!s->macroblocks_base || /* first frame */
795 width != s->avctx->width || height != s->avctx->height ||
796 (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
797 if ((ret = vp8_update_dimensions(s, width, height)) < 0)
804 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
805 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
808 // if we aren't saving this frame's probabilities for future frames,
809 // make a copy of the current probabilities
810 if (!(s->update_probabilities = vp8_rac_get(c)))
811 s->prob[1] = s->prob[0];
813 s->update_last = s->keyframe || vp8_rac_get(c);
815 vp78_update_probability_tables(s);
817 if ((s->mbskip_enabled = vp8_rac_get(c)))
818 s->prob->mbskip = vp8_rac_get_uint(c, 8);
821 s->prob->intra = vp8_rac_get_uint(c, 8);
822 s->prob->last = vp8_rac_get_uint(c, 8);
823 s->prob->golden = vp8_rac_get_uint(c, 8);
824 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
827 // Record the entropy coder state here so that hwaccels can use it.
828 s->c.code_word = vp56_rac_renorm(&s->c);
829 s->coder_state_at_header_end.input = s->c.buffer - (-s->c.bits / 8);
830 s->coder_state_at_header_end.range = s->c.high;
831 s->coder_state_at_header_end.value = s->c.code_word >> 16;
832 s->coder_state_at_header_end.bit_count = -s->c.bits % 8;
837 static av_always_inline
838 void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
840 dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
841 av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
842 dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
843 av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
847 * Motion vector coding, 17.1.
849 static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
853 if (vp56_rac_get_prob_branchy(c, p[0])) {
856 for (i = 0; i < 3; i++)
857 x += vp56_rac_get_prob(c, p[9 + i]) << i;
858 for (i = (vp7 ? 7 : 9); i > 3; i--)
859 x += vp56_rac_get_prob(c, p[9 + i]) << i;
860 if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
864 const uint8_t *ps = p + 2;
865 bit = vp56_rac_get_prob(c, *ps);
868 bit = vp56_rac_get_prob(c, *ps);
871 x += vp56_rac_get_prob(c, *ps);
874 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
877 static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
879 return read_mv_component(c, p, 1);
882 static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
884 return read_mv_component(c, p, 0);
887 static av_always_inline
888 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
891 return vp7_submv_prob;
894 return vp8_submv_prob[4 - !!left];
896 return vp8_submv_prob[2];
897 return vp8_submv_prob[1 - !!left];
/* Split motion vector prediction, 16.4: pick the partition type (4x4,
 * 8x8, 16x8/8x16) from a small tree, then for each partition decode a
 * sub-MV as NEW (delta-coded), ZERO, or copied from the above/left
 * neighbour sub-MV.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 * NOTE(review): several original lines (braces, a declaration block, the
 * k computation and return) are missing from this view; code below is
 * left byte-identical. */
901 * Split motion vector prediction, 16.4.
902 * @returns the number of motion vectors parsed (2, 4 or 16)
904 static av_always_inline
905 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
906 int layout, int is_vp7)
910 VP8Macroblock *top_mb;
911 VP8Macroblock *left_mb = &mb[-1];
912 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
913 const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
915 VP56mv *left_mv = left_mb->bmv;
916 VP56mv *cur_mv = mb->bmv;
918 if (!layout) // layout is inlined, s->mb_layout is not
921 top_mb = &mb[-s->mb_width - 1];
922 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
923 top_mv = top_mb->bmv;
/* partition-type tree: 4x4 / 8x8 / 16x8 / 8x16 */
925 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
926 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
927 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
929 part_idx = VP8_SPLITMVMODE_8x8;
931 part_idx = VP8_SPLITMVMODE_4x4;
934 num = vp8_mbsplit_count[part_idx];
935 mbsplits_cur = vp8_mbsplits[part_idx],
936 firstidx = vp8_mbfirstidx[part_idx];
937 mb->partitioning = part_idx;
939 for (n = 0; n < num; n++) {
941 uint32_t left, above;
942 const uint8_t *submv_prob;
/* fetch neighbour sub-MVs: from the adjacent MB on block edges,
 * otherwise from already-decoded sub-blocks of this MB */
945 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
947 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
949 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
951 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
953 submv_prob = get_submv_prob(left, above, is_vp7);
955 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
956 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
957 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
/* NEW: base MV plus a delta per component */
958 mb->bmv[n].y = mb->mv.y +
959 read_mv_component(c, s->prob->mvc[0], is_vp7);
960 mb->bmv[n].x = mb->mv.x +
961 read_mv_component(c, s->prob->mvc[1], is_vp7);
963 AV_ZERO32(&mb->bmv[n]);
966 AV_WN32A(&mb->bmv[n], above);
969 AV_WN32A(&mb->bmv[n], left);
/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int vwidth = mb_width + 1;            // +1 for the padding column
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    // reject offsets before the boundary or landing on the padding column
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;

    return 1;
}
1000 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
1002 return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
/* Decode the motion vector mode and MV for one VP7 inter macroblock:
 * gather nearest/near candidates from the vp7_mv_pred neighbour list
 * (using the buggy reference offset scheme), then walk the mode tree
 * (ZERO / NEAREST / NEAR / NEW / SPLIT) using the candidate scores as
 * probability contexts.
 * NOTE(review): several original lines (braces, declarations, some
 * assignments) are missing from this view; code below is left
 * byte-identical. */
1005 static av_always_inline
1006 void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
1007 int mb_x, int mb_y, int layout)
1009 VP8Macroblock *mb_edge[12];
1010 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
1011 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1014 uint8_t cnt[3] = { 0 };
1015 VP56RangeCoder *c = &s->c;
1018 AV_ZERO32(&near_mv[0]);
1019 AV_ZERO32(&near_mv[1]);
1020 AV_ZERO32(&near_mv[2]);
/* accumulate candidate MVs and their scores from the predictor list */
1022 for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
1023 const VP7MVPred * pred = &vp7_mv_pred[i];
1026 if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
1027 pred->yoffset, !s->profile, &edge_x, &edge_y)) {
1028 VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
1029 ? s->macroblocks_base + 1 + edge_x +
1030 (s->mb_width + 1) * (edge_y + 1)
1031 : s->macroblocks + edge_x +
1032 (s->mb_height - edge_y - 1) * 2;
1033 uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
1035 if (AV_RN32A(&near_mv[CNT_NEAREST])) {
1036 if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
1038 } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
1039 if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
1043 AV_WN32A(&near_mv[CNT_NEAR], mv);
1047 AV_WN32A(&near_mv[CNT_NEAREST], mv);
1056 cnt[idx] += vp7_mv_pred[i].score;
1059 mb->partitioning = VP8_SPLITMVMODE_NONE;
/* mode tree: each branch is conditioned on the matching candidate count */
1061 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
1062 mb->mode = VP8_MVMODE_MV;
1064 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
1066 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
1068 if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
1069 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
1071 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
1073 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
1074 mb->mode = VP8_MVMODE_SPLIT;
/* use the last decoded sub-MV as the MB-level MV */
1075 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
1077 mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
1078 mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
1079 mb->bmv[0] = mb->mv;
1082 mb->mv = near_mv[CNT_NEAR];
1083 mb->bmv[0] = mb->mv;
1086 mb->mv = near_mv[CNT_NEAREST];
1087 mb->bmv[0] = mb->mv;
1090 mb->mode = VP8_MVMODE_ZERO;
1092 mb->bmv[0] = mb->mv;
/* Decode the motion vector mode and MV for one VP8 inter macroblock
 * (RFC 6386 section 16): survey the top/left/top-left neighbours via
 * MV_EDGE_CHECK (with a SWAR sign flip when reference sign biases
 * differ), then walk the mode tree (ZERO / NEAREST / NEAR / NEW / SPLIT)
 * with the candidate counts as probability contexts, clamping chosen MVs
 * to the frame bounds.
 * NOTE(review): several original lines (braces, declarations, macro
 * invocations) are missing from this view; code below is left
 * byte-identical. */
1096 static av_always_inline
1097 void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
1098 int mb_x, int mb_y, int layout)
1100 VP8Macroblock *mb_edge[3] = { 0 /* top */,
1103 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1104 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1106 int cur_sign_bias = s->sign_bias[mb->ref_frame];
1107 int8_t *sign_bias = s->sign_bias;
1109 uint8_t cnt[4] = { 0 };
1110 VP56RangeCoder *c = &s->c;
/* neighbour addressing differs between the two macroblock layouts */
1112 if (!layout) { // layout is inlined (s->mb_layout is not)
1113 mb_edge[0] = mb + 2;
1114 mb_edge[2] = mb + 1;
1116 mb_edge[0] = mb - s->mb_width - 1;
1117 mb_edge[2] = mb - s->mb_width - 2;
1120 AV_ZERO32(&near_mv[0]);
1121 AV_ZERO32(&near_mv[1]);
1122 AV_ZERO32(&near_mv[2]);
1124 /* Process MB on top, left and top-left */
1125 #define MV_EDGE_CHECK(n) \
1127 VP8Macroblock *edge = mb_edge[n]; \
1128 int edge_ref = edge->ref_frame; \
1129 if (edge_ref != VP56_FRAME_CURRENT) { \
1130 uint32_t mv = AV_RN32A(&edge->mv); \
1132 if (cur_sign_bias != sign_bias[edge_ref]) { \
1133 /* SWAR negate of the values in mv. */ \
1135 mv = ((mv & 0x7fff7fff) + \
1136 0x00010001) ^ (mv & 0x80008000); \
1138 if (!n || mv != AV_RN32A(&near_mv[idx])) \
1139 AV_WN32A(&near_mv[++idx], mv); \
1140 cnt[idx] += 1 + (n != 2); \
1142 cnt[CNT_ZERO] += 1 + (n != 2); \
1150 mb->partitioning = VP8_SPLITMVMODE_NONE;
1151 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1152 mb->mode = VP8_MVMODE_MV;
1154 /* If we have three distinct MVs, merge first and last if they're the same */
1155 if (cnt[CNT_SPLITMV] &&
1156 AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1157 cnt[CNT_NEAREST] += 1;
1159 /* Swap near and nearest if necessary */
1160 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1161 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
1162 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1165 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1166 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1167 /* Choose the best mv out of 0,0 and the nearest mv */
1168 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
/* repurpose cnt[CNT_SPLITMV] as the split-mode probability context */
1169 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
1170 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
1171 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1173 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1174 mb->mode = VP8_MVMODE_SPLIT;
/* use the last decoded sub-MV as the MB-level MV */
1175 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1177 mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
1178 mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
1179 mb->bmv[0] = mb->mv;
1182 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
1183 mb->bmv[0] = mb->mv;
1186 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
1187 mb->bmv[0] = mb->mv;
1190 mb->mode = VP8_MVMODE_ZERO;
1192 mb->bmv[0] = mb->mv;
/* Decode the sixteen 4x4 intra prediction modes of a MODE_I4x4
 * macroblock. Keyframes use context trees conditioned on the modes above
 * and to the left (maintained in the top/left caches); inter frames use
 * a single context-free probability set.
 * NOTE(review): several original lines (braces, declarations, branch
 * keywords) are missing from this view; code below is left
 * byte-identical. */
1196 static av_always_inline
1197 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1198 int mb_x, int keyframe, int layout)
1200 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
/* sliced layout keeps the top-row modes inside the macroblock above */
1203 VP8Macroblock *mb_top = mb - s->mb_width - 1;
1204 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1209 uint8_t *const left = s->intra4x4_pred_mode_left;
1211 top = mb->intra4x4_pred_mode_top;
1213 top = s->intra4x4_pred_mode_top + 4 * mb_x;
1214 for (y = 0; y < 4; y++) {
1215 for (x = 0; x < 4; x++) {
1217 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
1218 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
/* decoded mode becomes the context for the blocks right/below */
1219 left[y] = top[x] = *intra4x4;
1225 for (i = 0; i < 16; i++)
1226 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
1227 vp8_pred4x4_prob_inter);
/*
 * Decode the per-macroblock mode information: segment id, skip flag,
 * prediction mode (intra 16x16 / I4x4 or inter), reference frame and,
 * for inter macroblocks, the motion vectors (spec section 16.3).
 * Writes the decoded segment back through *segment; *ref, when non-NULL,
 * supplies the previous frame's segment id for map reuse.
 */
1231 static av_always_inline
1232 void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
1233 VP8Macroblock *mb, int mb_x, int mb_y,
1234 uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
1236 VP56RangeCoder *c = &s->c;
1237 static const char * const vp7_feature_name[] = { "q-index",
1239 "partial-golden-update",
/* VP7 per-macroblock features are parsed but only reported, not applied */
1244 for (i = 0; i < 4; i++) {
1245 if (s->feature_enabled[i]) {
1246 if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
1247 int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
1248 s->feature_index_prob[i]);
1249 av_log(s->avctx, AV_LOG_WARNING,
1250 "Feature %s present in macroblock (value 0x%x)\n",
1251 vp7_feature_name[i], s->feature_value[i][index]);
1255 } else if (s->segmentation.update_map) {
/* segment id coded as a 2-bit value via two binary probabilities */
1256 int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
1257 *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
1258 } else if (s->segmentation.enabled)
/* no map update this frame: reuse the previous frame's segment if any */
1259 *segment = ref ? *ref : *segment;
1260 mb->segment = *segment;
1262 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
/* keyframe (or intra-only) path — NOTE(review): the branch condition is
 * not visible in this excerpt */
1265 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
1266 vp8_pred16x16_prob_intra);
1268 if (mb->mode == MODE_I4x4) {
1269 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
/* whole-block 16x16 mode: replicate the equivalent 4x4 mode into the
 * left/top caches so neighbouring I4x4 blocks see consistent context */
1271 const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1272 : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1274 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1276 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1277 AV_WN32A(s->intra4x4_pred_mode_left, modes);
1280 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1281 vp8_pred8x8c_prob_intra);
1282 mb->ref_frame = VP56_FRAME_CURRENT;
1283 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
/* inter macroblock: pick the reference frame (VP7 has no altref) */
1285 if (vp56_rac_get_prob_branchy(c, s->prob->last))
1287 (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
1288 : VP56_FRAME_GOLDEN;
1290 mb->ref_frame = VP56_FRAME_PREVIOUS;
/* ref_count feeds the prefetch heuristic in prefetch_motion() */
1291 s->ref_count[mb->ref_frame - 1]++;
1293 // motion vectors, 16.3
1295 vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1297 vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
/* intra macroblock in an inter frame */
1300 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1302 if (mb->mode == MODE_I4x4)
1303 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1305 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1307 mb->ref_frame = VP56_FRAME_CURRENT;
1308 mb->partitioning = VP8_SPLITMVMODE_NONE;
1309 AV_ZERO32(&mb->bmv[0]);
1314 * @param r arithmetic bitstream reader context
1315 * @param block destination for block coefficients
1316 * @param probs probabilities to use when reading trees from the bitstream
1317 * @param i initial coeff index, 0 unless a separate DC block is coded
1318 * @param qmul array holding the dc/ac dequant factor at position 0/1
1320 * @return 0 if no coeffs were decoded
1321 * otherwise, the index of the last coeff decoded plus one
1323 static av_always_inline
1324 int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1325 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1326 int i, uint8_t *token_prob, int16_t qmul[2],
1327 const uint8_t scan[16], int vp7)
/* work on a local copy of the range coder; presumably copied back to *r
 * when decoding finishes — the write-back is not visible in this excerpt */
1329 VP56RangeCoder c = *r;
1334 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
1338 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1340 break; // invalid input; blocks should end with EOB
1341 token_prob = probs[i][0];
1347 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1349 token_prob = probs[i + 1][1];
1351 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1352 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1354 coeff += vp56_rac_get_prob(&c, token_prob[5]);
1358 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1359 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1360 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1361 } else { // DCT_CAT2
1363 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1364 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1366 } else { // DCT_CAT3 and up
1367 int a = vp56_rac_get_prob(&c, token_prob[8]);
1368 int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
1369 int cat = (a << 1) + b;
/* category base value: 3 + 8<<cat, plus extra bits read below */
1370 coeff = 3 + (8 << cat);
1371 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
1374 token_prob = probs[i + 1][2];
/* sign bit, then dequantize: qmul[0] for the DC (i==0), qmul[1] for AC */
1376 block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
/*
 * VP7 inter DC prediction: track the DC coefficient of inter macroblocks
 * and predict the current block's DC from the running state in pred[].
 * Returns nonzero when it changes block[0] — NOTE(review): the full
 * return/threshold logic is not visible in this excerpt.
 */
1383 static av_always_inline
1384 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1386 int16_t dc = block[0];
/* branchless test: either value is zero, or the signs differ
 * (xor of the sign bits, arithmetic shift to propagate) */
1394 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1395 block[0] = pred[0] = dc;
1400 block[0] = pred[0] = dc;
/* Thin VP7 wrapper: forwards to the shared coefficient decoder with the
 * caller-supplied scan order and the IS_VP7 flag baked in. */
1406 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1408 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1409 int i, uint8_t *token_prob,
1411 const uint8_t scan[16])
1413 return decode_block_coeffs_internal(r, block, probs, i,
1414 token_prob, qmul, scan, IS_VP7);
/* Thin VP8 wrapper around the shared coefficient decoder, using the fixed
 * zigzag scan. Guarded by #ifndef so an architecture-specific assembly
 * implementation can replace it by defining the name as a macro. */
1417 #ifndef vp8_decode_block_coeffs_internal
1418 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1420 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1421 int i, uint8_t *token_prob,
1424 return decode_block_coeffs_internal(r, block, probs, i,
1425 token_prob, qmul, ff_zigzag_scan, IS_VP8);
1430 * @param c arithmetic bitstream reader context
1431 * @param block destination for block coefficients
1432 * @param probs probabilities to use when reading trees from the bitstream
1433 * @param i initial coeff index, 0 unless a separate DC block is coded
1434 * @param zero_nhood the initial prediction context for number of surrounding
1435 * all-zero blocks (only left/top, so 0-2)
1436 * @param qmul array holding the dc/ac dequant factor at position 0/1
1437 * @param scan scan pattern (VP7 only)
1439 * @return 0 if no coeffs were decoded
1440 * otherwise, the index of the last coeff decoded plus one
1442 static av_always_inline
1443 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1444 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1445 int i, int zero_nhood, int16_t qmul[2],
1446 const uint8_t scan[16], int vp7)
1448 uint8_t *token_prob = probs[i][zero_nhood];
/* fast path: an immediate EOB means the block is empty — avoid the
 * (non-inlined) internal decoder entirely */
1449 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1451 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1452 token_prob, qmul, scan)
1453 : vp8_decode_block_coeffs_internal(c, block, probs, i,
/*
 * Decode all residual coefficients of one macroblock into td->block:
 * the optional luma DC (WHT) block, the 16 luma 4x4 blocks and the
 * 8 chroma 4x4 blocks. Maintains the per-row/column non-zero caches
 * t_nnz (top) and l_nnz (left) used as coding contexts.
 */
1457 static av_always_inline
1458 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1459 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1462 int i, x, y, luma_start = 0, luma_ctx = 3;
1463 int nnz_pred, nnz, nnz_total = 0;
1464 int segment = mb->segment;
/* a separate luma DC block is coded unless the mb is I4x4 (or, for VP8,
 * split-MV); its presence shifts the luma AC decode to start at coeff 1 */
1467 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1468 nnz_pred = t_nnz[8] + l_nnz[8];
1470 // decode DC values and do hadamard
1471 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1472 nnz_pred, s->qmat[segment].luma_dc_qmul,
1473 ff_zigzag_scan, is_vp7);
1474 l_nnz[8] = t_nnz[8] = !!nnz;
1476 if (is_vp7 && mb->mode > MODE_I4x4) {
1477 nnz |= inter_predict_dc(td->block_dc,
1478 s->inter_dc_pred[mb->ref_frame - 1]);
/* inverse Walsh-Hadamard: scatter DC values into the 16 luma blocks;
 * the _dc variant handles the DC-only case */
1485 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1487 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
1494 for (y = 0; y < 4; y++)
1495 for (x = 0; x < 4; x++) {
1496 nnz_pred = l_nnz[y] + t_nnz[x];
1497 nnz = decode_block_coeffs(c, td->block[y][x],
1498 s->prob->token[luma_ctx],
1499 luma_start, nnz_pred,
1500 s->qmat[segment].luma_qmul,
1501 s->prob[0].scan, is_vp7);
1502 /* nnz+block_dc may be one more than the actual last index,
1503 * but we don't care */
1504 td->non_zero_count_cache[y][x] = nnz + block_dc;
1505 t_nnz[x] = l_nnz[y] = !!nnz;
1510 // TODO: what to do about dimensions? 2nd dim for luma is x,
1511 // but for chroma it's (y<<1)|x
1512 for (i = 4; i < 6; i++)
1513 for (y = 0; y < 2; y++)
1514 for (x = 0; x < 2; x++) {
1515 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1516 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1517 s->prob->token[2], 0, nnz_pred,
1518 s->qmat[segment].chroma_qmul,
1519 s->prob[0].scan, is_vp7);
1520 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1521 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1525 // if there were no coded coeffs despite the macroblock not being marked skip,
1526 // we MUST not do the inner loop filter and should not do IDCT
1527 // Since skip isn't used for bitstream prediction, just manually set it.
/*
 * Save the bottom row of a just-decoded macroblock (16 luma + 8+8 chroma
 * pixels) into the top_border cache, where the macroblock below will read
 * it as its top edge for intra prediction. Chroma is skipped when the
 * simple loop filter is in use (see the visible conditional structure).
 */
1532 static av_always_inline
1533 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1534 uint8_t *src_cb, uint8_t *src_cr,
1535 ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
1537 AV_COPY128(top_border, src_y + 15 * linesize);
1539 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1540 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
/*
 * Exchange (or copy, depending on the XCHG macro's xchg argument) the
 * pixels above the current macroblock with the top_border cache, so that
 * intra prediction sees the pre-loop-filter top edge. Called once before
 * and once after intra_predict() with opposite xchg values to restore.
 * Layout of top_border: [0..15] luma, [16..23] cb, [24..31] cr; the
 * previous entry (top_border - 32) supplies the top-left pixels.
 */
1544 static av_always_inline
1545 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1546 uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
1547 int mb_y, int mb_width, int simple, int xchg)
1549 uint8_t *top_border_m1 = top_border - 32; // for TL prediction
1551 src_cb -= uvlinesize;
1552 src_cr -= uvlinesize;
1554 #define XCHG(a, b, xchg) \
1562 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1563 XCHG(top_border, src_y, xchg);
/* the right half and top-right are always copied (xchg forced to 1):
 * prediction only reads them, never writes them back */
1564 XCHG(top_border + 8, src_y + 8, 1);
1565 if (mb_x < mb_width - 1)
1566 XCHG(top_border + 32, src_y + 16, 1);
1568 // only copy chroma for normal loop filter
1569 // or to initialize the top row to 127
1570 if (!simple || !mb_y) {
1571 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1572 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1573 XCHG(top_border + 16, src_cb, 1);
1574 XCHG(top_border + 24, src_cr, 1);
/*
 * Adjust an 8x8/16x16 DC prediction mode at picture edges: fall back to
 * top-only, left-only or fixed-128 DC when the left and/or top
 * neighbours do not exist (mb_x == 0 and/or mb_y == 0).
 */
1578 static av_always_inline
1579 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1582 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1584 return mb_y ? mode : LEFT_DC_PRED8x8;
/*
 * Adjust an 8x8/16x16 TrueMotion prediction mode at picture edges.
 * Without a left neighbour it degrades to vertical (or a flat DC fill
 * when the top row is also missing); without a top neighbour it becomes
 * horizontal. VP7 uses 128 for the flat fill, VP8 uses 129.
 */
1587 static av_always_inline
1588 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1591 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1593 return mb_y ? mode : HOR_PRED8x8;
/*
 * Map a 16x16/8x8 intra prediction mode to the edge-safe variant for the
 * current macroblock position. DC delegates to check_dc_pred8x8_mode();
 * vertical/horizontal modes fall back to a flat fill when their source
 * edge is missing (VP7 fills with 128, VP8 with 127/129); TM delegates
 * to check_tm_pred8x8_mode().
 */
1596 static av_always_inline
1597 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1601 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1603 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1605 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1606 case PLANE_PRED8x8: /* TM */
1607 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
/*
 * 4x4 analogue of check_tm_pred8x8_mode(): degrade TrueMotion to
 * vertical / horizontal / flat fill depending on which neighbours exist.
 */
1612 static av_always_inline
1613 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1616 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1618 return mb_y ? mode : HOR_VP8_PRED;
/*
 * Map a 4x4 intra prediction mode to an edge-safe variant, or request
 * prediction through a stack copy buffer (*copy_buf set) for modes that
 * need pixels the frame edge cannot provide directly. Unlike the 8x8
 * case, 4x4 DC and the diagonal/vertical-right modes take the copy-buffer
 * path instead of H.264-style DC fallbacks.
 */
1622 static av_always_inline
1623 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1624 int *copy_buf, int vp7)
1628 if (!mb_x && mb_y) {
1633 case DIAG_DOWN_LEFT_PRED:
1634 case VERT_LEFT_PRED:
1635 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1643 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1645 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1646 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1647 * as 16x16/8x8 DC */
1648 case DIAG_DOWN_RIGHT_PRED:
1649 case VERT_RIGHT_PRED:
/*
 * Perform intra prediction (and, for I4x4, the interleaved IDCT/add) for
 * one macroblock: 16x16 or per-4x4 luma prediction plus 8x8 chroma.
 * xchg_mb_border() is called before and after so prediction reads the
 * unfiltered top edge; blocks at frame edges go through the edge-safe
 * mode checks and, when needed, a small stack copy buffer.
 */
1658 static av_always_inline
1659 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1660 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1662 int x, y, mode, nnz;
1665 /* for the first row, we need to run xchg_mb_border to init the top edge
1666 * to 127 otherwise, skip it if we aren't going to deblock */
1667 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1668 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1669 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1670 s->filter.simple, 1);
1672 if (mb->mode < MODE_I4x4) {
1673 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1674 s->hpc.pred16x16[mode](dst[0], s->linesize);
1676 uint8_t *ptr = dst[0];
1677 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
/* fill values for missing edges differ between VP7 (128) and VP8 */
1678 const uint8_t lo = is_vp7 ? 128 : 127;
1679 const uint8_t hi = is_vp7 ? 128 : 129;
1680 uint8_t tr_top[4] = { lo, lo, lo, lo };
1682 // all blocks on the right edge of the macroblock use bottom edge
1683 // the top macroblock for their topright edge
1684 uint8_t *tr_right = ptr - s->linesize + 16;
1686 // if we're on the right edge of the frame, said edge is extended
1687 // from the top macroblock
1688 if (mb_y && mb_x == s->mb_width - 1) {
1689 tr = tr_right[-1] * 0x01010101u;
1690 tr_right = (uint8_t *) &tr;
1694 AV_ZERO128(td->non_zero_count_cache);
1696 for (y = 0; y < 4; y++) {
1697 uint8_t *topright = ptr + 4 - s->linesize;
1698 for (x = 0; x < 4; x++) {
1700 ptrdiff_t linesize = s->linesize;
1701 uint8_t *dst = ptr + 4 * x;
1702 LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);
1704 if ((y == 0 || x == 3) && mb_y == 0) {
1707 topright = tr_right;
1709 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1710 mb_y + y, &copy, is_vp7);
/* copy-buffer path: build a 5x8 patch with the needed border pixels,
 * predict into it, then copy the 4x4 result back below */
1712 dst = copy_dst + 12;
1716 AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1718 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1722 copy_dst[3] = ptr[4 * x - s->linesize - 1];
1731 copy_dst[11] = ptr[4 * x - 1];
1732 copy_dst[19] = ptr[4 * x + s->linesize - 1];
1733 copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1734 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1737 s->hpc.pred4x4[mode](dst, topright, linesize);
1739 AV_COPY32(ptr + 4 * x, copy_dst + 12);
1740 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1741 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1742 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
/* add the residual right after predicting each 4x4 block;
 * nnz == 1 means DC-only, > 1 means full IDCT */
1745 nnz = td->non_zero_count_cache[y][x];
1748 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1749 td->block[y][x], s->linesize);
1751 s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1752 td->block[y][x], s->linesize);
1757 ptr += 4 * s->linesize;
1762 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1763 mb_x, mb_y, is_vp7);
1764 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1765 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
/* restore the edge pixels swapped out before prediction */
1767 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1768 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1769 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1770 s->filter.simple, 0);
/* Per-subpel-phase MC edge requirements, indexed by the 3 fractional-mv
 * bits: row 0 doubles as the mc_func index. */
1773 static const uint8_t subpel_idx[3][8] = {
1774 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1775 // also function pointer index
1776 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1777 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1783 * @param s VP8 decoding context
1784 * @param dst target buffer for block data at block position
1785 * @param ref reference picture buffer at origin (0, 0)
1786 * @param mv motion vector (relative to block position) to get pixel data from
1787 * @param x_off horizontal position of block from origin (0, 0)
1788 * @param y_off vertical position of block from origin (0, 0)
1789 * @param block_w width of block (16, 8 or 4)
1790 * @param block_h height of block (always same as block_w)
1791 * @param width width of src/dst plane data
1792 * @param height height of src/dst plane data
1793 * @param linesize size of a single line of plane data, including padding
1794 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1796 static av_always_inline
1797 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1798 ThreadFrame *ref, const VP56mv *mv,
1799 int x_off, int y_off, int block_w, int block_h,
1800 int width, int height, ptrdiff_t linesize,
1801 vp8_mc_func mc_func[3][3])
1803 uint8_t *src = ref->f->data[0];
1806 ptrdiff_t src_linesize = linesize;
/* luma mvs are in quarter-pel; *2 converts to the eighth-pel phase the
 * subpel_idx table and mc functions use */
1808 int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
1809 int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];
1811 x_off += mv->x >> 2;
1812 y_off += mv->y >> 2;
/* frame-threading: wait until the reference rows we read are decoded */
1815 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1816 src += y_off * linesize + x_off;
/* if the filter footprint crosses the frame edge, run MC from an
 * edge-emulated copy instead of reading out of bounds */
1817 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1818 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1819 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1820 src - my_idx * linesize - mx_idx,
1821 EDGE_EMU_LINESIZE, linesize,
1822 block_w + subpel_idx[1][mx],
1823 block_h + subpel_idx[1][my],
1824 x_off - mx_idx, y_off - my_idx,
1826 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1827 src_linesize = EDGE_EMU_LINESIZE;
1829 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
/* integer-pel fast path: plain copy, no edge emulation needed */
1831 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1832 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1833 linesize, block_h, 0, 0);
1838 * chroma MC function
1840 * @param s VP8 decoding context
1841 * @param dst1 target buffer for block data at block position (U plane)
1842 * @param dst2 target buffer for block data at block position (V plane)
1843 * @param ref reference picture buffer at origin (0, 0)
1844 * @param mv motion vector (relative to block position) to get pixel data from
1845 * @param x_off horizontal position of block from origin (0, 0)
1846 * @param y_off vertical position of block from origin (0, 0)
1847 * @param block_w width of block (16, 8 or 4)
1848 * @param block_h height of block (always same as block_w)
1849 * @param width width of src/dst plane data
1850 * @param height height of src/dst plane data
1851 * @param linesize size of a single line of plane data, including padding
1852 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1854 static av_always_inline
1855 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1856 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1857 int x_off, int y_off, int block_w, int block_h,
1858 int width, int height, ptrdiff_t linesize,
1859 vp8_mc_func mc_func[3][3])
1861 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
/* chroma mvs are already in eighth-pel units (no *2 as in the luma path) */
1864 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1865 int my = mv->y & 7, my_idx = subpel_idx[0][my];
1867 x_off += mv->x >> 3;
1868 y_off += mv->y >> 3;
1871 src1 += y_off * linesize + x_off;
1872 src2 += y_off * linesize + x_off;
1873 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
/* out-of-frame footprint: both planes go through the edge emu buffer,
 * one after the other, reusing the same scratch space */
1874 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1875 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1876 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1877 src1 - my_idx * linesize - mx_idx,
1878 EDGE_EMU_LINESIZE, linesize,
1879 block_w + subpel_idx[1][mx],
1880 block_h + subpel_idx[1][my],
1881 x_off - mx_idx, y_off - my_idx, width, height);
1882 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1883 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1885 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1886 src2 - my_idx * linesize - mx_idx,
1887 EDGE_EMU_LINESIZE, linesize,
1888 block_w + subpel_idx[1][mx],
1889 block_h + subpel_idx[1][my],
1890 x_off - mx_idx, y_off - my_idx, width, height);
1891 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1892 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1894 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1895 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
/* integer-pel fast path for both chroma planes */
1898 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1899 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1900 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
/*
 * Motion-compensate one partition of a macroblock: luma at (bx_off,
 * by_off) with the given size, then the corresponding half-resolution
 * chroma block using a mv derived from the luma mv (derivation lines not
 * fully visible here; profile 3 uses full-pel chroma per the comment).
 */
1904 static av_always_inline
1905 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1906 ThreadFrame *ref_frame, int x_off, int y_off,
1907 int bx_off, int by_off, int block_w, int block_h,
1908 int width, int height, VP56mv *mv)
1913 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1914 ref_frame, mv, x_off + bx_off, y_off + by_off,
1915 block_w, block_h, width, height, s->linesize,
1916 s->put_pixels_tab[block_w == 8]);
1919 if (s->profile == 3) {
1920 /* this block only applies VP8; it is safe to check
1921 * only the profile, as VP7 profile <= 1 */
1933 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1934 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1935 &uvmv, x_off + bx_off, y_off + by_off,
1936 block_w, block_h, width, height, s->uvlinesize,
1937 s->put_pixels_tab[1 + (block_w == 4)]);
1940 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1941 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1942 static av_always_inline
1943 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1946 /* Don't prefetch refs that haven't been used very often this frame. */
1947 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1948 int x_off = mb_x << 4, y_off = mb_y << 4;
/* +8: aim at the center-right of the target, ahead of the decode cursor */
1949 int mx = (mb->mv.x >> 2) + x_off + 8;
1950 int my = (mb->mv.y >> 2) + y_off;
1951 uint8_t **src = s->framep[ref]->tf.f->data;
1952 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1953 /* For threading, a ff_thread_await_progress here might be useful, but
1954 * it actually slows down the decoder. Since a bad prefetch doesn't
1955 * generate bad decoder output, we don't run it here. */
1956 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
/* chroma: u and v are prefetched together; src[2]-src[1] is the
 * inter-plane stride between the two pointers */
1957 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1958 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1963 * Apply motion vectors to prediction buffer, chapter 18.
1965 static av_always_inline
1966 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1967 VP8Macroblock *mb, int mb_x, int mb_y)
1969 int x_off = mb_x << 4, y_off = mb_y << 4;
1970 int width = 16 * s->mb_width, height = 16 * s->mb_height;
1971 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1972 VP56mv *bmv = mb->bmv;
1974 switch (mb->partitioning) {
1975 case VP8_SPLITMVMODE_NONE:
/* one mv for the whole 16x16 macroblock */
1976 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1977 0, 0, 16, 16, width, height, &mb->mv);
1979 case VP8_SPLITMVMODE_4x4: {
/* sixteen 4x4 luma blocks, each with its own mv from bmv[] */
1984 for (y = 0; y < 4; y++) {
1985 for (x = 0; x < 4; x++) {
1986 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1987 ref, &bmv[4 * y + x],
1988 4 * x + x_off, 4 * y + y_off, 4, 4,
1989 width, height, s->linesize,
1990 s->put_pixels_tab[2]);
/* chroma: each 4x4 chroma block averages the four co-located luma mvs
 * (sum, then round toward zero via the sign-bit correction below) */
1999 for (y = 0; y < 2; y++) {
2000 for (x = 0; x < 2; x++) {
2001 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
2002 mb->bmv[2 * y * 4 + 2 * x + 1].x +
2003 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
2004 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
2005 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
2006 mb->bmv[2 * y * 4 + 2 * x + 1].y +
2007 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
2008 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
2009 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
2010 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
2011 if (s->profile == 3) {
2015 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
2016 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
2017 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
2018 width, height, s->uvlinesize,
2019 s->put_pixels_tab[2]);
2024 case VP8_SPLITMVMODE_16x8:
2025 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2026 0, 0, 16, 8, width, height, &bmv[0]);
2027 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2028 0, 8, 16, 8, width, height, &bmv[1]);
2030 case VP8_SPLITMVMODE_8x16:
2031 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2032 0, 0, 8, 16, width, height, &bmv[0]);
2033 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2034 8, 0, 8, 16, width, height, &bmv[1]);
2036 case VP8_SPLITMVMODE_8x8:
2037 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2038 0, 0, 8, 8, width, height, &bmv[0]);
2039 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2040 8, 0, 8, 8, width, height, &bmv[1]);
2041 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2042 0, 8, 8, 8, width, height, &bmv[2]);
2043 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2044 8, 8, 8, 8, width, height, &bmv[3]);
/*
 * Add the decoded residuals (IDCT) of a non-I4x4 macroblock to the
 * prediction in dst. Uses the non_zero_count_cache: per 4-block row,
 * the 4 counts are read as one 32-bit word; a byte of 1 means DC-only
 * (cheap dc_add), > 1 means a full IDCT, and a whole row of 0/1 bytes
 * takes the batched dc_add4y/dc_add4uv path.
 */
2049 static av_always_inline
2050 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
2054 if (mb->mode != MODE_I4x4) {
2055 uint8_t *y_dst = dst[0];
2056 for (y = 0; y < 4; y++) {
2057 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
/* any byte > 1 in the row: fall back to per-block handling */
2059 if (nnz4 & ~0x01010101) {
2060 for (x = 0; x < 4; x++) {
2061 if ((uint8_t) nnz4 == 1)
2062 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
2065 else if ((uint8_t) nnz4 > 1)
2066 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
/* all four blocks DC-only: one batched call for the row */
2074 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
2077 y_dst += 4 * s->linesize;
2081 for (ch = 0; ch < 2; ch++) {
2082 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
2084 uint8_t *ch_dst = dst[1 + ch];
2085 if (nnz4 & ~0x01010101) {
2086 for (y = 0; y < 2; y++) {
2087 for (x = 0; x < 2; x++) {
2088 if ((uint8_t) nnz4 == 1)
2089 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2090 td->block[4 + ch][(y << 1) + x],
2092 else if ((uint8_t) nnz4 > 1)
2093 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2094 td->block[4 + ch][(y << 1) + x],
2098 goto chroma_idct_end;
2100 ch_dst += 4 * s->uvlinesize;
2103 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
/*
 * Compute the loop-filter parameters for one macroblock: the filter
 * level (base level adjusted by segment, reference-frame and mode
 * deltas, clipped to 0..63), the interior limit (level reduced by the
 * sharpness setting, at least 1), and whether inner block edges are
 * filtered at all (skipped for fully-skipped non-split VP8 inter mbs).
 */
2111 static av_always_inline
2112 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2113 VP8FilterStrength *f, int is_vp7)
2115 int interior_limit, filter_level;
2117 if (s->segmentation.enabled) {
2118 filter_level = s->segmentation.filter_level[mb->segment];
2119 if (!s->segmentation.absolute_vals)
2120 filter_level += s->filter.level;
2122 filter_level = s->filter.level;
2124 if (s->lf_delta.enabled) {
2125 filter_level += s->lf_delta.ref[mb->ref_frame];
2126 filter_level += s->lf_delta.mode[mb->mode];
/* clamp to the 6-bit range the filter tables expect */
2129 filter_level = av_clip_uintp2(filter_level, 6);
2131 interior_limit = filter_level;
2132 if (s->filter.sharpness) {
2133 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2134 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2136 interior_limit = FFMAX(interior_limit, 1);
2138 f->filter_level = filter_level;
2139 f->inner_limit = interior_limit;
2140 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2141 mb->mode == VP8_MVMODE_SPLIT;
/*
 * Normal (full) loop filter for one macroblock: filters the left and top
 * macroblock edges with the stronger mbedge limits, then the three inner
 * vertical and horizontal block edges when inner_filter is set. VP7 and
 * VP8 differ in the edge-limit derivation and in where the horizontal
 * inner pass runs (see the H_LOOP_FILTER_16Y_INNER invocations).
 */
2144 static av_always_inline
2145 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2146 int mb_x, int mb_y, int is_vp7)
2148 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2149 int filter_level = f->filter_level;
2150 int inner_limit = f->inner_limit;
2151 int inner_filter = f->inner_filter;
2152 ptrdiff_t linesize = s->linesize;
2153 ptrdiff_t uvlinesize = s->uvlinesize;
/* high-edge-variance threshold by filter level; row 0 keyframes,
 * row 1 inter frames */
2154 static const uint8_t hev_thresh_lut[2][64] = {
2155 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2156 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2157 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2159 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2160 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2161 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* VP7-style edge limits */
2169 bedge_lim_y = filter_level;
2170 bedge_lim_uv = filter_level * 2;
2171 mbedge_lim = filter_level + 2;
/* VP8-style edge limits include the interior limit */
2174 bedge_lim_uv = filter_level * 2 + inner_limit;
2175 mbedge_lim = bedge_lim_y + 4;
2178 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
/* left macroblock edge (horizontal filtering of vertical edge) */
2181 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2182 mbedge_lim, inner_limit, hev_thresh);
2183 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2184 mbedge_lim, inner_limit, hev_thresh);
2187 #define H_LOOP_FILTER_16Y_INNER(cond) \
2188 if (cond && inner_filter) { \
2189 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2190 bedge_lim_y, inner_limit, \
2192 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2193 bedge_lim_y, inner_limit, \
2195 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2196 bedge_lim_y, inner_limit, \
2198 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2199 uvlinesize, bedge_lim_uv, \
2200 inner_limit, hev_thresh); \
2203 H_LOOP_FILTER_16Y_INNER(!is_vp7)
/* top macroblock edge (vertical filtering of horizontal edge) */
2206 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2207 mbedge_lim, inner_limit, hev_thresh);
2208 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2209 mbedge_lim, inner_limit, hev_thresh);
/* inner horizontal edges at rows 4, 8, 12 (luma) and 4 (chroma) */
2213 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2214 linesize, bedge_lim_y,
2215 inner_limit, hev_thresh);
2216 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2217 linesize, bedge_lim_y,
2218 inner_limit, hev_thresh);
2219 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2220 linesize, bedge_lim_y,
2221 inner_limit, hev_thresh);
2222 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2223 dst[2] + 4 * uvlinesize,
2224 uvlinesize, bedge_lim_uv,
2225 inner_limit, hev_thresh);
2228 H_LOOP_FILTER_16Y_INNER(is_vp7)
/*
 * Simple loop filter for one macroblock: luma only (dst is the luma
 * plane), filtering the left/top macroblock edges with mbedge_lim and
 * the three inner edges in each direction with bedge_lim when
 * inner_filter is set.
 */
2231 static av_always_inline
2232 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2235 int mbedge_lim, bedge_lim;
2236 int filter_level = f->filter_level;
2237 int inner_limit = f->inner_limit;
2238 int inner_filter = f->inner_filter;
2239 ptrdiff_t linesize = s->linesize;
2244 bedge_lim = 2 * filter_level + inner_limit;
2245 mbedge_lim = bedge_lim + 4;
/* left edge, then inner vertical edges at columns 4, 8, 12 */
2248 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2250 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2251 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2252 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
/* top edge, then inner horizontal edges at rows 4, 8, 12 */
2256 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2258 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2259 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2260 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
/* mv clamping margin around the frame, in the mv unit (quarter-pel * 16) */
2264 #define MARGIN (16 << 2)
/*
 * Single-threaded pre-pass over all macroblocks decoding only mode/mv
 * information (used for the alternate mb layout where modes are decoded
 * ahead of the per-row reconstruction). Maintains the sliding mv clamp
 * window in s->mv_bounds and seeds the intra4x4 top/left caches with
 * DC_PRED at frame edges.
 */
2265 static av_always_inline
2266 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2267 VP8Frame *prev_frame, int is_vp7)
2269 VP8Context *s = avctx->priv_data;
2272 s->mv_bounds.mv_min.y = -MARGIN;
2273 s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2274 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
/* +1 offsets skip the guard column/row in the macroblock array */
2275 VP8Macroblock *mb = s->macroblocks_base +
2276 ((s->mb_width + 1) * (mb_y + 1) + 1);
2277 int mb_xy = mb_y * s->mb_width;
2279 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2281 s->mv_bounds.mv_min.x = -MARGIN;
2282 s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2283 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2285 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2286 DC_PRED * 0x01010101);
2287 decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2288 prev_frame && prev_frame->seg_map ?
2289 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
/* slide the clamp window one macroblock (64 = 16px << 2) */
2290 s->mv_bounds.mv_min.x -= 64;
2291 s->mv_bounds.mv_max.x -= 64;
2293 s->mv_bounds.mv_min.y -= 64;
2294 s->mv_bounds.mv_max.y -= 64;
/* VP7 entry point for the mode/mv pre-pass (non-inline instantiation). */
2298 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2299 VP8Frame *prev_frame)
2301 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
/* VP8 entry point for the mode/mv pre-pass (non-inline instantiation). */
2304 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2305 VP8Frame *prev_frame)
2307 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
/* Sliced-threading synchronisation (threaded variant; the no-op fallbacks
 * appear further below). Blocks the calling thread `td` until thread `otd`
 * has progressed past macroblock (mb_x_check, mb_y_check). Positions are
 * packed as (mb_y << 16) | mb_x so a single atomic int comparison orders
 * them row-major. wait_mb_pos is published under otd->lock so the waking
 * thread (see update_pos) can see whom it must broadcast to; it is reset
 * to INT_MAX once the wait is satisfied. */
2311 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2313 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2314 if (atomic_load(&otd->thread_mb_pos) < tmp) { \
2315 pthread_mutex_lock(&otd->lock); \
2316 atomic_store(&td->wait_mb_pos, tmp); \
2318 if (atomic_load(&otd->thread_mb_pos) >= tmp) \
2320 pthread_cond_wait(&otd->cond, &otd->lock); \
2322 atomic_store(&td->wait_mb_pos, INT_MAX); \
2323 pthread_mutex_unlock(&otd->lock); \
/* Publish this thread's decode position (packed (mb_y << 16) | mb_x) and,
 * when slice threading is active and a neighbouring thread is known to be
 * waiting at or before this position (per its wait_mb_pos), take td->lock
 * and broadcast td->cond to wake it (pairs with check_thread_pos above). */
2327 #define update_pos(td, mb_y, mb_x) \
2329 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2330 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2332 int is_null = !next_td || !prev_td; \
2333 int pos_check = (is_null) ? 1 : \
2334 (next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) || \
2335 (prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos)); \
2336 atomic_store(&td->thread_mb_pos, pos); \
2337 if (sliced_threading && pos_check) { \
2338 pthread_mutex_lock(&td->lock); \
2339 pthread_cond_broadcast(&td->cond); \
2340 pthread_mutex_unlock(&td->lock); \
/* Single-threaded fallbacks: no inter-thread ordering is needed, so both
 * macros expand to a harmless statement. */
2344 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
2345 #define update_pos(td, mb_y, mb_x) while(0)
/* Decode one macroblock row (entropy decode, intra/inter prediction and
 * IDCT) WITHOUT loop filtering; filtering is done separately by
 * filter_mb_row(). Runs as a slice-threading job: mb_y is recovered from
 * td->thread_mb_pos (set by vp78_decode_mb_row_sliced), and neighbouring
 * jobs are ordered via check_thread_pos()/update_pos().
 * Returns 0 on success, AVERROR_INVALIDDATA on range-coder overread. */
2348 static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2349 int jobnr, int threadnr, int is_vp7)
2351 VP8Context *s = avctx->priv_data;
2352 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2353 int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
2354 int mb_x, mb_xy = mb_y * s->mb_width;
2355 int num_jobs = s->num_jobs;
2356 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
/* Coefficient partitions are assigned to rows round-robin; the partition
 * count is a power of two, so the mask selects mb_y % num_coeff_partitions. */
2357 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2360 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2361 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2362 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
/* Bail out early if this row's range coder already ran past its buffer. */
2365 if (c->end <= c->buffer && c->bits >= 0)
2366 return AVERROR_INVALIDDATA;
2371 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2372 if (mb_y == s->mb_height - 1)
2375 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2376 if (s->mb_layout == 1)
2377 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2379 // Make sure the previous frame has read its segmentation map,
2380 // if we re-use the same map.
2381 if (prev_frame && s->segmentation.enabled &&
2382 !s->segmentation.update_map)
2383 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2384 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2385 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2386 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
/* VP7 carries left_nnz across rows; VP8 resets it per row. */
2389 if (!is_vp7 || mb_y == 0)
2390 memset(td->left_nnz, 0, sizeof(td->left_nnz));
2392 td->mv_bounds.mv_min.x = -MARGIN;
2393 td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2395 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2396 if (c->end <= c->buffer && c->bits >= 0)
2397 return AVERROR_INVALIDDATA;
2398 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2399 if (prev_td != td) {
2400 if (threadnr != 0) {
2401 check_thread_pos(td, prev_td,
2402 mb_x + (is_vp7 ? 2 : 1),
2403 mb_y - (is_vp7 ? 2 : 1));
2405 check_thread_pos(td, prev_td,
2406 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2407 mb_y - (is_vp7 ? 2 : 1));
2411 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2413 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2414 dst[2] - dst[1], 2);
/* When modes were not pre-parsed (mb_layout != 1), parse them here
 * interleaved with the coefficient decode. */
2417 decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2418 prev_frame && prev_frame->seg_map ?
2419 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2421 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2424 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2426 if (mb->mode <= MODE_I4x4)
2427 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2429 inter_predict(s, td, dst, mb, mb_x, mb_y);
2431 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2434 idct_mb(s, td, dst, mb);
2436 AV_ZERO64(td->left_nnz);
2437 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2439 /* Reset DC block predictors if they would exist
2440 * if the mb had coefficients */
2441 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2442 td->left_nnz[8] = 0;
2443 s->top_nnz[mb_x][8] = 0;
2447 if (s->deblock_filter)
2448 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
/* With multiple jobs the filter pass runs concurrently, so the last
 * job must back up the row border before the filter overwrites it. */
2450 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2451 if (s->filter.simple)
2452 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2453 NULL, NULL, s->linesize, 0, 1);
2455 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2456 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2459 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2464 td->mv_bounds.mv_min.x -= 64;
2465 td->mv_bounds.mv_max.x -= 64;
2467 if (mb_x == s->mb_width + 1) {
2468 update_pos(td, mb_y, s->mb_width + 3);
2470 update_pos(td, mb_y, mb_x);
/* VP7 row-decode job (is_vp7 = 1). */
2476 static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2477 int jobnr, int threadnr)
2479 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
/* VP8 row-decode job (is_vp7 = 0). */
2482 static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2483 int jobnr, int threadnr)
2485 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
/* Apply the in-loop deblocking filter to one macroblock row, using the
 * per-mb strengths precomputed by decode_mb_row_no_filter(). Like the
 * decode pass, it recovers mb_y from td->thread_mb_pos and coordinates
 * with neighbouring jobs via check_thread_pos()/update_pos(); the filter
 * position is offset by (mb_width + 3) so it trails the decode position
 * of the same row. */
2488 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2489 int jobnr, int threadnr, int is_vp7)
2491 VP8Context *s = avctx->priv_data;
2492 VP8ThreadData *td = &s->thread_data[threadnr];
2493 int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs;
2494 AVFrame *curframe = s->curframe->tf.f;
2496 VP8ThreadData *prev_td, *next_td;
2498 curframe->data[0] + 16 * mb_y * s->linesize,
2499 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2500 curframe->data[2] + 8 * mb_y * s->uvlinesize
2503 if (s->mb_layout == 1)
2504 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2506 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2511 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2512 if (mb_y == s->mb_height - 1)
2515 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2517 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2518 VP8FilterStrength *f = &td->filter_strength[mb_x];
/* Wait until the row above is fully decoded past this column... */
2520 check_thread_pos(td, prev_td,
2521 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
/* ...and until the row below has decoded far enough that filtering
 * here cannot race with its prediction reads. */
2523 if (next_td != &s->thread_data[0])
2524 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
/* Single-job case: decode and filter run sequentially, so the border
 * backup happens here rather than in the decode pass. */
2526 if (num_jobs == 1) {
2527 if (s->filter.simple)
2528 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2529 NULL, NULL, s->linesize, 0, 1);
2531 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2532 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2535 if (s->filter.simple)
2536 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2538 filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2543 update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
/* VP7 filter-row job (is_vp7 = 1). */
2547 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2548 int jobnr, int threadnr)
2550 filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
/* VP8 filter-row job (is_vp7 = 0). */
2553 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2554 int jobnr, int threadnr)
2556 filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
/* Per-thread job entry point used with avctx->execute2(): thread `jobnr`
 * decodes rows jobnr, jobnr + num_jobs, jobnr + 2*num_jobs, ... For each
 * row it publishes its position, runs the no-filter decode pass, then the
 * filter pass, and finally reports frame-threading progress. MV y-bounds
 * are pre-offset by the thread's starting row and slid by 64*num_jobs per
 * iteration to match the rows this thread actually handles. */
2559 static av_always_inline
2560 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2561 int threadnr, int is_vp7)
2563 VP8Context *s = avctx->priv_data;
2564 VP8ThreadData *td = &s->thread_data[jobnr];
2565 VP8ThreadData *next_td = NULL, *prev_td = NULL;
2566 VP8Frame *curframe = s->curframe;
2567 int mb_y, num_jobs = s->num_jobs;
2570 td->thread_nr = threadnr;
2571 td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr;
2572 td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
2573 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2574 atomic_store(&td->thread_mb_pos, mb_y << 16);
2575 ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
/* On error, mark this thread as past the end so waiters are released. */
2577 update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
2580 if (s->deblock_filter)
2581 s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2582 update_pos(td, mb_y, INT_MAX & 0xFFFF);
2584 td->mv_bounds.mv_min.y -= 64 * num_jobs;
2585 td->mv_bounds.mv_max.y -= 64 * num_jobs;
2587 if (avctx->active_thread_type == FF_THREAD_FRAME)
2588 ff_thread_report_progress(&curframe->tf, mb_y, 0);
/* VP7 sliced-row job (IS_VP7). */
2594 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2595 int jobnr, int threadnr)
2597 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
/* VP8 sliced-row job (IS_VP8). */
2600 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2601 int jobnr, int threadnr)
2603 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
/* Decode one VP7/VP8 frame from `avpkt` into `data` (an AVFrame).
 * Sequence: parse the frame header; honour skip_frame/skip_loop_filter;
 * recycle unreferenced frame buffers; pick and allocate the output frame;
 * rotate the golden/altref/previous reference set into next_framep[];
 * then either hand the packet to a hwaccel or run the sliced/threaded
 * software decode; finally commit the reference rotation and return the
 * frame unless it is marked invisible. Returns the number of consumed
 * bytes on success (via the tail lost from this excerpt — TODO confirm)
 * or a negative AVERROR code. */
2606 static av_always_inline
2607 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2608 AVPacket *avpkt, int is_vp7)
2610 VP8Context *s = avctx->priv_data;
2611 int ret, i, referenced, num_jobs;
2612 enum AVDiscard skip_thresh;
2613 VP8Frame *av_uninit(curframe), *prev_frame;
2616 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2618 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2623 if (s->actually_webp) {
2624 // avctx->pix_fmt already set in caller.
2625 } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
2626 s->pix_fmt = get_pixel_format(s);
2627 if (s->pix_fmt < 0) {
2628 ret = AVERROR(EINVAL);
2631 avctx->pix_fmt = s->pix_fmt;
2634 prev_frame = s->framep[VP56_FRAME_CURRENT];
/* A frame is "referenced" if any later frame can predict from it. */
2636 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2637 s->update_altref == VP56_FRAME_CURRENT;
2639 skip_thresh = !referenced ? AVDISCARD_NONREF
2640 : !s->keyframe ? AVDISCARD_NONKEY
2643 if (avctx->skip_frame >= skip_thresh) {
2645 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2648 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2650 // release no longer referenced frames
2651 for (i = 0; i < 5; i++)
2652 if (s->frames[i].tf.f->buf[0] &&
2653 &s->frames[i] != prev_frame &&
2654 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2655 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2656 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2657 vp8_release_frame(s, &s->frames[i]);
2659 curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2662 avctx->colorspace = AVCOL_SPC_BT470BG;
2664 avctx->color_range = AVCOL_RANGE_JPEG;
2666 avctx->color_range = AVCOL_RANGE_MPEG;
2668 /* Given that arithmetic probabilities are updated every frame, it's quite
2669 * likely that the values we have on a random interframe are complete
2670 * junk if we didn't start decode on a keyframe. So just don't display
2671 * anything rather than junk. */
2672 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2673 !s->framep[VP56_FRAME_GOLDEN] ||
2674 !s->framep[VP56_FRAME_GOLDEN2])) {
2675 av_log(avctx, AV_LOG_WARNING,
2676 "Discarding interframe without a prior keyframe!\n");
2677 ret = AVERROR_INVALIDDATA;
2681 curframe->tf.f->key_frame = s->keyframe;
2682 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2683 : AV_PICTURE_TYPE_P;
2684 if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2687 // check if golden and altref are swapped
2688 if (s->update_altref != VP56_FRAME_NONE)
2689 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2691 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2693 if (s->update_golden != VP56_FRAME_NONE)
2694 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2696 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2699 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2701 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2703 s->next_framep[VP56_FRAME_CURRENT] = curframe;
/* After this, per-frame setup is done and frame threads may proceed. */
2705 ff_thread_finish_setup(avctx);
2707 if (avctx->hwaccel) {
2708 ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
2712 ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
2716 ret = avctx->hwaccel->end_frame(avctx);
2721 s->linesize = curframe->tf.f->linesize[0];
2722 s->uvlinesize = curframe->tf.f->linesize[1];
2724 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2725 /* Zero macroblock structures for top/top-left prediction
2726 * from outside the frame. */
2728 memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2729 (s->mb_width + 1) * sizeof(*s->macroblocks));
2730 if (!s->mb_layout && s->keyframe)
2731 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2733 memset(s->ref_count, 0, sizeof(s->ref_count));
2735 if (s->mb_layout == 1) {
2736 // Make sure the previous frame has read its segmentation map,
2737 // if we re-use the same map.
2738 if (prev_frame && s->segmentation.enabled &&
2739 !s->segmentation.update_map)
2740 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2742 vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2744 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2747 if (avctx->active_thread_type == FF_THREAD_FRAME)
/* One job per coefficient partition, capped by the thread count. */
2750 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2751 s->num_jobs = num_jobs;
2752 s->curframe = curframe;
2753 s->prev_frame = prev_frame;
2754 s->mv_bounds.mv_min.y = -MARGIN;
2755 s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2756 for (i = 0; i < MAX_THREADS; i++) {
2757 VP8ThreadData *td = &s->thread_data[i];
2758 atomic_init(&td->thread_mb_pos, 0);
2759 atomic_init(&td->wait_mb_pos, INT_MAX);
2762 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2765 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2769 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2770 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2773 // if future frames don't use the updated probabilities,
2774 // reset them to the values we saved
2775 if (!s->update_probabilities)
2776 s->prob[0] = s->prob[1];
2778 if (!s->invisible) {
2779 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2786 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
/* Public VP8 decode entry point (also used by the WebP decoder). */
2790 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2793 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2796 #if CONFIG_VP7_DECODER
/* VP7 decode entry point registered in ff_vp7_decoder below. */
2797 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2800 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2802 #endif /* CONFIG_VP7_DECODER */
/* Codec close callback: flush/release all buffers (free_buffers path via
 * vp8_decode_flush_impl) and free every per-slot AVFrame. */
2804 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2806 VP8Context *s = avctx->priv_data;
2812 vp8_decode_flush_impl(avctx, 1);
2813 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2814 av_frame_free(&s->frames[i].tf.f);
/* Allocate the AVFrame shell for every entry of s->frames[].
 * Returns 0 on success, AVERROR(ENOMEM) on allocation failure (the caller
 * is responsible for cleanup via ff_vp8_decode_free()). */
2819 static av_cold int vp8_init_frames(VP8Context *s)
2822 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2823 s->frames[i].tf.f = av_frame_alloc();
2824 if (!s->frames[i].tf.f)
2825 return AVERROR(ENOMEM);
/* Shared init for both codecs: set defaults, initialise DSP/prediction
 * tables, wire the codec-specific row callbacks, and allocate the frame
 * pool. Frees everything via ff_vp8_decode_free() if frame allocation
 * fails. */
2830 static av_always_inline
2831 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2833 VP8Context *s = avctx->priv_data;
2837 s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
2838 s->pix_fmt = AV_PIX_FMT_NONE;
2839 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2840 avctx->internal->allocate_progress = 1;
2842 ff_videodsp_init(&s->vdsp, 8);
2844 ff_vp78dsp_init(&s->vp8dsp);
/* The CONFIG_* guards let the unbuilt branch be dead-code eliminated. */
2845 if (CONFIG_VP7_DECODER && is_vp7) {
2846 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2847 ff_vp7dsp_init(&s->vp8dsp);
2848 s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2849 s->filter_mb_row = vp7_filter_mb_row;
2850 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2851 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2852 ff_vp8dsp_init(&s->vp8dsp);
2853 s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2854 s->filter_mb_row = vp8_filter_mb_row;
2857 /* does not change for VP8 */
2858 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
2860 if ((ret = vp8_init_frames(s)) < 0) {
2861 ff_vp8_decode_free(avctx);
2868 #if CONFIG_VP7_DECODER
/* VP7 init callback. */
2869 static int vp7_decode_init(AVCodecContext *avctx)
2871 return vp78_decode_init(avctx, IS_VP7);
2873 #endif /* CONFIG_VP7_DECODER */
/* Public VP8 init entry point (also used by the WebP decoder). */
2875 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2877 return vp78_decode_init(avctx, IS_VP8);
2880 #if CONFIG_VP8_DECODER
/* Frame-threading worker init: each worker context only needs its own
 * frame pool; everything else is copied in update_thread_context below. */
2882 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2884 VP8Context *s = avctx->priv_data;
2889 if ((ret = vp8_init_frames(s)) < 0) {
2890 ff_vp8_decode_free(avctx);
/* Translate a frame pointer from the source context's frames[] array to
 * the equivalent slot in this context's array (NULL stays NULL). */
2897 #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
/* Frame-threading state hand-off: copy the entropy/segmentation/loop-filter
 * state from the source context, re-reference its frame buffers, and rebase
 * the reference-frame pointers into this context's own frames[] array.
 * Dimension changes invalidate the macroblock arrays first. */
2899 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2900 const AVCodecContext *src)
2902 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2905 if (s->macroblocks_base &&
2906 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2908 s->mb_width = s_src->mb_width;
2909 s->mb_height = s_src->mb_height;
2912 s->pix_fmt = s_src->pix_fmt;
/* Pick the probability set future frames will actually predict from. */
2913 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2914 s->segmentation = s_src->segmentation;
2915 s->lf_delta = s_src->lf_delta;
2916 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2918 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2919 if (s_src->frames[i].tf.f->buf[0]) {
2920 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2926 s->framep[0] = REBASE(s_src->next_framep[0]);
2927 s->framep[1] = REBASE(s_src->next_framep[1]);
2928 s->framep[2] = REBASE(s_src->next_framep[2]);
2929 s->framep[3] = REBASE(s_src->next_framep[3]);
2933 #endif /* HAVE_THREADS */
2934 #endif /* CONFIG_VP8_DECODER */
2936 #if CONFIG_VP7_DECODER
/* VP7 decoder registration. Note: no threading capabilities — VP7 here is
 * decoded single-threaded (no FRAME/SLICE_THREADS flags, no thread copy
 * callbacks, unlike ff_vp8_decoder below). */
2937 AVCodec ff_vp7_decoder = {
2939 .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2940 .type = AVMEDIA_TYPE_VIDEO,
2941 .id = AV_CODEC_ID_VP7,
2942 .priv_data_size = sizeof(VP8Context),
2943 .init = vp7_decode_init,
2944 .close = ff_vp8_decode_free,
2945 .decode = vp7_decode_frame,
2946 .capabilities = AV_CODEC_CAP_DR1,
2947 .flush = vp8_decode_flush,
2949 #endif /* CONFIG_VP7_DECODER */
2951 #if CONFIG_VP8_DECODER
/* VP8 decoder registration: supports direct rendering, frame and slice
 * threading, and the hardware-acceleration configs listed below. */
2952 AVCodec ff_vp8_decoder = {
2954 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2955 .type = AVMEDIA_TYPE_VIDEO,
2956 .id = AV_CODEC_ID_VP8,
2957 .priv_data_size = sizeof(VP8Context),
2958 .init = ff_vp8_decode_init,
2959 .close = ff_vp8_decode_free,
2960 .decode = ff_vp8_decode_frame,
2961 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
2962 AV_CODEC_CAP_SLICE_THREADS,
2963 .flush = vp8_decode_flush,
2964 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2965 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2966 .hw_configs = (const AVCodecHWConfigInternal*[]) {
2967 #if CONFIG_VP8_VAAPI_HWACCEL
2970 #if CONFIG_VP8_NVDEC_HWACCEL
/* Fixed stale guard comment: this closes #if CONFIG_VP8_DECODER. */
2976 #endif /* CONFIG_VP8_DECODER */