/*
 * VP7/VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Fiona Glaser
 * Copyright (C) 2012 Daniel Kang
 * Copyright (C) 2014 Peter Ross
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "libavutil/imgutils.h"

#include "rectangle.h"
#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
#define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
#elif CONFIG_VP7_DECODER
#define VPX(vp7, f) vp7_ ## f
#else // CONFIG_VP8_DECODER
#define VPX(vp7, f) vp8_ ## f
#endif // CONFIG_VP8_DECODER
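
/* Illustrative expansion of the macro above (a sketch, using functions
 * defined later in this file): with both decoders compiled in, a call such as
 * VPX(is_vp7, decode_mvs)(...) becomes the runtime select
 * (is_vp7 ? vp7_decode_mvs : vp8_decode_mvs)(...); with only one decoder
 * built, the token paste collapses to a direct call, so the missing
 * decoder's symbols are never referenced at link time. */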
static void free_buffers(VP8Context *s)
{
    int i;

    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;

    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
        goto fail;
    if (s->avctx->hwaccel) {
        const AVHWAccel *hwaccel = s->avctx->hwaccel;
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
                goto fail;
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
        }
    }
    return 0;

fail:
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
    return AVERROR(ENOMEM);
}
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    av_buffer_unref(&f->hwaccel_priv_buf);
    f->hwaccel_picture_private = NULL;
    ff_thread_release_buffer(s->avctx, &f->tf);
}
#if CONFIG_VP8_DECODER
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }
    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            return AVERROR(ENOMEM);
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;
}
#endif /* CONFIG_VP8_DECODER */
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}

static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}
static VP8Frame *vp8_find_free_buffer(VP8Context *s)
{
    VP8Frame *frame = NULL;
    int i;

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (frame->tf.f->buf[0])
        vp8_release_frame(s, frame);

    return frame;
}
static enum AVPixelFormat get_pixel_format(VP8Context *s)
{
    enum AVPixelFormat pix_fmts[] = {
#if CONFIG_VP8_VAAPI_HWACCEL
        AV_PIX_FMT_VAAPI,
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
        AV_PIX_FMT_CUDA,
#endif
        AV_PIX_FMT_YUV420P,
        AV_PIX_FMT_NONE,
    };

    return ff_get_format(s->avctx, pix_fmts);
}
static av_always_inline
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret;

    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;
    }

    if (!s->actually_webp && !is_vp7) {
        s->pix_fmt = get_pixel_format(s);
        if (s->pix_fmt < 0)
            return AVERROR(EINVAL);
        avctx->pix_fmt = s->pix_fmt;
    }

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
                   avctx->thread_count > 1;
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
        if (!s->thread_data[i].filter_strength) {
            free_buffers(s);
            return AVERROR(ENOMEM);
        }
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}
static int vp7_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP7);
}

static int vp8_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP8);
}
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);
    s->segmentation.update_feature_data = vp8_rac_get(c);

    if (s->segmentation.update_feature_data) {
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i, ret;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;
        s->coeff_partition_size[i] = size;

        ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        if (ret < 0)
            return ret;
        buf      += size;
        buf_size -= size;
    }

    s->coeff_partition_size[i] = buf_size;
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}
static void vp7_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int yac_qi  = vp8_rac_get_uint(c, 7);
    int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;

    s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];
}
static void vp8_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    s->quant.yac_qi     = vp8_rac_get_uint(c, 7);
    s->quant.ydc_delta  = vp8_rac_get_sint(c, 4);
    s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
    s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += s->quant.yac_qi;
        } else
            base_qi = s->quant.yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,                       7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
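
/* Worked numbers for the fixed-point scale above: 1.55 * 2^16 = 101580.8, so
 * (q * 101581) >> 16 matches q * 155 / 100 to within one unit over the whole
 * 8-bit AC quantizer range, before the FFMAX(..., 8) floor is applied. */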
/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does.
 *
 * Intra frames update all 3 references
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 * If the update (golden|altref) flag is set, it's updated with the current frame
 *     if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *     0: no update
 *     1: VP56_FRAME_PREVIOUS
 *     2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}
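
/* Example of the mapping above, for ref == VP56_FRAME_GOLDEN with the update
 * flag unset: a coded 0 leaves golden untouched (VP56_FRAME_NONE), 1 refreshes
 * golden from the previous frame, and 2 refreshes golden from altref
 * (VP56_FRAME_GOLDEN2); the roles swap when ref is altref. */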
static void vp78_reset_probability_tables(VP8Context *s)
{
    int i, j;
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));
}
static void vp78_update_probability_tables(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, j, k, l, m;

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }
}
#define VP7_MVC_SIZE 17
#define VP8_MVC_SIZE 19

static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VP56RangeCoder *c = &s->c;
    int i, j;

    if (vp8_rac_get(c))
        for (i = 0; i < 4; i++)
            s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
    if (vp8_rac_get(c))
        for (i = 0; i < 3; i++)
            s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);

    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
}
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}
static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
{
    int i, j;

    for (j = 1; j < 3; j++) {
        for (i = 0; i < height / 2; i++)
            memcpy(dst->data[j] + i * dst->linesize[j],
                   src->data[j] + i * src->linesize[j], width / 2);
    }
}
static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
                 const uint8_t *src, ptrdiff_t src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int i, j;
    for (j = 0; j < height; j++) {
        const uint8_t *src2 = src + j * src_linesize;
        uint8_t *dst2 = dst + j * dst_linesize;
        for (i = 0; i < width; i++) {
            uint8_t y = src2[i];
            dst2[i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
        }
    }
}
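
/* Worked example of the transform above: with alpha = 16 and beta = -64 a
 * pixel y = 128 becomes av_clip_uint8(128 + (128 * -64 >> 8) + 16) = 112,
 * i.e. beta applies a per-pixel scale of beta/256 and alpha adds a constant
 * offset, implementing VP7's frame fading. */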
static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
{
    int alpha = (int8_t) vp8_rac_get_uint(c, 8);
    int beta  = (int8_t) vp8_rac_get_uint(c, 8);
    int ret;

    if (c->end <= c->buffer && c->bits >= 0)
        return AVERROR_INVALIDDATA;

    if (!s->keyframe && (alpha || beta)) {
        int width  = s->mb_width * 16;
        int height = s->mb_height * 16;
        AVFrame *src, *dst;

        if (!s->framep[VP56_FRAME_PREVIOUS] ||
            !s->framep[VP56_FRAME_GOLDEN]) {
            av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
            return AVERROR_INVALIDDATA;
        }

        dst =
        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
                return ret;

            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

            copy_chroma(dst, src, width, height);
        }

        fade(dst->data[0], dst->linesize[0],
             src->data[0], src->linesize[0],
             width, height, alpha, beta);
    }

    return 0;
}
static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    if (buf_size < 4)
        return AVERROR_INVALIDDATA;

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe = !(buf[0] & 1);
    part1_size = AV_RL24(buf) >> 4;

    if (buf_size < 4 - s->profile + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n",
               buf_size, 4 - s->profile + part1_size);
        return AVERROR_INVALIDDATA;
    }

    buf      += 4 - s->profile;
    buf_size -= 4 - s->profile;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ret = ff_vp56_init_range_decoder(c, buf, part1_size);
    if (ret < 0)
        return ret;
    buf      += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width  = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0, sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

            if (vp7_feature_value_size[s->profile][i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
        }
    }

    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    s->num_coeff_partitions = 1;
    ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
    if (ret < 0)
        return ret;

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    if (s->update_probabilities)
        s->prob[1] = s->prob[0];
    s->update_probabilities = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(c);
    }

    if (c->end <= c->buffer && c->bits >= 0)
        return AVERROR_INVALIDDATA;
    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        if ((ret = vp7_fade_frame(s, c)) < 0)
            return ret;
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(c))
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last  = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    return 0;
}
static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    s->header_partition_size = header_size;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width     = AV_RL16(buf + 3) & 0x3fff;
        height    = AV_RL16(buf + 5) & 0x3fff;
        hscale    = buf[4] >> 6;
        vscale    = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ret = ff_vp56_init_range_decoder(c, buf, header_size);
    if (ret < 0)
        return ret;
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        s->colorspace = vp8_rac_get(c);
        if (s->colorspace)
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        s->fullrange = vp8_rac_get(c);
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c))) {
        s->lf_delta.update = vp8_rac_get(c);
        if (s->lf_delta.update)
            update_lf_deltas(s);
    }

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;

    vp8_get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    // Record the entropy coder state here so that hwaccels can use it.
    s->c.code_word = vp56_rac_renorm(&s->c);
    s->coder_state_at_header_end.input     = s->c.buffer - (-s->c.bits / 8);
    s->coder_state_at_header_end.range     = s->c.high;
    s->coder_state_at_header_end.value     = s->c.code_word >> 16;
    s->coder_state_at_header_end.bit_count = -s->c.bits % 8;

    return 0;
}
static av_always_inline
void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
                     av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
    dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
                     av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
}
/**
 * Motion vector coding, 17.1.
 */
static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x  += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2 * bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}

static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 1);
}

static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 0);
}
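
/* Sketch of the layout read above (large-magnitude branch, VP8 case): the
 * three low bits arrive LSB-first from p[9..11], bits 9..4 then arrive
 * MSB-first, and bit 3 is implied to be 1 when no higher bit is set (the
 * small_mvtree branch already covers magnitudes below 8), otherwise it is
 * read with p[12]. The sign, p[1], is only coded for nonzero magnitudes. */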
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
{
    if (is_vp7)
        return vp7_submv_prob;

    if (left == top)
        return vp8_submv_prob[4 - !!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
}
/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        else
            part_idx = VP8_SPLITMVMODE_8x8;
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}
/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
    return 1;
}
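
/* Worked example of the guard above: with mb_width = 10 the virtual row width
 * is vwidth = 11, and the padding column is x == 10. An offset whose linear
 * index lands on that column (new % 11 == 10) or falls below `boundary` is
 * reported illegal; any other offset is split back into
 * edge_y = new / 11, edge_x = new % 11. */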
static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
{
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
}
static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    VP56mv near_mv[3];
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;
    int i;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred * pred = &vp7_mv_pred[i];
        int edge_x, edge_y;

        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                             ? s->macroblocks_base + 1 + edge_x +
                                               (s->mb_width + 1) * (edge_y + 1)
                                             : s->macroblocks + edge_x +
                                               (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            if (mv) {
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                        idx = CNT_NEAREST;
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                            continue;
                        idx = CNT_NEAR;
                    } else {
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                        idx = CNT_NEAR;
                    }
                } else {
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
                    idx = CNT_NEAREST;
                }
            } else {
                idx = CNT_ZERO;
            }
        } else {
            idx = CNT_ZERO;
        }
        cnt[idx] += vp7_mv_pred[i].score;
    }

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                else
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                } else {
                    mb->mv.y  += vp7_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x  += vp7_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                mb->mv     = near_mv[CNT_NEAR];
                mb->bmv[0] = mb->mv;
            }
        } else {
            mb->mv     = near_mv[CNT_NEAREST];
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)                                                  \
    {                                                                     \
        VP8Macroblock *edge = mb_edge[n];                                 \
        int edge_ref = edge->ref_frame;                                   \
        if (edge_ref != VP56_FRAME_CURRENT) {                             \
            uint32_t mv = AV_RN32A(&edge->mv);                            \
            if (mv) {                                                     \
                if (cur_sign_bias != sign_bias[edge_ref]) {               \
                    /* SWAR negate of the values in mv. */                \
                    mv = ~mv;                                             \
                    mv = ((mv & 0x7fff7fff) +                             \
                          0x00010001) ^ (mv & 0x80008000);                \
                }                                                         \
                if (!n || mv != AV_RN32A(&near_mv[idx]))                  \
                    AV_WN32A(&near_mv[++idx], mv);                        \
                cnt[idx] += 1 + (n != 2);                                 \
            } else {                                                      \
                cnt[CNT_ZERO] += 1 + (n != 2);                            \
            }                                                             \
        }                                                                 \
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                   (mb_edge[VP8_EDGE_TOPLEFT]->mode  == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                } else {
                    mb->mv.y  += vp8_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x  += vp8_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode   = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
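
/* The SWAR negate in MV_EDGE_CHECK above flips both int16_t lanes of mv in
 * one 32-bit operation: after mv = ~mv, the expression
 * ((mv & 0x7fff7fff) + 0x00010001) ^ (mv & 0x80008000) adds 1 to each 16-bit
 * lane while preventing a carry from the low lane from spilling into the
 * high one, completing a per-lane two's-complement negation. */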
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y]   = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);
    }
}
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
                    VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{
    VP56RangeCoder *c = &s->c;
    static const char * const vp7_feature_name[] = { "q-index",
                                                     "lf-delta",
                                                     "partial-golden-update",
                                                     "blit-pitch" };
    if (is_vp7) {
        int i;
        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
                    int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                                 s->feature_index_prob[i]);
                    av_log(s->avctx, AV_LOG_WARNING,
                           "Feature %s present in macroblock (value 0x%x)\n",
                           vp7_feature_name[i], s->feature_value[i][index]);
                }
            }
        }
    } else if (s->segmentation.update_map) {
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1 + bit]) + 2 * bit;
    } else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                    vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                vp8_pred8x8c_prob_intra);
        mb->ref_frame        = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame =
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                                   : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        if (is_vp7)
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
        else
            vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                s->prob->pred8x8c);
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}
/**
 * @param r     arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i     initial coeff index, 0 unless a separate DC block is coded
 * @param qmul  array holding the dc/ac dequant factor at position 0/1
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
restart:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            if (vp7)
                goto restart;
            goto skip_eob;
        }

        coeff = 1;
        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else { // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
                    coeff   = 3 + (8 << cat);
                    coeff  += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
static av_always_inline
int inter_predict_dc(int16_t block[16], int16_t pred[2])
{
    int16_t dc = block[0];
    int ret = 0;

    if (pred[1] > 3) {
        dc  += pred[0];
        ret  = 1;
    }

    if (!pred[0] | !dc | ((int32_t) pred[0] ^ (int32_t) dc) >> 31) {
        block[0] = pred[0] = dc;
        pred[1]  = 0;
    } else {
        if (pred[0] == dc)
            pred[1]++;
        block[0] = pred[0] = dc;
    }

    return ret;
}
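
/* Notes on the predicate above: !pred[0] | !dc catches a zero on either side,
 * and ((int32_t) pred[0] ^ (int32_t) dc) >> 31 is nonzero exactly when the
 * two DC values differ in sign; any of these resets the agreement counter
 * pred[1], while repeated identical DC values increment it until the
 * prediction engages at pred[1] > 3. */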
static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2],
                                            const uint8_t scan[16])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, scan, IS_VP7);
}

#ifndef vp8_decode_block_coeffs_internal
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, ff_zigzag_scan, IS_VP8);
}
#endif
/**
 * @param c          arithmetic bitstream reader context
 * @param block      destination for block coefficients
 * @param probs      probabilities to use when reading trees from the bitstream
 * @param i          initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul       array holding the dc/ac dequant factor at position 0/1
 * @param scan       scan pattern (VP7 only)
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                        int i, int zero_nhood, int16_t qmul[2],
                        const uint8_t scan[16], int vp7)
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul, scan)
               : vp8_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul);
}
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
                      int is_vp7)
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
                                  ff_zigzag_scan, is_vp7);
        l_nnz[8] = t_nnz[8] = !!nnz;

        if (is_vp7 && mb->mode > MODE_I4x4) {
            nnz |= inter_predict_dc(td->block_dc,
                                    s->inter_dc_pred[mb->ref_frame - 1]);
        }

        if (nnz) {
            nnz_total += nnz;
            block_dc   = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        luma_start = 1;
        luma_ctx   = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x],
                                      s->prob->token[luma_ctx],
                                      luma_start, nnz_pred,
                                      s->qmat[segment].luma_qmul,
                                      s->prob[0].scan, is_vp7);
            /* nnz+block_dc may be one more than the actual last index,
             * but we don't care */
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
                                          s->prob->token[2], 0, nnz_pred,
                                          s->qmat[segment].chroma_qmul,
                                          s->prob[0].scan, is_vp7);
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
                      uint8_t *src_cb, uint8_t *src_cr,
                      ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15 * linesize);
    if (!simple) {
        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
    }
}
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
                    uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
                    int mb_y, int mb_width, int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border - 32;     // for TL prediction
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a, b, xchg)                                                      \
    if (xchg)                                                                 \
        AV_SWAP64(b, a);                                                      \
    else                                                                      \
        AV_COPY64(b, a);

    XCHG(top_border_m1 + 8, src_y - 8, xchg);
    XCHG(top_border,        src_y,     xchg);
    XCHG(top_border + 8,    src_y + 8, 1);
    if (mb_x < mb_width - 1)
        XCHG(top_border + 32, src_y + 16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
        XCHG(top_border + 16, src_cb, 1);
        XCHG(top_border + 24, src_cr, 1);
    }
}
static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    else
        return mb_y ? mode : LEFT_DC_PRED8x8;
}

static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x)
        return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
    else
        return mb_y ? mode : HOR_PRED8x8;
}

static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
{
    switch (mode) {
    case DC_PRED8x8:
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    case VERT_PRED8x8:
        return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
    case HOR_PRED8x8:
        return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
    case PLANE_PRED8x8: /* TM */
        return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
    }
    return mode;
}

static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x)
        return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
    else
        return mb_y ? mode : HOR_VP8_PRED;
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
                                     int *copy_buf, int vp7)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
    case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
                   * as 16x16/8x8 DC */
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}
static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
{
    int x, y, mode, nnz;
    uint32_t tr;

    /* for the first row, we need to run xchg_mb_border to init the top edge
     * to 127; otherwise, skip it if we aren't going to deblock */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        const uint8_t lo = is_vp7 ? 128 : 127;
        const uint8_t hi = is_vp7 ? 128 : 129;
        uint8_t tr_top[4] = { lo, lo, lo, lo };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y && mb_x == s->mb_width - 1) {
            tr       = tr_right[-1] * 0x01010101u;
            tr_right = (uint8_t *) &tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0;
                ptrdiff_t linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
                                                        mb_y + y, &copy, is_vp7);
                if (copy) {
                    dst      = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        copy_dst[3] = lo;
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                        if (!(mb_x + x)) {
                            copy_dst[3] = hi;
                        } else {
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                        }
                    }
                    if (!(mb_x + x)) {
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = hi;
                    } else {
                        copy_dst[11] = ptr[4 * x                   - 1];
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                    }
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                }

                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
                                                  td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
                                               td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr      += 4 * s->linesize;
            intra4x4 += 4;
        }
    }

    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
                                            mb_x, mb_y, is_vp7);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
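
/* Reading the table above for the six-tap path: an even nonzero fractional
 * position such as mx == 2 needs 2 extra source pixels on the left and 3 on
 * the right of the block (5 extra columns, row [1]); odd positions use the
 * narrower 4-tap variant needing 1 and 2. Row [0] doubles as the index into
 * the mc_func sub-tables. */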
/**
 * luma MC function
 *
 * @param s        VP8 decoding context
 * @param dst      target buffer for block data at block position
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) {
        ptrdiff_t src_linesize = linesize;

        int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx,
                                     width, height);
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            src_linesize = EDGE_EMU_LINESIZE;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
                      linesize, block_h, 0, 0);
    }
}
/**
 * chroma MC function
 *
 * @param s        VP8 decoding context
 * @param dst1     target buffer for block data at block position (U plane)
 * @param dst2     target buffer for block data at block position (V plane)
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
                   uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
                   int x_off, int y_off, int block_w, int block_h,
                   int width, int height, ptrdiff_t linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];

    if (AV_RN32A(mv)) {
        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
        int my = mv->y & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src1 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src2 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 ThreadFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off, int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        /* this block only applies to VP8; it is safe to check
         * only the profile, as VP7 profile <= 1 */
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1;
    y_off   >>= 1;
    bx_off  >>= 1;
    by_off  >>= 1;
    width   >>= 1;
    height  >>= 1;
    block_w >>= 1;
    block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline
void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                     int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x >> 2) + x_off + 8;
        int my = (mb->mv.y >> 2) + y_off;
        uint8_t **src = s->framep[ref]->tf.f->data;
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
    }
}
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16 * s->mb_width, height = 16 * s->mb_height;
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
                            ref, &bmv[4 * y + x],
                            4 * x + x_off, 4 * y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off  >>= 1;
        y_off  >>= 1;
        width  >>= 1;
        height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[2 * y * 4 + 2 * x          ].x +
                         mb->bmv[2 * y * 4 + 2 * x + 1      ].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
                uvmv.y = mb->bmv[2 * y * 4 + 2 * x          ].y +
                         mb->bmv[2 * y * 4 + 2 * x + 1      ].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
                uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
                uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
                              &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}
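
/* In the 4x4 split path above each chroma MV is the rounded mean of the four
 * luma MVs covering the 8x8 chroma block: (sum + 2 + FF_SIGNBIT(sum)) >> 2
 * divides the sum by 4 rounding halves away from zero, e.g. a sum of +6
 * gives 2 and -6 gives -2. */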
static av_always_inline
void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4 & ~0x01010101) {
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
                                                      td->block[y][x],
                                                      s->linesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
                                                   td->block[y][x],
                                                   s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4 * s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1 + ch];
            if (nnz4 & ~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
                                                      td->block[4 + ch][(y << 1) + x],
                                                      s->uvlinesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
                                                   td->block[4 + ch][(y << 1) + x],
                                                   s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4 * s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
            }
        }
chroma_idct_end:
        ;
    }
}

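/* Compute the loop-filter strength for one macroblock (RFC 6386 section 15):
 * the base level comes from the segment (or frame) filter level, adjusted by
 * per-reference and per-mode deltas when enabled, then clamped to [0, 63].
 * Inner (sub-block) edges are only filtered when the block has residual or
 * per-sub-block motion; a skipped whole-block prediction cannot create
 * discontinuities inside the macroblock. VP7 always filters inner edges. */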
static av_always_inline
void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
                         VP8FilterStrength *f, int is_vp7)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit  = interior_limit;
    f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
                      mb->mode == VP8_MVMODE_SPLIT;
}

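/* Normal loop filter for one macroblock. The macroblock's left and top edges
 * use the stronger mbedge limit, the interior edges at offsets 4, 8 and 12
 * use the block-edge limits; hev_thresh (high edge variance) picks between
 * full and reduced filtering per edge. Note the two H_LOOP_FILTER_16Y_INNER
 * invocations: VP8 runs the inner pass across vertical edges before the
 * horizontal-edge filters, VP7 after them. */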
static av_always_inline
void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
               int mb_x, int mb_y, int is_vp7)
{
    int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    ptrdiff_t linesize   = s->linesize;
    ptrdiff_t uvlinesize = s->uvlinesize;
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    if (is_vp7) {
        bedge_lim_y  = filter_level;
        bedge_lim_uv = filter_level * 2;
        mbedge_lim   = filter_level + 2;
    } else {
        bedge_lim_y  =
        bedge_lim_uv = filter_level * 2 + inner_limit;
        mbedge_lim   = bedge_lim_y + 4;
    }

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

#define H_LOOP_FILTER_16Y_INNER(cond)                                        \
    if (cond && inner_filter) {                                              \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  4, linesize,          \
                                             bedge_lim_y, inner_limit,       \
                                             hev_thresh);                    \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  8, linesize,          \
                                             bedge_lim_y, inner_limit,       \
                                             hev_thresh);                    \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize,          \
                                             bedge_lim_y, inner_limit,       \
                                             hev_thresh);                    \
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,         \
                                             uvlinesize, bedge_lim_uv,       \
                                             inner_limit, hev_thresh);       \
    }

    H_LOOP_FILTER_16Y_INNER(!is_vp7)

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  4 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  8 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim_uv,
                                             inner_limit, hev_thresh);
    }

    H_LOOP_FILTER_16Y_INNER(is_vp7)
}

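/* Simple-profile loop filter: operates on the luma plane only, with a single
 * edge limit per edge type and no high-edge-variance logic. */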
static av_always_inline
void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
                      int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    ptrdiff_t linesize = s->linesize;

    if (!filter_level)
        return;

    bedge_lim  = 2 * filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
    }
    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  4 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  8 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
    }
}

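/* Motion vectors are clamped so that they never point more than 16 pixels
 * (16 << 2 in quarter-pel units) outside the frame on any side. */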
#define MARGIN (16 << 2)
static av_always_inline
int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                            VP8Frame *prev_frame, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    s->mv_bounds.mv_min.y = -MARGIN;
    s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base +
                            ((s->mb_width + 1) * (mb_y + 1) + 1);
        int mb_xy = mb_y * s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);

        s->mv_bounds.mv_min.x = -MARGIN;
        s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

        if (vpX_rac_is_end(&s->c)) {
            return AVERROR_INVALIDDATA;
        }
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
                         DC_PRED * 0x01010101);
            decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
            s->mv_bounds.mv_min.x -= 64;
            s->mv_bounds.mv_max.x -= 64;
        }
        s->mv_bounds.mv_min.y -= 64;
        s->mv_bounds.mv_max.y -= 64;
    }
    return 0;
}

static int vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                  VP8Frame *prev_frame)
{
    return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
}

static int vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                  VP8Frame *prev_frame)
{
    return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
}

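/* Sliced-threading synchronisation: every worker publishes its progress as
 * (mb_y << 16) | mb_x in thread_mb_pos. check_thread_pos() blocks until the
 * other worker has advanced past the given macroblock position; update_pos()
 * stores our own position and wakes any worker that may be waiting on it. */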
#if HAVE_THREADS
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
    do {                                                                      \
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);                 \
        if (atomic_load(&otd->thread_mb_pos) < tmp) {                         \
            pthread_mutex_lock(&otd->lock);                                   \
            atomic_store(&td->wait_mb_pos, tmp);                              \
            do {                                                              \
                if (atomic_load(&otd->thread_mb_pos) >= tmp)                  \
                    break;                                                    \
                pthread_cond_wait(&otd->cond, &otd->lock);                    \
            } while (1);                                                      \
            atomic_store(&td->wait_mb_pos, INT_MAX);                          \
            pthread_mutex_unlock(&otd->lock);                                 \
        }                                                                     \
    } while (0)

#define update_pos(td, mb_y, mb_x)                                            \
    do {                                                                      \
        int pos              = (mb_y << 16) | (mb_x & 0xFFFF);                \
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
                               (num_jobs > 1);                                \
        int is_null          = !next_td || !prev_td;                          \
        int pos_check        = (is_null) ? 1 :                                \
            (next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) ||   \
            (prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos));     \
        atomic_store(&td->thread_mb_pos, pos);                                \
        if (sliced_threading && pos_check) {                                  \
            pthread_mutex_lock(&td->lock);                                    \
            pthread_cond_broadcast(&td->cond);                                \
            pthread_mutex_unlock(&td->lock);                                  \
        }                                                                     \
    } while (0)
#else
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
#define update_pos(td, mb_y, mb_x) while(0)
#endif /* HAVE_THREADS */

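/* Decode one macroblock row without loop filtering: macroblock modes and MVs
 * (unless already decoded in a separate pass), residual coefficients from
 * this row's partition, intra or inter prediction and the inverse transform.
 * Per-macroblock filter strengths are saved for the separate filter pass. */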
static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                                    int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
    int mb_x, mb_xy = mb_y * s->mb_width;
    int num_jobs = s->num_jobs;
    VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
        curframe->tf.f->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->tf.f->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (c->end <= c->buffer && c->bits >= 0)
        return AVERROR_INVALIDDATA;

    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
    }

    if (!is_vp7 || mb_y == 0)
        memset(td->left_nnz, 0, sizeof(td->left_nnz));

    td->mv_bounds.mv_min.x = -MARGIN;
    td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        if (c->end <= c->buffer && c->bits >= 0)
            return AVERROR_INVALIDDATA;
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1),
                                 mb_y - (is_vp7 ? 2 : 1));
            } else {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
                                 mb_y - (is_vp7 ? 2 : 1));
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
                         s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
                         dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned

            /* Reset DC block predictors if they would exist
             * if the mb had coefficients */
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);

        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        td->mv_bounds.mv_min.x -= 64;
        td->mv_bounds.mv_max.x -= 64;

        if (mb_x == s->mb_width + 1) {
            update_pos(td, mb_y, s->mb_width + 3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }

    return 0;
}

static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                       int jobnr, int threadnr)
{
    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
}

static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                       int jobnr, int threadnr)
{
    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
}

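/* Apply the loop filter to one previously reconstructed macroblock row,
 * using the simple or normal filter as signalled in the frame header. */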
static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
                                           int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16 * mb_y * s->linesize,
        curframe->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;

    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        if (prev_td != td)
            check_thread_pos(td, prev_td,
                             (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
        if (next_td != td)
            if (next_td != &s->thread_data[0])
                check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);

        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y, is_vp7);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
    }
}

static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
}

static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
}

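/* Top-level row worker: job jobnr handles rows jobnr, jobnr + num_jobs, ...
 * so that decoding and filtering of neighbouring rows can overlap across
 * threads. Under frame threading, progress is reported per row so that a
 * decoder working on the next frame can start referencing this one early. */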
static av_always_inline
int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
                              int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;
    int ret;

    td->thread_nr = threadnr;
    td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr;
    td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        atomic_store(&td->thread_mb_pos, mb_y << 16);
        ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (ret < 0) {
            update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
            return ret;
        }
        if (s->deblock_filter)
            s->filter_mb_row(avctx, tdata, jobnr, threadnr);
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        td->mv_bounds.mv_min.y -= 64 * num_jobs;
        td->mv_bounds.mv_max.y -= 64 * num_jobs;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);
    }

    return 0;
}

static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
}

static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
}

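/* Frame-level driver shared by VP7 and VP8: parse the frame header, honour
 * skip_frame, rotate the last/golden/altref reference slots, then hand the
 * actual reconstruction either to the hwaccel or to the row workers above. */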
static av_always_inline
int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                      AVPacket *avpkt, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;

    if (is_vp7)
        ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
    else
        ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);

    if (ret < 0)
        goto err;

    if (s->actually_webp) {
        // avctx->pix_fmt already set in caller.
    } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
        s->pix_fmt = get_pixel_format(s);
        if (s->pix_fmt < 0) {
            ret = AVERROR(EINVAL);
            goto err;
        }
        avctx->pix_fmt = s->pix_fmt;
    }

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
                 s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF
                              : !s->keyframe ? AVDISCARD_NONKEY
                                             : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].tf.f->buf[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i]);

    curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);

    if (!s->colorspace)
        avctx->colorspace = AVCOL_SPC_BT470BG;
    if (s->fullrange)
        avctx->color_range = AVCOL_RANGE_JPEG;
    else
        avctx->color_range = AVCOL_RANGE_MPEG;

    /* Given that arithmetic probabilities are updated every frame, it's quite
     * likely that the values we have on a random interframe are complete
     * junk if we didn't start decode on a keyframe. So just don't display
     * anything rather than junk. */
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN]   ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING,
               "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
                                            : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
        goto err;

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
    else
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];

    if (s->update_golden != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    else
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];

    if (s->update_last)
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    else
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];

    s->next_framep[VP56_FRAME_CURRENT] = curframe;

    ff_thread_finish_setup(avctx);

    if (avctx->hwaccel) {
        ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
        if (ret < 0)
            goto err;

        ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
        if (ret < 0)
            goto err;

        ret = avctx->hwaccel->end_frame(avctx);
        if (ret < 0)
            goto err;
    } else {
        s->linesize   = curframe->tf.f->linesize[0];
        s->uvlinesize = curframe->tf.f->linesize[1];

        memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
        /* Zero macroblock structures for top/top-left prediction
         * from outside the frame. */
        if (!s->mb_layout)
            memset(s->macroblocks + s->mb_height * 2 - 1, 0,
                   (s->mb_width + 1) * sizeof(*s->macroblocks));
        if (!s->mb_layout && s->keyframe)
            memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);

        memset(s->ref_count, 0, sizeof(s->ref_count));

        if (s->mb_layout == 1) {
            // Make sure the previous frame has read its segmentation map,
            // if we re-use the same map.
            if (prev_frame && s->segmentation.enabled &&
                !s->segmentation.update_map)
                ff_thread_await_progress(&prev_frame->tf, 1, 0);
            if (is_vp7)
                ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
            else
                ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
            if (ret < 0)
                goto err;
        }

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            num_jobs = 1;
        else
            num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
        s->num_jobs   = num_jobs;
        s->curframe   = curframe;
        s->prev_frame = prev_frame;
        s->mv_bounds.mv_min.y = -MARGIN;
        s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
        for (i = 0; i < MAX_THREADS; i++) {
            VP8ThreadData *td = &s->thread_data[i];
            atomic_init(&td->thread_mb_pos, 0);
            atomic_init(&td->wait_mb_pos, INT_MAX);
        }
        if (is_vp7)
            avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data,
                            NULL, num_jobs);
        else
            avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data,
                            NULL, num_jobs);
    }

    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return avpkt->size;
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}

int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                        AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
}

#if CONFIG_VP7_DECODER
static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                            AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */

av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int i;

    vp8_decode_flush_impl(avctx, 1);
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        av_frame_free(&s->frames[i].tf.f);

    return 0;
}

static av_cold int vp8_init_frames(VP8Context *s)
{
    int i;
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f)
            return AVERROR(ENOMEM);
    }
    return 0;
}

static av_always_inline
int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx   = avctx;
    s->vp7     = avctx->codec->id == AV_CODEC_ID_VP7;
    s->pix_fmt = AV_PIX_FMT_NONE;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    avctx->internal->allocate_progress = 1;

    ff_videodsp_init(&s->vdsp, 8);

    ff_vp78dsp_init(&s->vp8dsp);
    if (CONFIG_VP7_DECODER && is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
        ff_vp7dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
        s->filter_mb_row           = vp7_filter_mb_row;
    } else if (CONFIG_VP8_DECODER && !is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
        ff_vp8dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
        s->filter_mb_row           = vp8_filter_mb_row;
    }

    /* does not change for VP8 */
    memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}

#if CONFIG_VP7_DECODER
static int vp7_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */

av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP8);
}

#if CONFIG_VP8_DECODER
#if HAVE_THREADS
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}

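/* Map a VP8Frame pointer from the source thread's context onto the entry
 * with the same index in our own frames[] array. */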
#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)

static int vp8_decode_update_thread_context(AVCodecContext *dst,
                                            const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    s->pix_fmt      = s_src->pix_fmt;
    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->buf[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
            if (ret < 0)
                return ret;
        }
    }

    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}
#endif /* HAVE_THREADS */
#endif /* CONFIG_VP8_DECODER */

#if CONFIG_VP7_DECODER
AVCodec ff_vp7_decoder = {
    .name                  = "vp7",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP7"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP7,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = vp7_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = vp7_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1,
    .flush                 = vp8_decode_flush,
};
#endif /* CONFIG_VP7_DECODER */

#if CONFIG_VP8_DECODER
AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
                             AV_CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
    .hw_configs            = (const AVCodecHWConfigInternal*[]) {
#if CONFIG_VP8_VAAPI_HWACCEL
                               HWACCEL_VAAPI(vp8),
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
                               HWACCEL_NVDEC(vp8),
#endif
                               NULL
                           },
};
#endif /* CONFIG_VP8_DECODER */