/*
 * VP7/VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Fiona Glaser
 * Copyright (C) 2012 Daniel Kang
 * Copyright (C) 2014 Peter Ross
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include "libavutil/imgutils.h"

#include "rectangle.h"
#if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
#define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
#elif CONFIG_VP7_DECODER
#define VPX(vp7, f) vp7_ ## f
#else // CONFIG_VP8_DECODER
#define VPX(vp7, f) vp8_ ## f
#endif
static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
        goto fail;
    if (s->avctx->hwaccel) {
        const AVHWAccel *hwaccel = s->avctx->hwaccel;
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
                goto fail;
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
        }
    }
    return 0;

fail:
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
    return AVERROR(ENOMEM);
}
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    av_buffer_unref(&f->hwaccel_priv_buf);
    f->hwaccel_picture_private = NULL;
    ff_thread_release_buffer(s->avctx, &f->tf);
}
#if CONFIG_VP8_DECODER
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }
    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            return AVERROR(ENOMEM);
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;
}
#endif /* CONFIG_VP8_DECODER */
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}

static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}
static VP8Frame *vp8_find_free_buffer(VP8Context *s)
{
    VP8Frame *frame = NULL;
    int i;

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (frame->tf.f->buf[0])
        vp8_release_frame(s, frame);

    return frame;
}
static enum AVPixelFormat get_pixel_format(VP8Context *s)
{
    enum AVPixelFormat pix_fmts[] = {
#if CONFIG_VP8_VAAPI_HWACCEL
        AV_PIX_FMT_VAAPI,
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
        AV_PIX_FMT_CUDA,
#endif
        AV_PIX_FMT_YUV420P,
        AV_PIX_FMT_NONE,
    };

    return ff_get_format(s->avctx, pix_fmts);
}
static av_always_inline
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret;

    if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;
    }

    if (!s->actually_webp && !is_vp7) {
        s->pix_fmt = get_pixel_format(s);
        if (s->pix_fmt < 0)
            return AVERROR(EINVAL);
        avctx->pix_fmt = s->pix_fmt;
    }

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
                   avctx->thread_count > 1;
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
        if (!s->thread_data[i].filter_strength) {
            free_buffers(s);
            return AVERROR(ENOMEM);
        }
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}
static int vp7_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP7);
}

static int vp8_update_dimensions(VP8Context *s, int width, int height)
{
    return update_dimensions(s, width, height, IS_VP8);
}
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map          = vp8_rac_get(c);
    s->segmentation.update_feature_data = vp8_rac_get(c);

    if (s->segmentation.update_feature_data) {
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i, ret;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
            return -1;
        s->coeff_partition_size[i] = size;

        ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        if (ret < 0)
            return ret;
        buf      += size;
        buf_size -= size;
    }

    s->coeff_partition_size[i] = buf_size;
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}
static void vp7_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int yac_qi  = vp8_rac_get_uint(c, 7);
    int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;

    s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];
}
static void vp8_get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    s->quant.yac_qi     = vp8_rac_get_uint(c, 7);
    s->quant.ydc_delta  = vp8_rac_get_sint(c, 4);
    s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
    s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += s->quant.yac_qi;
        } else
            base_qi = s->quant.yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,                       7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
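/* Sanity check on the fixed-point scale used above: 155/100 * 2^16 =
 * 101580.8, so multiplying by 101581 and shifting right by 16 reproduces the
 * spec's 1.55 scaling of the second-order luma DC quantizer to within one
 * part in 65536, with no division in the per-segment loop. */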
/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
 *
 * Intra frames update all 3 references
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 * If the update (golden|altref) flag is set, it's updated with the current frame
 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *   0: no update
 *   1: VP56_FRAME_PREVIOUS
 *   2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}
static void vp78_reset_probability_tables(VP8Context *s)
{
    int i, j;
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));
}

static void vp78_update_probability_tables(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, j, k, l, m;

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }
}
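/* The innermost loop above fans a single probability read out to every scan
 * position sharing that coefficient band: the bitstream codes one update per
 * (band, context, token) triple, while the token table is indexed by scan
 * position, so vp8_coeff_band_indexes maps each of the 8 bands back to the
 * (up to 16) positions it covers. */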
#define VP7_MVC_SIZE 17
#define VP8_MVC_SIZE 19

static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
                                                            int mvc_size)
{
    VP56RangeCoder *c = &s->c;
    int i, j;

    if (vp8_rac_get(c))
        for (i = 0; i < 4; i++)
            s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
    if (vp8_rac_get(c))
        for (i = 0; i < 3; i++)
            s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
}
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}
static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
{
    int i, j;

    for (j = 1; j < 3; j++) {
        for (i = 0; i < height / 2; i++)
            memcpy(dst->data[j] + i * dst->linesize[j],
                   src->data[j] + i * src->linesize[j], width / 2);
    }
}
static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
                 const uint8_t *src, ptrdiff_t src_linesize,
                 int width, int height,
                 int alpha, int beta)
{
    int i, j;
    for (j = 0; j < height; j++) {
        const uint8_t *src2 = src + j * src_linesize;
        uint8_t *dst2 = dst + j * dst_linesize;
        for (i = 0; i < width; i++) {
            uint8_t y = src2[i];
            dst2[i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
        }
    }
}
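/* Worked example of the fade arithmetic above (the alpha/beta values are
 * hypothetical): with alpha = 10, beta = -64 and a source sample y = 128,
 * dst = clip(128 + ((128 * -64) >> 8) + 10) = clip(128 - 32 + 10) = 106.
 * In other words, beta scales each sample by beta/256 and alpha adds a flat
 * offset, with the result clipped back to 8 bits. */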
static int vp7_fade_frame(VP8Context *s, int alpha, int beta)
{
    int ret;

    if (!s->keyframe && (alpha || beta)) {
        int width  = s->mb_width * 16;
        int height = s->mb_height * 16;
        AVFrame *src, *dst;

        if (!s->framep[VP56_FRAME_PREVIOUS] ||
            !s->framep[VP56_FRAME_GOLDEN]) {
            av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
            return AVERROR_INVALIDDATA;
        }

        dst =
        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
                return ret;

            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

            copy_chroma(dst, src, width, height);
        }

        fade(dst->data[0], dst->linesize[0],
             src->data[0], src->linesize[0],
             width, height, alpha, beta);
    }

    return 0;
}
static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;
    int alpha  = 0;
    int beta   = 0;

    if (buf_size < 4) {
        return AVERROR_INVALIDDATA;
    }

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->invisible = 0;
    part1_size   = AV_RL24(buf) >> 4;

    if (buf_size < 4 - s->profile + part1_size) {
        av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed: %d\n", buf_size, 4 - s->profile + part1_size);
        return AVERROR_INVALIDDATA;
    }

    buf      += 4 - s->profile;
    buf_size -= 4 - s->profile;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ret = ff_vp56_init_range_decoder(c, buf, part1_size);
    if (ret < 0)
        return ret;
    buf      += part1_size;
    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
    if (s->keyframe) {
        width  = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
    }

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0, sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

            if (vp7_feature_value_size[s->profile][i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
        }
    }

    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    s->num_coeff_partitions = 1;
    ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
    if (ret < 0)
        return ret;

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
            return ret;
    }

    /* C. Dequantization indices */
    vp7_get_quants(s);

    /* D. Golden frame update flag (a Flag) for interframes only */
    if (!s->keyframe) {
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    }

    s->update_last          = 1;
    s->update_probabilities = 1;
    s->fade_present         = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        if (!s->keyframe)
            s->fade_present = vp8_rac_get(c);
    }

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;
    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        alpha = (int8_t) vp8_rac_get_uint(c, 8);
        beta  = (int8_t) vp8_rac_get_uint(c, 8);
    }

    /* F. Loop filter type */
    if (!s->profile)
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
    if (vp8_rac_get(c))
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels */
    if (s->profile > 0)
        s->filter.simple = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
    if (!s->keyframe) {
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last  = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
    }

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;

    if ((ret = vp7_fade_frame(s, alpha, beta)) < 0)
        return ret;

    return 0;
}
static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->profile   = (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;
    s->header_partition_size = header_size;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf + 3) & 0x3fff;
        height = AV_RL16(buf + 5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ret = ff_vp56_init_range_decoder(c, buf, header_size);
    if (ret < 0)
        return ret;
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        s->colorspace = vp8_rac_get(c);
        if (s->colorspace)
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        s->fullrange = vp8_rac_get(c);
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c))) {
        s->lf_delta.update = vp8_rac_get(c);
        if (s->lf_delta.update)
            update_lf_deltas(s);
    }

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
            return ret;

    vp8_get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    }

    // Record the entropy coder state here so that hwaccels can use it.
    s->c.code_word = vp56_rac_renorm(&s->c);
    s->coder_state_at_header_end.input     = s->c.buffer - (-s->c.bits / 8);
    s->coder_state_at_header_end.range     = s->c.high;
    s->coder_state_at_header_end.value     = s->c.code_word >> 16;
    s->coder_state_at_header_end.bit_count = -s->c.bits % 8;

    return 0;
}
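/* A sketch of the coder-state capture at the end of vp8_decode_frame_header
 * above, assuming the usual VP56RangeCoder layout: c->bits is kept negated,
 * so -bits is the number of cached-but-unconsumed bits in code_word. The
 * saved input pointer rewinds the buffer by the whole unconsumed bytes
 * (-bits / 8) and reports the leftover (-bits % 8) as a bit offset, giving a
 * hwaccel a byte-aligned restart point plus sub-byte position. */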
static av_always_inline
void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
                     av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
    dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
                     av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
}
/**
 * Motion vector coding, 17.1.
 */
static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3 * bit;
        x  += 4 * bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2 * bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}
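/* In the large-magnitude path above, bits 0-2 are coded LSB-first, the high
 * bits (up to bit 7 for VP7, bit 9 for VP8) MSB-first, and bit 3 is special:
 * if no higher bit is set it must be 1 (otherwise the value would have fit
 * the small_mvtree), so "x += 8" is taken unconditionally; when a higher bit
 * is set, bit 3 is coded explicitly via p[12]. p[1] finally codes the sign. */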
static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 1);
}

static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    return read_mv_component(c, p, 0);
}
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
{
    if (is_vp7)
        return vp7_submv_prob;

    if (left == top)
        return vp8_submv_prob[4 - !!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
}
/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        else
            part_idx = VP8_SPLITMVMODE_8x8;
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num              = vp8_mbsplit_count[part_idx];
    mbsplits_cur     = vp8_mbsplits[part_idx];
    firstidx         = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}
/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
    return 1;
}
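/* Worked example of the offset math above (the numbers are hypothetical):
 * with mb_width = 10 the virtual row width is 11, i.e. one padding column.
 * For mb_x = 9, mb_y = 1, xoffset = +1, yoffset = 0 we get
 * new = 1 * 11 + 10 = 21, and 21 % 11 == 10 == vwidth - 1, so the candidate
 * lands on the padding column and the offset is rejected. */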
static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
{
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
}
static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    VP56mv near_mv[3];
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;
    int i;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred * pred = &vp7_mv_pred[i];
        int edge_x, edge_y;

        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                             ? s->macroblocks_base + 1 + edge_x +
                                               (s->mb_width + 1) * (edge_y + 1)
                                             : s->macroblocks + edge_x +
                                               (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            if (mv) {
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                        idx = CNT_NEAREST;
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                            continue;
                        idx = CNT_NEAR;
                    } else {
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                        idx = CNT_NEAR;
                    }
                } else {
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);
                    idx = CNT_NEAREST;
                }
            } else {
                idx = CNT_ZERO;
            }
        } else {
            idx = CNT_ZERO;
        }

        cnt[idx] += vp7_mv_pred[i].score;
    }

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {

            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {

                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                else
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                } else {
                    mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                mb->mv = near_mv[CNT_NEAR];
                mb->bmv[0] = mb->mv;
            }
        } else {
            mb->mv = near_mv[CNT_NEAREST];
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    } else {
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)                                                      \
    {                                                                         \
        VP8Macroblock *edge = mb_edge[n];                                     \
        int edge_ref = edge->ref_frame;                                       \
        if (edge_ref != VP56_FRAME_CURRENT) {                                 \
            uint32_t mv = AV_RN32A(&edge->mv);                                \
            if (mv) {                                                         \
                if (cur_sign_bias != sign_bias[edge_ref]) {                   \
                    /* SWAR negate of the values in mv. */                    \
                    mv = ~mv;                                                 \
                    mv = ((mv & 0x7fff7fff) +                                 \
                          0x00010001) ^ (mv & 0x80008000);                    \
                }                                                             \
                if (!n || mv != AV_RN32A(&near_mv[idx]))                      \
                    AV_WN32A(&near_mv[++idx], mv);                            \
                cnt[idx] += 1 + (n != 2);                                     \
            } else                                                            \
                cnt[CNT_ZERO] += 1 + (n != 2);                                \
        }                                                                     \
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)
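    /* The SWAR negate in MV_EDGE_CHECK flips the sign of both packed int16 MV
     * components in one 32-bit operation: ~mv is the one's complement, and
     * the masked add of 0x00010001 adds 1 to each 16-bit lane (masking off
     * the sign bits first so the carry cannot cross lanes, then XORing them
     * back), yielding the two's complement of x and y simultaneously.
     * E.g. mv = 0x0001FFFF (y = 1, x = -1) becomes 0xFFFF0001 (y = -1, x = 1). */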

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                } else {
                    mb->mv.y  += vp8_read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x  += vp8_read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t *top;
        uint8_t *const left = s->intra4x4_pred_mode_left;
        if (layout)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx       = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y]   = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);
    }
}
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
                    VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
{
    VP56RangeCoder *c = &s->c;
    static const char * const vp7_feature_name[] = { "q-index",
                                                     "lf-delta",
                                                     "partial-golden-update",
                                                     "blit-pitch" };
    if (is_vp7) {
        int i;
        *segment = 0;
        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
                    int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                                 s->feature_index_prob[i]);
                    av_log(s->avctx, AV_LOG_WARNING,
                           "Feature %s present in macroblock (value 0x%x)\n",
                           vp7_feature_name[i], s->feature_value[i][index]);
                }
            }
        }
    } else if (s->segmentation.update_map) {
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
    } else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                    vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                vp8_pred8x8c_prob_intra);
        mb->ref_frame        = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame =
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                                   : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
        if (is_vp7)
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
        else
            vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                s->prob->pred8x8c);
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}
/**
 * @param r     arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i     initial coeff index, 0 unless a separate DC block is coded
 * @param qmul  array holding the dc/ac dequant factor at position 0/1
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
restart:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            if (vp7)
                goto restart;
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i + 1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else {                                             // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else { // DCT_CAT3 and up
                    int a   = vp56_rac_get_prob(&c, token_prob[8]);
                    int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
                    int cat = (a << 1) + b;
                    coeff   = 3 + (8 << cat);
                    coeff  += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i + 1][2];
        }
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
static av_always_inline
int inter_predict_dc(int16_t block[16], int16_t pred[2])
{
    int16_t dc = block[0];
    int ret = 0;

    if (pred[1] > 3) {
        dc += pred[0];
        ret = 1;
    }

    if (!pred[0] | !dc | ((int32_t) pred[0] ^ (int32_t) dc) >> 31) {
        block[0] = pred[0] = dc;
        pred[1] = 0;
    } else {
        if (pred[0] == dc)
            pred[1]++;
        block[0] = pred[0] = dc;
    }

    return ret;
}
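/* A sketch of the predicate used above: ((int32_t) pred[0] ^ (int32_t) dc)
 * >> 31 is nonzero exactly when the previous and current DC values differ in
 * sign, so the condition reads "either value is zero or the signs differ",
 * which resets the consecutive-equal-DC counter in pred[1]. Otherwise a
 * matching DC increments the counter, and once it exceeds 3 the predictor
 * kicks in at the top of the function (dc += pred[0]). */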
static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2],
                                            const uint8_t scan[16])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, scan, IS_VP7);
}

#ifndef vp8_decode_block_coeffs_internal
static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
                                            int16_t block[16],
                                            uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                            int i, uint8_t *token_prob,
                                            int16_t qmul[2])
{
    return decode_block_coeffs_internal(r, block, probs, i,
                                        token_prob, qmul, ff_zigzag_scan, IS_VP8);
}
#endif
/**
 * @param c          arithmetic bitstream reader context
 * @param block      destination for block coefficients
 * @param probs      probabilities to use when reading trees from the bitstream
 * @param i          initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul       array holding the dc/ac dequant factor at position 0/1
 * @param scan       scan pattern (VP7 only)
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                        int i, int zero_nhood, int16_t qmul[2],
                        const uint8_t scan[16], int vp7)
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul, scan)
               : vp8_decode_block_coeffs_internal(c, block, probs, i,
                                                  token_prob, qmul);
}
static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
                      VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
                      int is_vp7)
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
                                  nnz_pred, s->qmat[segment].luma_dc_qmul,
                                  ff_zigzag_scan, is_vp7);
        l_nnz[8] = t_nnz[8] = !!nnz;

        if (is_vp7 && mb->mode > MODE_I4x4) {
            nnz |= inter_predict_dc(td->block_dc,
                                    s->inter_dc_pred[mb->ref_frame - 1]);
        }

        if (nnz) {
            nnz_total += nnz;
            block_dc   = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        luma_start = 1;
        luma_ctx   = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x],
                                      s->prob->token[luma_ctx],
                                      luma_start, nnz_pred,
                                      s->qmat[segment].luma_qmul,
                                      s->prob[0].scan, is_vp7);
            /* nnz+block_dc may be one more than the actual last index,
             * but we don't care */
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
                nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
                                          s->prob->token[2], 0, nnz_pred,
                                          s->qmat[segment].chroma_qmul,
                                          s->prob[0].scan, is_vp7);
                td->non_zero_count_cache[i][(y << 1) + x] = nnz;
                t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
                      uint8_t *src_cb, uint8_t *src_cr,
                      ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15 * linesize);
    if (!simple) {
        AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
        AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
    }
}
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
                    uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
                    int mb_y, int mb_width, int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border - 32;     // for TL prediction
    src_y  -= linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a, b, xchg)                                                      \
    if (xchg)                                                                 \
        AV_SWAP64(b, a);                                                      \
    else                                                                      \
        AV_COPY64(b, a);

    XCHG(top_border_m1 + 8, src_y - 8, xchg);
    XCHG(top_border,        src_y,     xchg);
    XCHG(top_border + 8,    src_y + 8, 1);
    if (mb_x < mb_width - 1)
        XCHG(top_border + 32, src_y + 16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1 + 16, src_cb - 8, xchg);
        XCHG(top_border_m1 + 24, src_cr - 8, xchg);
        XCHG(top_border + 16, src_cb, 1);
        XCHG(top_border + 24, src_cr, 1);
    }
}
static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x)
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    else
        return mb_y ? mode : LEFT_DC_PRED8x8;
}

static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x)
        return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
    else
        return mb_y ? mode : HOR_PRED8x8;
}

static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
{
    switch (mode) {
    case DC_PRED8x8:
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    case VERT_PRED8x8:
        return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
    case HOR_PRED8x8:
        return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
    case PLANE_PRED8x8: /* TM */
        return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
    }
    return mode;
}
static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
{
    if (!mb_x)
        return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
    else
        return mb_y ? mode : HOR_VP8_PRED;
}
static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
                                     int *copy_buf, int vp7)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
    case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
                   * as 16x16/8x8 DC */
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}
static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
{
    int x, y, mode, nnz;
    uint32_t tr;

    /* for the first row, we need to run xchg_mb_border to init the top edge
     * to 127; otherwise, skip it if we aren't going to deblock */
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        const uint8_t lo = is_vp7 ? 128 : 127;
        const uint8_t hi = is_vp7 ? 128 : 129;
        uint8_t tr_top[4] = { lo, lo, lo, lo };

        // all blocks on the right edge of the macroblock use the bottom edge
        // of the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_y && mb_x == s->mb_width - 1) {
            tr       = tr_right[-1] * 0x01010101u;
            tr_right = (uint8_t *) &tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0;
                ptrdiff_t linesize = s->linesize;
                uint8_t *dst = ptr + 4 * x;
                LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);

                if ((y == 0 || x == 3) && mb_y == 0) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
                                                        mb_y + y, &copy, is_vp7);
                if (copy) {
                    dst      = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        copy_dst[3] = lo;
                        AV_WN32A(copy_dst + 4, lo * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
                        if (!(mb_x + x)) {
                            copy_dst[3] = hi;
                        } else {
                            copy_dst[3] = ptr[4 * x - s->linesize - 1];
                        }
                    }
                    if (!(mb_x + x)) {
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = hi;
                    } else {
                        copy_dst[11] = ptr[4 * x                   - 1];
                        copy_dst[19] = ptr[4 * x + s->linesize     - 1];
                        copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
                        copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
                    }
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
                    AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
                    AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
                    AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
                }

                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
                                                  td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr + 4 * x,
                                               td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr      += 4 * s->linesize;
            intra4x4 += 4;
        }
    }

    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
                                            mb_x, mb_y, is_vp7);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
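/* Reading the table above: each row is indexed by the 3-bit subpel offset.
 * Row 0 is how many pixels are needed left/above the block (and doubles as
 * the MC function index), row 2 how many are needed right/below, and row 1
 * their sum, i.e. the total extra source pixels edge emulation must provide.
 * Even offsets correspond to the filters with 6 nonzero taps (2 + 3 extra
 * pixels); odd offsets have zero outer taps, so only 1 + 2 are needed. */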
/**
 * luma MC function
 *
 * @param s        VP8 decoding context
 * @param dst      target buffer for block data at block position
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, ptrdiff_t linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) {
        ptrdiff_t src_linesize = linesize;

        int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx,
                                     width, height);
            src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            src_linesize = EDGE_EMU_LINESIZE;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
                      linesize, block_h, 0, 0);
    }
}
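/* On the ff_thread_await_progress calls above and in the chroma variant
 * below: the row argument converts the bottom-most source row the filter can
 * touch (y_off + block_h plus the below-block filter margin, plus a 3-row
 * guard) into macroblock rows, >> 4 for luma and >> 3 for the half-height
 * chroma planes. This blocks until the thread decoding the reference frame
 * has progressed that far, keeping frame-threaded MC race-free. */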
/**
 * chroma MC function
 *
 * @param s        VP8 decoding context
 * @param dst1     target buffer for block data at block position (U plane)
 * @param dst2     target buffer for block data at block position (V plane)
 * @param ref      reference picture buffer at origin (0, 0)
 * @param mv       motion vector (relative to block position) to get pixel data from
 * @param x_off    horizontal position of block from origin (0, 0)
 * @param y_off    vertical position of block from origin (0, 0)
 * @param block_w  width of block (16, 8 or 4)
 * @param block_h  height of block (always same as block_w)
 * @param width    width of src/dst plane data
 * @param height   height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func  motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
                   uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
                   int x_off, int y_off, int block_w, int block_h,
                   int width, int height, ptrdiff_t linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];

    if (AV_RN32A(mv)) {
        int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
        int my = mv->y & 7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src1 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src2 - my_idx * linesize - mx_idx,
                                     EDGE_EMU_LINESIZE, linesize,
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}
static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 ThreadFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off, int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        /* this block only applies to VP8; it is safe to check
         * only the profile, as VP7 profile <= 1 */
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1;
    y_off   >>= 1;
    bx_off  >>= 1;
    by_off  >>= 1;
    width   >>= 1;
    height  >>= 1;
    block_w >>= 1;
    block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
static av_always_inline
void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                     int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x >> 2) + x_off + 8;
        int my = (mb->mv.y >> 2) + y_off;
        uint8_t **src = s->framep[ref]->tf.f->data;
        int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0] + off, s->linesize, 4);
        off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
        s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
    }
}
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16 * s->mb_width, height = 16 * s->mb_height;
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
                            ref, &bmv[4 * y + x],
                            4 * x + x_off, 4 * y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off  >>= 1;
        y_off  >>= 1;
        width  >>= 1;
        height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
                uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
                         mb->bmv[2 * y       * 4 + 2 * x + 1].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
                         mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
                uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
                uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
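                /* The two lines above average the four luma MVs of this 2x2
                 * subblock group with symmetric rounding: FF_SIGNBIT yields
                 * -1 for negative sums, so e.g. a sum of +6 gives
                 * (6 + 2) >> 2 = 2 while -6 gives (-6 + 2 - 1) >> 2 = -2
                 * under arithmetic shift, the same magnitude either way. */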
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
                              dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
                              &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}
static av_always_inline
void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
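            /* nnz4 packs the four per-block coefficient counts of this row as
             * bytes: a zero byte means an all-zero block, a 1 means DC-only.
             * Testing against ~0x01010101 asks whether any block has AC
             * coefficients; if not, all four IDCTs collapse to the cheaper
             * DC-only dc_add4y path below. */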
            if (nnz4) {
                if (nnz4 & ~0x01010101) {
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
                                                      td->block[y][x],
                                                      s->linesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
                                                   td->block[y][x],
                                                   s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4 * s->linesize;
        }
    }
    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1 + ch];
            if (nnz4 & ~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t) nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
                                                      td->block[4 + ch][(y << 1) + x],
                                                      s->uvlinesize);
                        else if ((uint8_t) nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
                                                   td->block[4 + ch][(y << 1) + x],
                                                   s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4 * s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
            }
        }
chroma_idct_end:
        ;
    }
}
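
/**
 * Compute the per-macroblock loop filter parameters from the frame-level
 * filter settings, segment overrides and reference/mode deltas.
 */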
static av_always_inline
void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
                         VP8FilterStrength *f, int is_vp7)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit  = interior_limit;
    f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
                      mb->mode == VP8_MVMODE_SPLIT;
}
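
/* Worked example: with filter_level 32 and sharpness 4 the shift above is
 * (4 + 3) >> 2 = 1, so interior_limit becomes FFMIN(32 >> 1, 9 - 4) = 5;
 * higher sharpness therefore weakens the inner-edge filter while
 * filter_level itself stays clamped to the 6-bit range 0..63. */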

static av_always_inline
void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
               int mb_x, int mb_y, int is_vp7)
{
    int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    ptrdiff_t linesize   = s->linesize;
    ptrdiff_t uvlinesize = s->uvlinesize;
    /* high-edge-variance threshold, indexed by [keyframe][filter_level] */
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    if (is_vp7) {
        bedge_lim_y  = filter_level;
        bedge_lim_uv = filter_level * 2;
        mbedge_lim   = filter_level + 2;
    } else {
        bedge_lim_y  =
        bedge_lim_uv = filter_level * 2 + inner_limit;
        mbedge_lim   = bedge_lim_y + 4;
    }

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }
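
    /* The inner vertical (column) edges are filtered via this macro: with
     * !is_vp7 it runs here, right after the left macroblock edge; with
     * is_vp7 it runs at the end of the function, after the horizontal pass,
     * presumably matching the edge ordering of the respective reference
     * decoders. */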
#define H_LOOP_FILTER_16Y_INNER(cond)                                         \
    if (cond && inner_filter) {                                               \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize,            \
                                             bedge_lim_y, inner_limit,        \
                                             hev_thresh);                     \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize,            \
                                             bedge_lim_y, inner_limit,        \
                                             hev_thresh);                     \
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize,           \
                                             bedge_lim_y, inner_limit,        \
                                             hev_thresh);                     \
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,          \
                                             uvlinesize, bedge_lim_uv,        \
                                             inner_limit, hev_thresh);        \
    }

    H_LOOP_FILTER_16Y_INNER(!is_vp7)

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  4 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  8 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
                                             linesize, bedge_lim_y,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim_uv,
                                             inner_limit, hev_thresh);
    }

    H_LOOP_FILTER_16Y_INNER(is_vp7)
}

static av_always_inline
void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
                      int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    ptrdiff_t linesize = s->linesize;

    if (!filter_level)
        return;

    bedge_lim  = 2 * filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
    }
    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  4 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  8 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
    }
}

#define MARGIN (16 << 2)
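/* Motion vectors are stored in quarter-pel units, so MARGIN is one full
 * 16-pixel macroblock of slack around the frame for the MV clamping
 * bounds below (each macroblock spans 16 << 2 = 64 quarter-pel units). */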
static av_always_inline
int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                            VP8Frame *prev_frame, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    s->mv_bounds.mv_min.y = -MARGIN;
    s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base +
                            ((s->mb_width + 1) * (mb_y + 1) + 1);
        int mb_xy = mb_y * s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);

        s->mv_bounds.mv_min.x = -MARGIN;
        s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

        if (vpX_rac_is_end(&s->c)) {
            return AVERROR_INVALIDDATA;
        }
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
                         DC_PRED * 0x01010101);
            decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
            s->mv_bounds.mv_min.x -= 64;
            s->mv_bounds.mv_max.x -= 64;
        }
        s->mv_bounds.mv_min.y -= 64;
        s->mv_bounds.mv_max.y -= 64;
    }
    return 0;
}

static int vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                  VP8Frame *prev_frame)
{
    return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
}

static int vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
                                  VP8Frame *prev_frame)
{
    return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
}
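
/* Sliced-threading synchronization: each thread publishes its progress as
 * (mb_y << 16) | mb_x in thread_mb_pos. check_thread_pos blocks until the
 * other thread (otd) has passed the given position, and update_pos stores
 * the new position and wakes any thread waiting on it. */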
#if HAVE_THREADS
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
    do {                                                                      \
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);                 \
        if (atomic_load(&otd->thread_mb_pos) < tmp) {                         \
            pthread_mutex_lock(&otd->lock);                                   \
            atomic_store(&td->wait_mb_pos, tmp);                              \
            do {                                                              \
                if (atomic_load(&otd->thread_mb_pos) >= tmp)                  \
                    break;                                                    \
                pthread_cond_wait(&otd->cond, &otd->lock);                    \
            } while (1);                                                      \
            atomic_store(&td->wait_mb_pos, INT_MAX);                          \
            pthread_mutex_unlock(&otd->lock);                                 \
        }                                                                     \
    } while (0)

#define update_pos(td, mb_y, mb_x)                                            \
    do {                                                                      \
        int pos              = (mb_y << 16) | (mb_x & 0xFFFF);                \
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
                               (num_jobs > 1);                                \
        int is_null          = !next_td || !prev_td;                          \
        int pos_check        = (is_null) ? 1 :                                \
            (next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) ||   \
            (prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos));     \
        atomic_store(&td->thread_mb_pos, pos);                                \
        if (sliced_threading && pos_check) {                                  \
            pthread_mutex_lock(&td->lock);                                    \
            pthread_cond_broadcast(&td->cond);                                \
            pthread_mutex_unlock(&td->lock);                                  \
        }                                                                     \
    } while (0)
#else
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
#define update_pos(td, mb_y, mb_x) while(0)
#endif
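
/* Decode one row of macroblocks (modes if single-pass layout, coefficients,
 * intra/inter prediction and the residual transform) without applying the
 * loop filter; filtering is done separately in filter_mb_row(). */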
static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                                    int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
    int mb_x, mb_xy = mb_y * s->mb_width;
    int num_jobs = s->num_jobs;
    VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
        curframe->tf.f->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->tf.f->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (vpX_rac_is_end(c))
        return AVERROR_INVALIDDATA;

    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
    }

    if (!is_vp7 || mb_y == 0)
        memset(td->left_nnz, 0, sizeof(td->left_nnz));

    td->mv_bounds.mv_min.x = -MARGIN;
    td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        if (vpX_rac_is_end(c))
            return AVERROR_INVALIDDATA;
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1),
                                 mb_y - (is_vp7 ? 2 : 1));
            } else {
                check_thread_pos(td, prev_td,
                                 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
                                 mb_y - (is_vp7 ? 2 : 1));
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
                         s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
                         dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned

            /* Reset DC block predictors if they would exist
             * if the mb had coefficients */
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);

        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        td->mv_bounds.mv_min.x -= 64;
        td->mv_bounds.mv_max.x -= 64;

        if (mb_x == s->mb_width + 1) {
            update_pos(td, mb_y, s->mb_width + 3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
    return 0;
}

static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                       int jobnr, int threadnr)
{
    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
}

static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                       int jobnr, int threadnr)
{
    return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
}
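
/* Apply the loop filter to one row of macroblocks, saving the bottom edge
 * of the row into top_border first so that the row below can still intra
 * predict from the unfiltered pixels. */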
static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
                                           int jobnr, int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16 * mb_y * s->linesize,
        curframe->data[1] +  8 * mb_y * s->uvlinesize,
        curframe->data[2] +  8 * mb_y * s->uvlinesize
    };

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;

    if (mb_y == 0)
        prev_td = td;
    else
        prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = td;
    else
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        if (prev_td != td)
            check_thread_pos(td, prev_td,
                             (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
        if (next_td != td)
            if (next_td != &s->thread_data[0])
                check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);

        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
    }
}

static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
}

static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
}

static av_always_inline
int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
                              int threadnr, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;
    int ret;

    td->thread_nr = threadnr;
    td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr;
    td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        atomic_store(&td->thread_mb_pos, mb_y << 16);
        ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (ret < 0) {
            update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
            return ret;
        }
        if (s->deblock_filter)
            s->filter_mb_row(avctx, tdata, jobnr, threadnr);
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        td->mv_bounds.mv_min.y -= 64 * num_jobs;
        td->mv_bounds.mv_max.y -= 64 * num_jobs;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);
    }

    return 0;
}

static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
}

static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
}
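
/* Top-level frame decoding: parse the frame header, pick the output and
 * reference frames, then either hand the packet to a hwaccel or run the
 * row decode/filter jobs, and finally rotate the reference frame list. */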
static av_always_inline
int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                      AVPacket *avpkt, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;

    if (is_vp7)
        ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
    else
        ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);

    if (ret < 0)
        goto err;

    if (s->actually_webp) {
        // avctx->pix_fmt already set in caller.
    } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
        s->pix_fmt = get_pixel_format(s);
        if (s->pix_fmt < 0) {
            ret = AVERROR(EINVAL);
            goto err;
        }
        avctx->pix_fmt = s->pix_fmt;
    }

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
                 s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF
                              : !s->keyframe ? AVDISCARD_NONKEY
                                             : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].tf.f->buf[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i]);

    curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);

    if (!s->colorspace)
        avctx->colorspace = AVCOL_SPC_BT470BG;
    if (s->fullrange)
        avctx->color_range = AVCOL_RANGE_JPEG;
    else
        avctx->color_range = AVCOL_RANGE_MPEG;

    /* Given that arithmetic probabilities are updated every frame, it's quite
     * likely that the values we have on a random interframe are complete
     * junk if we didn't start decode on a keyframe. So just don't display
     * anything rather than junk. */
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN]   ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING,
               "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
                                            : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
        goto err;

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
    else
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];

    if (s->update_golden != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
    else
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];

    if (s->update_last)
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    else
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];

    s->next_framep[VP56_FRAME_CURRENT] = curframe;

    ff_thread_finish_setup(avctx);

    if (avctx->hwaccel) {
        ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
        if (ret < 0)
            goto err;

        ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
        if (ret < 0)
            goto err;

        ret = avctx->hwaccel->end_frame(avctx);
        if (ret < 0)
            goto err;
    } else {
        s->linesize   = curframe->tf.f->linesize[0];
        s->uvlinesize = curframe->tf.f->linesize[1];

        memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
        /* Zero macroblock structures for top/top-left prediction
         * from outside the frame. */
        if (!s->mb_layout)
            memset(s->macroblocks + s->mb_height * 2 - 1, 0,
                   (s->mb_width + 1) * sizeof(*s->macroblocks));
        if (!s->mb_layout && s->keyframe)
            memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);

        memset(s->ref_count, 0, sizeof(s->ref_count));

        if (s->mb_layout == 1) {
            // Make sure the previous frame has read its segmentation map,
            // if we re-use the same map.
            if (prev_frame && s->segmentation.enabled &&
                !s->segmentation.update_map)
                ff_thread_await_progress(&prev_frame->tf, 1, 0);
            if (is_vp7)
                ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
            else
                ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
            if (ret < 0)
                goto err;
        }

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            num_jobs = 1;
        else
            num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
        s->num_jobs   = num_jobs;
        s->curframe   = curframe;
        s->prev_frame = prev_frame;
        s->mv_bounds.mv_min.y = -MARGIN;
        s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
        for (i = 0; i < MAX_THREADS; i++) {
            VP8ThreadData *td = &s->thread_data[i];
            atomic_init(&td->thread_mb_pos, 0);
            atomic_init(&td->wait_mb_pos, INT_MAX);
        }
        if (is_vp7)
            avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
                            num_jobs);
        else
            avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
                            num_jobs);
    }

    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
            goto err;
        *got_frame = 1;
    }

    return avpkt->size;
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}

int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                        AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
}

#if CONFIG_VP7_DECODER
static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                            AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */

av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int i;

    if (!s)
        return 0;

    vp8_decode_flush_impl(avctx, 1);
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        av_frame_free(&s->frames[i].tf.f);

    return 0;
}

static av_cold int vp8_init_frames(VP8Context *s)
{
    int i;
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f)
            return AVERROR(ENOMEM);
    }
    return 0;
}

static av_always_inline
int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;
    s->vp7   = avctx->codec->id == AV_CODEC_ID_VP7;
    s->pix_fmt = AV_PIX_FMT_NONE;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    avctx->internal->allocate_progress = 1;

    ff_videodsp_init(&s->vdsp, 8);

    ff_vp78dsp_init(&s->vp8dsp);
    if (CONFIG_VP7_DECODER && is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
        ff_vp7dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
        s->filter_mb_row           = vp7_filter_mb_row;
    } else if (CONFIG_VP8_DECODER && !is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
        ff_vp8dsp_init(&s->vp8dsp);
        s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
        s->filter_mb_row           = vp8_filter_mb_row;
    }

    /* does not change for VP8 */
    memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}

#if CONFIG_VP7_DECODER
static int vp7_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */

av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP8);
}

#if CONFIG_VP8_DECODER
#if HAVE_THREADS
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;

    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}

#define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)

static int vp8_decode_update_thread_context(AVCodecContext *dst,
                                            const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    s->pix_fmt      = s_src->pix_fmt;
    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->buf[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
            if (ret < 0)
                return ret;
        }
    }

    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}
#endif /* HAVE_THREADS */
#endif /* CONFIG_VP8_DECODER */

#if CONFIG_VP7_DECODER
AVCodec ff_vp7_decoder = {
    .name                  = "vp7",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP7"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP7,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = vp7_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = vp7_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1,
    .flush                 = vp8_decode_flush,
};
#endif /* CONFIG_VP7_DECODER */

#if CONFIG_VP8_DECODER
AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
                             AV_CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
    .hw_configs            = (const AVCodecHWConfigInternal*[]) {
#if CONFIG_VP8_VAAPI_HWACCEL
                               HWACCEL_VAAPI(vp8),
#endif
#if CONFIG_VP8_NVDEC_HWACCEL
                               HWACCEL_NVDEC(vp8),
#endif
                               NULL
                           },
};
#endif /* CONFIG_VP8_DECODER */