2 * VP7/VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Fiona Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/imgutils.h"
33 #include "rectangle.h"
/* Dispatch helper: expand to the vp7_- or vp8_-prefixed symbol depending on
 * which decoders are compiled in. When both are built, the choice is made at
 * run time via the vp7 flag; otherwise it collapses to the only variant.
 * NOTE(review): the matching #endif is outside this excerpt — confirm. */
42 #if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
43 #define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
44 #elif CONFIG_VP7_DECODER
45 #define VPX(vp7, f) vp7_ ## f
46 #else // CONFIG_VP8_DECODER
47 #define VPX(vp7, f) vp8_ ## f
/* Release all per-context heap allocations: per-thread sync primitives and
 * filter-strength arrays, the macroblock array, the top-row intra prediction
 * modes, non-zero-coefficient counts and border pixels. Resets the derived
 * s->macroblocks pointer so stale accesses fault cleanly. */
50 static void free_buffers(VP8Context *s)
54     for (i = 0; i < MAX_THREADS; i++) {
/* Destroy the condition/mutex pair used for sliced-threading row sync. */
56         pthread_cond_destroy(&s->thread_data[i].cond);
57         pthread_mutex_destroy(&s->thread_data[i].lock);
59         av_freep(&s->thread_data[i].filter_strength);
61     av_freep(&s->thread_data);
62     av_freep(&s->macroblocks_base);
63     av_freep(&s->intra4x4_pred_mode_top);
64     av_freep(&s->top_nnz);
65     av_freep(&s->top_border);
/* s->macroblocks points into macroblocks_base (see update_dimensions). */
67     s->macroblocks = NULL;
/* Allocate a frame buffer plus its per-frame segmentation map and, when a
 * hwaccel is active, the hwaccel's private picture data.
 * @param ref  non-zero if the frame may be used as a reference (passes
 *             AV_GET_BUFFER_FLAG_REF to ff_thread_get_buffer)
 * @return 0 on success, negative AVERROR on failure; on the ENOMEM paths the
 *         partially allocated frame is unwound before returning. */
70 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
73     if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
74                                     ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
/* One byte of segment id per macroblock. */
76     if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
78     if (s->avctx->hwaccel) {
79         const AVHWAccel *hwaccel = s->avctx->hwaccel;
80         if (hwaccel->frame_priv_data_size) {
81             f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
82             if (!f->hwaccel_priv_buf)
84             f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
/* Error path: undo the partial allocation. */
90     av_buffer_unref(&f->seg_map);
91     ff_thread_release_buffer(s->avctx, &f->tf);
92     return AVERROR(ENOMEM);
/* Release everything vp8_alloc_frame acquired: segmentation map, hwaccel
 * private data and the underlying thread-safe frame buffer. */
95 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
97     av_buffer_unref(&f->seg_map);
98     av_buffer_unref(&f->hwaccel_priv_buf);
99     f->hwaccel_picture_private = NULL;
100     ff_thread_release_buffer(s->avctx, &f->tf);
103 #if CONFIG_VP8_DECODER
/* Make dst a new reference to src: the frame buffer, the segmentation map
 * and (if present) the hwaccel private buffer. dst is released first.
 * @return 0 on success, negative AVERROR on failure. */
104 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
108     vp8_release_frame(s, dst);
110     if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
113         !(dst->seg_map = av_buffer_ref(src->seg_map))) {
/* Referencing the seg_map failed: drop the frame ref taken above too. */
114         vp8_release_frame(s, dst);
115         return AVERROR(ENOMEM);
117     if (src->hwaccel_picture_private) {
118         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
119         if (!dst->hwaccel_priv_buf)
120             return AVERROR(ENOMEM);
121         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
126 #endif /* CONFIG_VP8_DECODER */
/* Flush the decoder: release every frame in the pool and clear the
 * reference-frame pointer table. With free_mem set, callers also expect the
 * context buffers to be freed (handled past the visible lines — confirm). */
128 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
130     VP8Context *s = avctx->priv_data;
133     for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
134         vp8_release_frame(s, &s->frames[i]);
135     memset(s->framep, 0, sizeof(s->framep));
/* AVCodec.flush callback: flush frames but keep the context memory. */
141 static void vp8_decode_flush(AVCodecContext *avctx)
143     vp8_decode_flush_impl(avctx, 0);
/* Pick a frame slot that is not currently serving as CURRENT, PREVIOUS,
 * GOLDEN or GOLDEN2 (altref). The pool has 5 slots and only 4 can be pinned,
 * so a free one must exist; the FATAL log below guards the invariant. Any
 * leftover buffer in the chosen slot is released before reuse. */
146 static VP8Frame *vp8_find_free_buffer(VP8Context *s)
148     VP8Frame *frame = NULL;
151     // find a free buffer
152     for (i = 0; i < 5; i++)
153         if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
154             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
155             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
156             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
157             frame = &s->frames[i];
161     av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
164     if (frame->tf.f->buf[0])
165         vp8_release_frame(s, frame);
/* Negotiate the output pixel format with the caller, offering the hwaccel
 * formats that were compiled in (VAAPI/NVDEC entries visible here) ahead of
 * the software format. */
170 static enum AVPixelFormat get_pixel_format(VP8Context *s)
172     enum AVPixelFormat pix_fmts[] = {
173 #if CONFIG_VP8_VAAPI_HWACCEL
176 #if CONFIG_VP8_NVDEC_HWACCEL
183     return ff_get_format(s->avctx, pix_fmts);
/* (Re)allocate all size-dependent context buffers for a new width/height.
 * Called on the first frame and whenever the coded dimensions change.
 * Chooses between two macroblock layouts: a single-row layout for frame
 * threading / single thread, and a padded 2D layout for sliced threading
 * (VP7 always uses the sliced layout). Also (re)negotiates the pixel format
 * for VP8 when needed — skipped for WebP (s->actually_webp) and VP7.
 * @return 0 on success, negative AVERROR on failure. */
186 static av_always_inline
187 int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
189     AVCodecContext *avctx = s->avctx;
190     int i, ret, dim_reset = 0;
/* Tear everything down if the dimensions (or mb grid, once allocated)
 * changed. Note the mb-grid test only applies when macroblocks_base exists. */
192     if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
193         height != s->avctx->height) {
194         vp8_decode_flush_impl(s->avctx, 1);
196         ret = ff_set_dimensions(s->avctx, width, height);
200         dim_reset = (s->macroblocks_base != NULL);
203     if ((s->pix_fmt == AV_PIX_FMT_NONE || dim_reset) &&
204          !s->actually_webp && !is_vp7) {
205         s->pix_fmt = get_pixel_format(s);
207             return AVERROR(EINVAL);
208         avctx->pix_fmt = s->pix_fmt;
211     s->mb_width  = (s->avctx->coded_width  + 15) / 16;
212     s->mb_height = (s->avctx->coded_height + 15) / 16;
214     s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
215                    avctx->thread_count > 1;
216     if (!s->mb_layout) { // Frame threading and one thread
217         s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
218                                                sizeof(*s->macroblocks));
/* Top-row 4x4 prediction modes are only needed in this layout. */
219         s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
220     } else // Sliced threading
221         s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
222                                          sizeof(*s->macroblocks));
223     s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
224     s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
225     s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
227     if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
228         !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
230         return AVERROR(ENOMEM);
233     for (i = 0; i < MAX_THREADS; i++) {
234         s->thread_data[i].filter_strength =
235             av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
236         if (!s->thread_data[i].filter_strength) {
238             return AVERROR(ENOMEM);
241         pthread_mutex_init(&s->thread_data[i].lock, NULL);
242         pthread_cond_init(&s->thread_data[i].cond, NULL);
/* Skip the leading guard entry of the macroblock array. */
246     s->macroblocks = s->macroblocks_base + 1;
/* VP7 wrapper so update_dimensions can be inlined per codec. */
251 static int vp7_update_dimensions(VP8Context *s, int width, int height)
253     return update_dimensions(s, width, height, IS_VP7);
/* VP8 wrapper so update_dimensions can be inlined per codec. */
256 static int vp8_update_dimensions(VP8Context *s, int width, int height)
258     return update_dimensions(s, width, height, IS_VP8);
/* Parse the segmentation part of the frame header: whether the per-MB
 * segment map and the per-segment feature data (quantizer and loop-filter
 * deltas, absolute or relative) are updated, plus the three tree
 * probabilities used to code segment ids (255 = "not coded" default). */
262 static void parse_segment_info(VP8Context *s)
264     VP56RangeCoder *c = &s->c;
267     s->segmentation.update_map = vp8_rac_get(c);
268     s->segmentation.update_feature_data = vp8_rac_get(c);
270     if (s->segmentation.update_feature_data) {
271         s->segmentation.absolute_vals = vp8_rac_get(c);
273         for (i = 0; i < 4; i++)
274             s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);
276         for (i = 0; i < 4; i++)
277             s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
279     if (s->segmentation.update_map)
280         for (i = 0; i < 3; i++)
281             s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/* Parse loop-filter level deltas: four per reference frame and four per
 * prediction mode (MODE_I4x4 .. VP8_MVMODE_SPLIT). Each delta is a 6-bit
 * magnitude followed by a sign flag (negated when set). */
284 static void update_lf_deltas(VP8Context *s)
286     VP56RangeCoder *c = &s->c;
289     for (i = 0; i < 4; i++) {
290         if (vp8_rac_get(c)) {
291             s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
294                 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
298     for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
299         if (vp8_rac_get(c)) {
300             s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
303                 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
/* Set up the DCT coefficient partitions (1, 2, 4 or 8). The sizes of all
 * partitions but the last are stored as 24-bit LE values at the start of
 * buf; the last partition takes whatever buffer remains. A range decoder is
 * initialized over each partition. Returns an error when a declared size
 * exceeds the remaining buffer. */
308 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
310     const uint8_t *sizes = buf;
314     s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
/* Skip past the size table (3 bytes per partition except the last). */
316     buf      += 3 * (s->num_coeff_partitions - 1);
317     buf_size -= 3 * (s->num_coeff_partitions - 1);
321     for (i = 0; i < s->num_coeff_partitions - 1; i++) {
322         int size = AV_RL24(sizes + 3 * i);
323         if (buf_size - size < 0)
325         s->coeff_partition_size[i] = size;
327         ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
/* Final partition: everything that is left. */
334     s->coeff_partition_size[i] = buf_size;
335     ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
/* Read the VP7 quantizer indices: a mandatory 7-bit Y AC index, then five
 * optional indices (Y DC, Y2 DC/AC, chroma DC/AC) that default to the Y AC
 * index when their presence flag is 0. The indices are mapped to multipliers
 * through the VP7 lookup tables; chroma DC is capped at 132. */
340 static void vp7_get_quants(VP8Context *s)
342     VP56RangeCoder *c = &s->c;
344     int yac_qi  = vp8_rac_get_uint(c, 7);
345     int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
346     int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
347     int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
348     int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
349     int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
351     s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
352     s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
353     s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
354     s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
355     s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
356     s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];
/* Read the VP8 quantizer block: a 7-bit base Y AC index plus five signed
 * 4-bit deltas, then build the dequant multiplier table for each of the four
 * segments. With segmentation enabled, each segment's base index is either
 * absolute or relative to yac_qi per segmentation.absolute_vals. All index
 * sums are clipped to [0,127] before the table lookup. */
359 static void vp8_get_quants(VP8Context *s)
361     VP56RangeCoder *c = &s->c;
364     s->quant.yac_qi     = vp8_rac_get_uint(c, 7);
365     s->quant.ydc_delta  = vp8_rac_get_sint(c, 4);
366     s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
367     s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
368     s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
369     s->quant.uvac_delta = vp8_rac_get_sint(c, 4);
371     for (i = 0; i < 4; i++) {
372         if (s->segmentation.enabled) {
373             base_qi = s->segmentation.base_quant[i];
374             if (!s->segmentation.absolute_vals)
375                 base_qi += s->quant.yac_qi;
377             base_qi = s->quant.yac_qi;
379         s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta,  7)];
380         s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,                       7)];
381         s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
382         /* 101581>>16 is equivalent to 155/100 */
383         s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
384         s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
385         s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];
/* Spec-mandated floors/caps on the derived multipliers. */
387         s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
388         s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
393 * Determine which buffers golden and altref should be updated with after this frame.
394 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
396 * Intra frames update all 3 references
397 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
398 * If the update (golden|altref) flag is set, it's updated with the current frame
399 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
400 * If the flag is not set, the number read means:
402 * 1: VP56_FRAME_PREVIOUS
403 * 2: update golden with altref, or update altref with golden
/* See the comment block above: map the golden/altref update signalling to
 * the frame that should be copied into this reference slot. With the update
 * flag set, the current frame is used; otherwise a 2-bit code selects none,
 * the previous frame, or the "other" special reference (golden<->altref). */
405 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
407     VP56RangeCoder *c = &s->c;
410         return VP56_FRAME_CURRENT;
412     switch (vp8_rac_get_uint(c, 2)) {
414         return VP56_FRAME_PREVIOUS;
416         return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
418     return VP56_FRAME_NONE;
/* Reset the DCT token probabilities to the spec defaults, expanding the
 * per-band default table to all 16 coefficient positions via
 * vp8_coeff_band. */
421 static void vp78_reset_probability_tables(VP8Context *s)
424     for (i = 0; i < 4; i++)
425         for (j = 0; j < 16; j++)
426             memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
427                    sizeof(s->prob->token[i][j]));
/* Token probability updates (13.3): for each (plane-type, band, context,
 * token) cell, a branch probability decides whether a new 8-bit value is
 * coded; when it is, the value is fanned out to every coefficient position
 * belonging to that band via vp8_coeff_band_indexes. */
430 static void vp78_update_probability_tables(VP8Context *s)
432     VP56RangeCoder *c = &s->c;
435     for (i = 0; i < 4; i++)
436         for (j = 0; j < 8; j++)
437             for (k = 0; k < 3; k++)
438                 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
439                     if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
440                         int prob = vp8_rac_get_uint(c, 8);
441                         for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
442                             s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
446 #define VP7_MVC_SIZE 17
447 #define VP8_MVC_SIZE 19
/* Inter-frame probability refresh: 16x16 luma mode (4 probs), 8x8 chroma
 * mode (3 probs), then the MV component trees (17.2). mvc_size is
 * VP7_MVC_SIZE or VP8_MVC_SIZE since the two codecs use different tree
 * sizes. */
449 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
452     VP56RangeCoder *c = &s->c;
456         for (i = 0; i < 4; i++)
457             s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
459         for (i = 0; i < 3; i++)
460             s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
462     // 17.2 MV probability update
463     for (i = 0; i < 2; i++)
464         for (j = 0; j < mvc_size; j++)
465             if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
466                 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
/* Read the golden/altref update flags and resolve, via ref_to_update, which
 * frame each of the two special references will be replaced with. */
469 static void update_refs(VP8Context *s)
471     VP56RangeCoder *c = &s->c;
473     int update_golden = vp8_rac_get(c);
474     int update_altref = vp8_rac_get(c);
476     s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
477     s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
/* Copy the two chroma planes (data[1], data[2]) from src to dst row by row.
 * width/height are luma dimensions; chroma is half-size in both axes
 * (4:2:0). */
480 static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
484     for (j = 1; j < 3; j++) {
485         for (i = 0; i < height / 2; i++)
486             memcpy(dst->data[j] + i * dst->linesize[j],
487                    src->data[j] + i * src->linesize[j], width / 2);
/* Apply the VP7 fade: per pixel compute y + (y*beta >> 8) + alpha, clipped
 * to 8 bits — a linear brightness/contrast ramp applied to the luma plane.
 * NOTE(review): alpha/beta parameters are declared past the visible lines. */
491 static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
492                  const uint8_t *src, ptrdiff_t src_linesize,
493                  int width, int height,
497     for (j = 0; j < height; j++) {
498         const uint8_t *src2 = src + j * src_linesize;
499         uint8_t *dst2 = dst + j * dst_linesize;
500         for (i = 0; i < width; i++) {
502             dst2[i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
/* Apply fading (section E of the VP7 header) to the previous frame before it
 * is used for prediction. If the golden frame is the same buffer as the
 * previous frame, a fresh previous-frame buffer is allocated first so the
 * golden frame is preserved; chroma is copied unchanged and only luma is
 * faded. Errors out on interframes that arrive before any keyframe. */
507 static int vp7_fade_frame(VP8Context *s, int alpha, int beta)
511     if (!s->keyframe && (alpha || beta)) {
512         int width  = s->mb_width * 16;
513         int height = s->mb_height * 16;
516         if (!s->framep[VP56_FRAME_PREVIOUS] ||
517             !s->framep[VP56_FRAME_GOLDEN]) {
518             av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
519             return AVERROR_INVALIDDATA;
523         src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
525         /* preserve the golden frame, write a new previous frame */
526         if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
527             s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
528             if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
531             dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
533             copy_chroma(dst, src, width, height);
536         fade(dst->data[0], dst->linesize[0],
537              src->data[0], src->linesize[0],
538              width, height, alpha, beta);
/* Parse the complete VP7 frame header (sections A through J), following the
 * section lettering of the VP7 format description. Validates the profile and
 * buffer sizes, sets up the boolean (range) decoders for the first partition
 * and the single coefficient partition, resets state on keyframes, and
 * finally applies fading. Returns 0 or a negative AVERROR.
 * The parsing order below is bitstream-mandated and must not change. */
544 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
546     VP56RangeCoder *c = &s->c;
547     int part1_size, hscale, vscale, i, j, ret;
548     int width  = s->avctx->width;
549     int height = s->avctx->height;
554         return AVERROR_INVALIDDATA;
557     s->profile = (buf[0] >> 1) & 7;
558     if (s->profile > 1) {
559         avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
560         return AVERROR_INVALIDDATA;
563     s->keyframe = !(buf[0] & 1);
/* 20-bit first-partition size packed after the frame tag. */
565     part1_size = AV_RL24(buf) >> 4;
567     if (buf_size < 4 - s->profile + part1_size) {
568         av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
569         return AVERROR_INVALIDDATA;
/* Header length depends on profile: profile 1 drops one byte. */
572     buf      += 4 - s->profile;
573     buf_size -= 4 - s->profile;
575     memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
577     ret = ff_vp56_init_range_decoder(c, buf, part1_size);
581     buf_size -= part1_size;
583     /* A. Dimension information (keyframes only) */
585         width  = vp8_rac_get_uint(c, 12);
586         height = vp8_rac_get_uint(c, 12);
587         hscale = vp8_rac_get_uint(c, 2);
588         vscale = vp8_rac_get_uint(c, 2);
589         if (hscale || vscale)
590             avpriv_request_sample(s->avctx, "Upscaling");
/* Keyframe: reset references, probabilities and per-frame state. */
592         s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
593         vp78_reset_probability_tables(s);
594         memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
595                sizeof(s->prob->pred16x16));
596         memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
597                sizeof(s->prob->pred8x8c));
598         for (i = 0; i < 2; i++)
599             memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
600                    sizeof(vp7_mv_default_prob[i]));
601         memset(&s->segmentation, 0, sizeof(s->segmentation));
602         memset(&s->lf_delta, 0, sizeof(s->lf_delta));
603         memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
606     if (s->keyframe || s->profile > 0)
607         memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));
609     /* B. Decoding information for all four macroblock-level features */
610     for (i = 0; i < 4; i++) {
611         s->feature_enabled[i] = vp8_rac_get(c);
612         if (s->feature_enabled[i]) {
613             s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
615             for (j = 0; j < 3; j++)
616                 s->feature_index_prob[i][j] =
617                     vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
619             if (vp7_feature_value_size[s->profile][i])
620                 for (j = 0; j < 4; j++)
621                     s->feature_value[i][j] =
622                         vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
/* VP7 has no segmentation/lf-delta syntax; force them off. */
626     s->segmentation.enabled    = 0;
627     s->segmentation.update_map = 0;
628     s->lf_delta.enabled        = 0;
/* VP7 always has exactly one coefficient partition. */
630     s->num_coeff_partitions = 1;
631     ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
635     if (!s->macroblocks_base || /* first frame */
636         width != s->avctx->width || height != s->avctx->height ||
637         (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
638         if ((ret = vp7_update_dimensions(s, width, height)) < 0)
642     /* C. Dequantization indices */
645     /* D. Golden frame update flag (a Flag) for interframes only */
647         s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
648         s->sign_bias[VP56_FRAME_GOLDEN] = 0;
652     s->update_probabilities = 1;
655     if (s->profile > 0) {
656         s->update_probabilities = vp8_rac_get(c);
/* If this frame's probability updates are not persistent, keep a copy. */
657         if (!s->update_probabilities)
658             s->prob[1] = s->prob[0];
661             s->fade_present = vp8_rac_get(c);
/* Guard against running past the end of the range-coded data. */
664     if (vpX_rac_is_end(c))
665         return AVERROR_INVALIDDATA;
666     /* E. Fading information for previous frame */
667     if (s->fade_present && vp8_rac_get(c)) {
668         alpha = (int8_t) vp8_rac_get_uint(c, 8);
669         beta  = (int8_t) vp8_rac_get_uint(c, 8);
672     /* F. Loop filter type */
674         s->filter.simple = vp8_rac_get(c);
676     /* G. DCT coefficient ordering specification */
678         for (i = 1; i < 16; i++)
679             s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];
681     /* H. Loop filter levels */
683     s->filter.simple    = vp8_rac_get(c);
684     s->filter.level     = vp8_rac_get_uint(c, 6);
685     s->filter.sharpness = vp8_rac_get_uint(c, 3);
687     /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
688     vp78_update_probability_tables(s);
690     s->mbskip_enabled = 0;
692     /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
694         s->prob->intra = vp8_rac_get_uint(c, 8);
695         s->prob->last  = vp8_rac_get_uint(c, 8);
696         vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
699     if (vpX_rac_is_end(c))
700         return AVERROR_INVALIDDATA;
702     if ((ret = vp7_fade_frame(s, alpha, beta)) < 0)
/* Parse the complete VP8 frame header (RFC 6386 section 9): frame tag,
 * keyframe start code and dimensions, segmentation, loop filter, coefficient
 * partitions, quantizers, reference update flags and probability updates.
 * Ends by snapshotting the range-coder state for hwaccels. Returns 0 or a
 * negative AVERROR. The parsing order is bitstream-mandated. */
708 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
710     VP56RangeCoder *c = &s->c;
711     int header_size, hscale, vscale, ret;
712     int width  = s->avctx->width;
713     int height = s->avctx->height;
716         av_log(s->avctx, AV_LOG_ERROR, "Insufficent data (%d) for header\n", buf_size);
717         return AVERROR_INVALIDDATA;
/* Frame tag: keyframe flag, 3-bit profile, show-frame flag, 19-bit size. */
720     s->keyframe  = !(buf[0] & 1);
721     s->profile   =  (buf[0]>>1) & 7;
722     s->invisible = !(buf[0] & 0x10);
723     header_size  = AV_RL24(buf) >> 5;
727     s->header_partition_size = header_size;
730         av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
/* Profile 0 uses 6-tap subpel filters; profiles 1-3 use bilinear. */
733         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
734                sizeof(s->put_pixels_tab));
735     else // profile 1-3 use bilinear, 4+ aren't defined so whatever
736         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
737                sizeof(s->put_pixels_tab));
739     if (header_size > buf_size - 7 * s->keyframe) {
740         av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
741         return AVERROR_INVALIDDATA;
/* Keyframes carry a fixed start code then 14-bit dimensions + scaling. */
745         if (AV_RL24(buf) != 0x2a019d) {
746             av_log(s->avctx, AV_LOG_ERROR,
747                    "Invalid start code 0x%x\n", AV_RL24(buf));
748             return AVERROR_INVALIDDATA;
750         width  = AV_RL16(buf + 3) & 0x3fff;
751         height = AV_RL16(buf + 5) & 0x3fff;
752         hscale = buf[4] >> 6;
753         vscale = buf[6] >> 6;
757         if (hscale || vscale)
758             avpriv_request_sample(s->avctx, "Upscaling");
/* Keyframe: reset references and all probability tables to defaults. */
760         s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
761         vp78_reset_probability_tables(s);
762         memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
763                sizeof(s->prob->pred16x16));
764         memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
765                sizeof(s->prob->pred8x8c));
766         memcpy(s->prob->mvc, vp8_mv_default_prob,
767                sizeof(s->prob->mvc));
768         memset(&s->segmentation, 0, sizeof(s->segmentation));
769         memset(&s->lf_delta, 0, sizeof(s->lf_delta));
772     ret = ff_vp56_init_range_decoder(c, buf, header_size);
776     buf_size -= header_size;
779         s->colorspace = vp8_rac_get(c);
781             av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
782         s->fullrange = vp8_rac_get(c);
785     if ((s->segmentation.enabled = vp8_rac_get(c)))
786         parse_segment_info(s);
788         s->segmentation.update_map = 0; // FIXME: move this to some init function?
790     s->filter.simple    = vp8_rac_get(c);
791     s->filter.level     = vp8_rac_get_uint(c, 6);
792     s->filter.sharpness = vp8_rac_get_uint(c, 3);
794     if ((s->lf_delta.enabled = vp8_rac_get(c))) {
795         s->lf_delta.update = vp8_rac_get(c);
796         if (s->lf_delta.update)
800     if (setup_partitions(s, buf, buf_size)) {
801         av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
802         return AVERROR_INVALIDDATA;
805     if (!s->macroblocks_base || /* first frame */
806         width != s->avctx->width || height != s->avctx->height ||
807         (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
808         if ((ret = vp8_update_dimensions(s, width, height)) < 0)
815         s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
816         s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
819     // if we aren't saving this frame's probabilities for future frames,
820     // make a copy of the current probabilities
821     if (!(s->update_probabilities = vp8_rac_get(c)))
822         s->prob[1] = s->prob[0];
824     s->update_last = s->keyframe || vp8_rac_get(c);
826     vp78_update_probability_tables(s);
828     if ((s->mbskip_enabled = vp8_rac_get(c)))
829         s->prob->mbskip = vp8_rac_get_uint(c, 8);
832         s->prob->intra  = vp8_rac_get_uint(c, 8);
833         s->prob->last   = vp8_rac_get_uint(c, 8);
834         s->prob->golden = vp8_rac_get_uint(c, 8);
835         vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
838     // Record the entropy coder state here so that hwaccels can use it.
839     s->c.code_word = vp56_rac_renorm(&s->c);
840     s->coder_state_at_header_end.input     = s->c.buffer - (-s->c.bits / 8);
841     s->coder_state_at_header_end.range     = s->c.high;
842     s->coder_state_at_header_end.value     = s->c.code_word >> 16;
843     s->coder_state_at_header_end.bit_count = -s->c.bits % 8;
/* Clamp a motion vector to the per-macroblock bounds in *s; the bounds
 * themselves are clipped to int16 range first so the result always fits the
 * int16 VP56mv fields. */
848 static av_always_inline
849 void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
851     dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
852                      av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
853     dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
854                      av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
858  * Motion vector coding, 17.1.
/* Decode one signed MV component using the probability array p. The first
 * branch selects between the "long" representation (bits coded individually;
 * VP7 uses 8 magnitude bits, VP8 uses 10) and the short tree walk; the final
 * probability p[1] codes the sign. */
860 static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
864     if (vp56_rac_get_prob_branchy(c, p[0])) {
867         for (i = 0; i < 3; i++)
868             x += vp56_rac_get_prob(c, p[9 + i]) << i;
/* High-order magnitude bits, decoded most significant first. */
869         for (i = (vp7 ? 7 : 9); i > 3; i--)
870             x += vp56_rac_get_prob(c, p[9 + i]) << i;
871         if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
875         const uint8_t *ps = p + 2;
876         bit = vp56_rac_get_prob(c, *ps);
879         bit = vp56_rac_get_prob(c, *ps);
882         x += vp56_rac_get_prob(c, *ps);
885     return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
/* VP7 wrapper for read_mv_component (7-bit magnitude variant). */
888 static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
890     return read_mv_component(c, p, 1);
/* VP8 wrapper for read_mv_component (10-bit magnitude variant). */
893 static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
895     return read_mv_component(c, p, 0);
/* Choose the sub-MV probability set from the left and top neighbouring
 * sub-block MVs. VP7 uses a single fixed table; VP8 selects among five
 * tables depending on which neighbours are zero/equal. */
898 static av_always_inline
899 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
902         return vp7_submv_prob;
905             return vp8_submv_prob[4 - !!left];
907             return vp8_submv_prob[2];
908     return vp8_submv_prob[1 - !!left];
912  * Split motion vector prediction, 16.4.
913  * @returns the number of motion vectors parsed (2, 4 or 16)
915 static av_always_inline
916 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
917                     int layout, int is_vp7)
921     VP8Macroblock *top_mb;
922     VP8Macroblock *left_mb = &mb[-1];
/* Neighbour partitionings map sub-block indices into their bmv arrays. */
923     const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
924     const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
926     VP56mv *left_mv  = left_mb->bmv;
927     VP56mv *cur_mv   = mb->bmv;
929     if (!layout) // layout is inlined, s->mb_layout is not
932         top_mb = &mb[-s->mb_width - 1];
933     mbsplits_top = vp8_mbsplits[top_mb->partitioning];
934     top_mv       = top_mb->bmv;
/* Tree-decode the partitioning: 16x8 / 8x16 / 8x8 / 4x4. */
936     if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
937         if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
938             part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
940             part_idx = VP8_SPLITMVMODE_8x8;
942         part_idx = VP8_SPLITMVMODE_4x4;
945     num              = vp8_mbsplit_count[part_idx];
946     mbsplits_cur     = vp8_mbsplits[part_idx],
947     firstidx         = vp8_mbfirstidx[part_idx];
948     mb->partitioning = part_idx;
950     for (n = 0; n < num; n++) {
952         uint32_t left, above;
953         const uint8_t *submv_prob;
/* Left/above context: neighbour MB for edge sub-blocks, else this MB. */
956             left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
958             left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
960             above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
962             above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
964         submv_prob = get_submv_prob(left, above, is_vp7);
966         if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
967             if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
968                 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
/* NEW4x4: explicit MV delta relative to the macroblock MV. */
969                     mb->bmv[n].y = mb->mv.y +
970                                    read_mv_component(c, s->prob->mvc[0], is_vp7);
971                     mb->bmv[n].x = mb->mv.x +
972                                    read_mv_component(c, s->prob->mvc[1], is_vp7);
974                     AV_ZERO32(&mb->bmv[n]);
977                 AV_WN32A(&mb->bmv[n], above);
980             AV_WN32A(&mb->bmv[n], left);
988 * The vp7 reference decoder uses a padding macroblock column (added to right
989 * edge of the frame) to guard against illegal macroblock offsets. The
990 * algorithm has bugs that permit offsets to straddle the padding column.
991 * This function replicates those bugs.
993 * @param[out] edge_x macroblock x address
994 * @param[out] edge_y macroblock y address
996 * @return macroblock offset legal (boolean)
/* See the comment block above: compute the (edge_x, edge_y) macroblock
 * address for a predictor offset on the padded (mb_width + 1) grid,
 * intentionally reproducing the reference decoder's wraparound quirks.
 * Rejects offsets before the boundary or landing on the padding column. */
998 static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
999                                    int xoffset, int yoffset, int boundary,
1000                                    int *edge_x, int *edge_y)
1002     int vwidth = mb_width + 1;
1003     int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
1004     if (new < boundary || new % vwidth == vwidth - 1)
1006     *edge_y = new / vwidth;
1007     *edge_x = new % vwidth;
/* Return the sub-block MV for SPLIT macroblocks, else the macroblock MV
 * stored in bmv[0]. */
1011 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
1013     return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
/* VP7 inter-MB motion vector decoding: score up to VP7_MV_PRED_COUNT
 * candidate predictors from neighbouring macroblocks (using the buggy
 * padded-grid addressing replicated by vp7_calculate_mb_offset), accumulate
 * zero/nearest/near counts, then tree-decode the MV mode (ZERO, NEAREST,
 * NEAR, SPLIT or NEW) against the count-indexed mode contexts. */
1016 static av_always_inline
1017 void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
1018                     int mb_x, int mb_y, int layout)
1020     VP8Macroblock *mb_edge[12];
1021     enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
1022     enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1025     uint8_t cnt[3] = { 0 };
1026     VP56RangeCoder *c = &s->c;
1029     AV_ZERO32(&near_mv[0]);
1030     AV_ZERO32(&near_mv[1]);
1031     AV_ZERO32(&near_mv[2]);
1033     for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
1034         const VP7MVPred * pred = &vp7_mv_pred[i];
1037         if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
1038                                     pred->yoffset, !s->profile, &edge_x, &edge_y)) {
/* Address the edge MB according to the active macroblock layout. */
1039             VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
1040                                              ? s->macroblocks_base + 1 + edge_x +
1041                                                (s->mb_width + 1) * (edge_y + 1)
1042                                              : s->macroblocks + edge_x +
1043                                                (s->mb_height - edge_y - 1) * 2;
1044             uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
/* Bucket the candidate: matches nearest, matches near, or new entry. */
1046                 if (AV_RN32A(&near_mv[CNT_NEAREST])) {
1047                     if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
1049                     } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
1050                         if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
1054                         AV_WN32A(&near_mv[CNT_NEAR], mv);
1058                     AV_WN32A(&near_mv[CNT_NEAREST], mv);
1067             cnt[idx] += vp7_mv_pred[i].score;
1070     mb->partitioning = VP8_SPLITMVMODE_NONE;
1072     if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
1073         mb->mode = VP8_MVMODE_MV;
1075         if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
1077             if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
1079                 if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
1080                     AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
1082                     AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
1084                 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
1085                     mb->mode = VP8_MVMODE_SPLIT;
1086                     mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
1088                     mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
1089                     mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
1090                     mb->bmv[0] = mb->mv;
1093                 mb->mv = near_mv[CNT_NEAR];
1094                 mb->bmv[0] = mb->mv;
1097             mb->mv = near_mv[CNT_NEAREST];
1098             mb->bmv[0] = mb->mv;
1101         mb->mode = VP8_MVMODE_ZERO;
1103         mb->bmv[0] = mb->mv;
/* VP8 inter-MB motion vector decoding (RFC 6386 section 17): survey the
 * top, left and top-left neighbour MBs via MV_EDGE_CHECK (flipping sign
 * when reference sign biases differ), build zero/nearest/near/splitmv
 * counts, then tree-decode the MV mode against the count-indexed contexts.
 * Resulting near/nearest MVs are clamped to mv_bounds. */
1107 static av_always_inline
1108 void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
1109                     int mb_x, int mb_y, int layout)
1111     VP8Macroblock *mb_edge[3] = { 0      /* top */,
1114     enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1115     enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1117     int cur_sign_bias = s->sign_bias[mb->ref_frame];
1118     int8_t *sign_bias = s->sign_bias;
1120     uint8_t cnt[4] = { 0 };
1121     VP56RangeCoder *c = &s->c;
/* The top / top-left MB addresses depend on the macroblock layout. */
1123     if (!layout) { // layout is inlined (s->mb_layout is not)
1124         mb_edge[0] = mb + 2;
1125         mb_edge[2] = mb + 1;
1127         mb_edge[0] = mb - s->mb_width - 1;
1128         mb_edge[2] = mb - s->mb_width - 2;
1131     AV_ZERO32(&near_mv[0]);
1132     AV_ZERO32(&near_mv[1]);
1133     AV_ZERO32(&near_mv[2]);
1135     /* Process MB on top, left and top-left */
1136 #define MV_EDGE_CHECK(n)                                                      \
1138         VP8Macroblock *edge = mb_edge[n];                                     \
1139         int edge_ref = edge->ref_frame;                                       \
1140         if (edge_ref != VP56_FRAME_CURRENT) {                                 \
1141             uint32_t mv = AV_RN32A(&edge->mv);                                \
1143                 if (cur_sign_bias != sign_bias[edge_ref]) {                   \
1144                     /* SWAR negate of the values in mv. */                    \
1146                     mv = ((mv & 0x7fff7fff) +                                 \
1147                           0x00010001) ^ (mv & 0x80008000);                    \
1149                 if (!n || mv != AV_RN32A(&near_mv[idx]))                      \
1150                     AV_WN32A(&near_mv[++idx], mv);                            \
1151                 cnt[idx] += 1 + (n != 2);                                     \
1153                 cnt[CNT_ZERO] += 1 + (n != 2);                                \
1161     mb->partitioning = VP8_SPLITMVMODE_NONE;
1162     if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1163         mb->mode = VP8_MVMODE_MV;
1165         /* If we have three distinct MVs, merge first and last if they're the same */
1166         if (cnt[CNT_SPLITMV] &&
1167             AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1168             cnt[CNT_NEAREST] += 1;
1170         /* Swap near and nearest if necessary */
1171         if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1172             FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
1173             FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1176         if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1177             if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1178                 /* Choose the best mv out of 0,0 and the nearest mv */
1179                 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
/* Splitmv context: how many neighbours themselves used SPLIT. */
1180                 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
1181                                     (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
1182                                     (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1184                 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1185                     mb->mode = VP8_MVMODE_SPLIT;
1186                     mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1188                     mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
1189                     mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
1190                     mb->bmv[0] = mb->mv;
1193                 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
1194                 mb->bmv[0] = mb->mv;
1197             clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
1198             mb->bmv[0] = mb->mv;
1201         mb->mode = VP8_MVMODE_ZERO;
1203         mb->bmv[0] = mb->mv;
1207 static av_always_inline
/**
 * Read the 4x4 intra prediction modes for a MODE_I4x4 macroblock.
 * On keyframes the mode of each sub-block is coded with a context derived
 * from the modes of the blocks above and to the left; on inter frames a
 * single context-free probability table is used.
 * NOTE(review): interior lines of this function are elided in this excerpt
 * (original line numbers jump); comments describe only the visible code.
 */
1208 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1209 int mb_x, int keyframe, int layout)
1211 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
/* With the alternate (layout) arrangement the top-predictor row is cached
 * per-macroblock; copy it from the macroblock one row up. */
1214 VP8Macroblock *mb_top = mb - s->mb_width - 1;
1215 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1220 uint8_t *const left = s->intra4x4_pred_mode_left;
1222 top = mb->intra4x4_pred_mode_top;
1224 top = s->intra4x4_pred_mode_top + 4 * mb_x;
/* Keyframe path: context is the pair (mode above, mode to the left). */
1225 for (y = 0; y < 4; y++) {
1226 for (x = 0; x < 4; x++) {
1228 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
1229 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
/* The decoded mode becomes the predictor for the next block in
 * both directions. */
1230 left[y] = top[x] = *intra4x4;
/* Inter-frame path: all 16 sub-block modes share one probability table. */
1236 for (i = 0; i < 16; i++)
1237 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
1238 vp8_pred4x4_prob_inter);
1242 static av_always_inline
/**
 * Decode the per-macroblock mode information: segment id, skip flag,
 * intra/inter decision, prediction modes and (for inter MBs) the reference
 * frame and motion vectors.
 * NOTE(review): interior lines are elided in this excerpt; comments cover
 * the visible code only.
 */
1243 void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
1244 VP8Macroblock *mb, int mb_x, int mb_y,
1245 uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
1247 VP56RangeCoder *c = &s->c;
1248 static const char * const vp7_feature_name[] = { "q-index",
1250 "partial-golden-update",
/* VP7 "features": signalled per-MB but unsupported here — only warn. */
1255 for (i = 0; i < 4; i++) {
1256 if (s->feature_enabled[i]) {
1257 if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
1258 int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
1259 s->feature_index_prob[i]);
1260 av_log(s->avctx, AV_LOG_WARNING,
1261 "Feature %s present in macroblock (value 0x%x)\n",
1262 vp7_feature_name[i], s->feature_value[i][index]);
/* Segment id: explicitly coded when the map is updated, otherwise carried
 * over from the previous frame's segment map (if segmentation is on). */
1266 } else if (s->segmentation.update_map) {
1267 int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
1268 *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
1269 } else if (s->segmentation.enabled)
1270 *segment = ref ? *ref : *segment;
1271 mb->segment = *segment;
1273 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
/* Keyframe: intra modes use the fixed keyframe probability tables. */
1276 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
1277 vp8_pred16x16_prob_intra);
1279 if (mb->mode == MODE_I4x4) {
1280 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
/* A whole-MB 16x16 mode is replicated into all 16 4x4 predictor slots
 * so neighbouring I4x4 MBs see consistent context. */
1282 const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1283 : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1285 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1287 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1288 AV_WN32A(s->intra4x4_pred_mode_left, modes);
1291 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1292 vp8_pred8x8c_prob_intra);
1293 mb->ref_frame = VP56_FRAME_CURRENT;
/* Inter macroblock: pick the reference frame (previous / golden / altref;
 * VP7 has no altref), then decode motion vectors. */
1294 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
1296 if (vp56_rac_get_prob_branchy(c, s->prob->last))
1298 (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
1299 : VP56_FRAME_GOLDEN;
1301 mb->ref_frame = VP56_FRAME_PREVIOUS;
/* ref_count feeds the prefetch heuristic in prefetch_motion(). */
1302 s->ref_count[mb->ref_frame - 1]++;
1304 // motion vectors, 16.3
1306 vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1308 vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
/* Intra macroblock on an inter frame: modes use the adaptive tables. */
1311 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1313 if (mb->mode == MODE_I4x4)
1314 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1316 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1318 mb->ref_frame = VP56_FRAME_CURRENT;
1319 mb->partitioning = VP8_SPLITMVMODE_NONE;
1320 AV_ZERO32(&mb->bmv[0]);
1325 * @param r arithmetic bitstream reader context
1326 * @param block destination for block coefficients
1327 * @param probs probabilities to use when reading trees from the bitstream
1328 * @param i initial coeff index, 0 unless a separate DC block is coded
1329 * @param qmul array holding the dc/ac dequant factor at position 0/1
1331 * @return 0 if no coeffs were decoded
1332 * otherwise, the index of the last coeff decoded plus one
1334 static av_always_inline
/**
 * Core DCT coefficient decoding loop (see the doxygen block above).
 * Works on a local copy of the range coder for speed and writes it back
 * on exit. Token semantics follow RFC 6386 section 13.
 * NOTE(review): interior lines (loop header, EOB/early-exit paths, the
 * write-back of the coder state, final return) are elided in this excerpt.
 */
1335 int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1336 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1337 int i, uint8_t *token_prob, int16_t qmul[2],
1338 const uint8_t scan[16], int vp7)
/* Local copy of the coder: keeps the hot state in registers. */
1340 VP56RangeCoder c = *r;
1345 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
1349 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1351 break; // invalid input; blocks should end with EOB
/* After a zero, context band 0 ("last was zero") is used. */
1352 token_prob = probs[i][0];
1358 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1360 token_prob = probs[i + 1][1];
1362 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1363 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1365 coeff += vp56_rac_get_prob(&c, token_prob[5]);
1369 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1370 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1371 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1372 } else { // DCT_CAT2
1374 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1375 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1377 } else { // DCT_CAT3 and up
/* Two bits select among CAT3..CAT6; base value is 3 + 8<<cat. */
1378 int a = vp56_rac_get_prob(&c, token_prob[8]);
1379 int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
1380 int cat = (a << 1) + b;
1381 coeff = 3 + (8 << cat);
1382 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
1385 token_prob = probs[i + 1][2];
/* Sign bit, then dequantize: qmul[0] for DC (i==0), qmul[1] for AC. */
1387 block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
1394 static av_always_inline
/**
 * VP7-only DC prediction for inter macroblocks: tracks a running DC
 * predictor per reference frame in pred[0] with a hit counter in pred[1],
 * and replaces/updates block[0] accordingly.
 * NOTE(review): several interior lines (the pred[1] > 3 accumulation path,
 * the else branch, the return) are elided in this excerpt — original line
 * numbers jump 1397→1405 and 1406→1411; do not infer control flow from
 * the two visible assignments alone.
 */
1395 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1397 int16_t dc = block[0];
/* Branchless test: pred[0] == 0, dc == 0, or pred[0] and dc have
 * opposite signs (XOR of sign bits shifted down). */
1405 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1406 block[0] = pred[0] = dc;
1411 block[0] = pred[0] = dc;
1417 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1419 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1420 int i, uint8_t *token_prob,
1422 const uint8_t scan[16])
1424 return decode_block_coeffs_internal(r, block, probs, i,
1425 token_prob, qmul, scan, IS_VP7);
1428 #ifndef vp8_decode_block_coeffs_internal
1429 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1431 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1432 int i, uint8_t *token_prob,
1435 return decode_block_coeffs_internal(r, block, probs, i,
1436 token_prob, qmul, ff_zigzag_scan, IS_VP8);
1441 * @param c arithmetic bitstream reader context
1442 * @param block destination for block coefficients
1443 * @param probs probabilities to use when reading trees from the bitstream
1444 * @param i initial coeff index, 0 unless a separate DC block is coded
1445 * @param zero_nhood the initial prediction context for number of surrounding
1446 * all-zero blocks (only left/top, so 0-2)
1447 * @param qmul array holding the dc/ac dequant factor at position 0/1
1448 * @param scan scan pattern (VP7 only)
1450 * @return 0 if no coeffs were decoded
1451 * otherwise, the index of the last coeff decoded plus one
1453 static av_always_inline
1454 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1455 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1456 int i, int zero_nhood, int16_t qmul[2],
1457 const uint8_t scan[16], int vp7)
1459 uint8_t *token_prob = probs[i][zero_nhood];
1460 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1462 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1463 token_prob, qmul, scan)
1464 : vp8_decode_block_coeffs_internal(c, block, probs, i,
1468 static av_always_inline
/**
 * Decode all residual coefficients of one macroblock: the optional
 * separate luma DC block (with WHT), the 16 luma 4x4 blocks, and the
 * 8 chroma 4x4 blocks. t_nnz/l_nnz hold the top/left "block had
 * coefficients" context (entries 0-3 luma, 4-7 chroma, 8 luma DC).
 * NOTE(review): interior lines are elided in this excerpt; comments
 * cover the visible code only.
 */
1469 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1470 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1473 int i, x, y, luma_start = 0, luma_ctx = 3;
1474 int nnz_pred, nnz, nnz_total = 0;
1475 int segment = mb->segment;
/* A separate luma DC plane is coded for all whole-MB modes (everything
 * except I4x4, and for VP8 also except SPLITMV). */
1478 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1479 nnz_pred = t_nnz[8] + l_nnz[8];
1481 // decode DC values and do hadamard
1482 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1483 nnz_pred, s->qmat[segment].luma_dc_qmul,
1484 ff_zigzag_scan, is_vp7);
1485 l_nnz[8] = t_nnz[8] = !!nnz;
/* VP7 inter MBs additionally run the per-reference DC predictor. */
1487 if (is_vp7 && mb->mode > MODE_I4x4) {
1488 nnz |= inter_predict_dc(td->block_dc,
1489 s->inter_dc_pred[mb->ref_frame - 1]);
/* Inverse Walsh-Hadamard scatters the DC values into the 16 blocks;
 * the _dc variant handles the DC-only case cheaply. */
1496 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1498 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
/* Luma AC blocks; luma_start/luma_ctx are 1/0 when DC was separate. */
1505 for (y = 0; y < 4; y++)
1506 for (x = 0; x < 4; x++) {
1507 nnz_pred = l_nnz[y] + t_nnz[x];
1508 nnz = decode_block_coeffs(c, td->block[y][x],
1509 s->prob->token[luma_ctx],
1510 luma_start, nnz_pred,
1511 s->qmat[segment].luma_qmul,
1512 s->prob[0].scan, is_vp7);
1513 /* nnz+block_dc may be one more than the actual last index,
1514 * but we don't care */
1515 td->non_zero_count_cache[y][x] = nnz + block_dc;
1516 t_nnz[x] = l_nnz[y] = !!nnz;
1521 // TODO: what to do about dimensions? 2nd dim for luma is x,
1522 // but for chroma it's (y<<1)|x
1523 for (i = 4; i < 6; i++)
1524 for (y = 0; y < 2; y++)
1525 for (x = 0; x < 2; x++) {
1526 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1527 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1528 s->prob->token[2], 0, nnz_pred,
1529 s->qmat[segment].chroma_qmul,
1530 s->prob[0].scan, is_vp7);
1531 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1532 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1536 // if there were no coded coeffs despite the macroblock not being marked skip,
1537 // we MUST not do the inner loop filter and should not do IDCT
1538 // Since skip isn't used for bitstream prediction, just manually set it.
1543 static av_always_inline
1544 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1545 uint8_t *src_cb, uint8_t *src_cr,
1546 ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
1548 AV_COPY128(top_border, src_y + 15 * linesize);
1550 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1551 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1555 static av_always_inline
1556 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1557 uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
1558 int mb_y, int mb_width, int simple, int xchg)
1560 uint8_t *top_border_m1 = top_border - 32; // for TL prediction
1562 src_cb -= uvlinesize;
1563 src_cr -= uvlinesize;
1565 #define XCHG(a, b, xchg) \
1573 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1574 XCHG(top_border, src_y, xchg);
1575 XCHG(top_border + 8, src_y + 8, 1);
1576 if (mb_x < mb_width - 1)
1577 XCHG(top_border + 32, src_y + 16, 1);
1579 // only copy chroma for normal loop filter
1580 // or to initialize the top row to 127
1581 if (!simple || !mb_y) {
1582 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1583 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1584 XCHG(top_border + 16, src_cb, 1);
1585 XCHG(top_border + 24, src_cr, 1);
1589 static av_always_inline
1590 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1593 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1595 return mb_y ? mode : LEFT_DC_PRED8x8;
1598 static av_always_inline
1599 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1602 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1604 return mb_y ? mode : HOR_PRED8x8;
1607 static av_always_inline
1608 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1612 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1614 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1616 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1617 case PLANE_PRED8x8: /* TM */
1618 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1623 static av_always_inline
1624 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1627 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1629 return mb_y ? mode : HOR_VP8_PRED;
1633 static av_always_inline
1634 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1635 int *copy_buf, int vp7)
1639 if (!mb_x && mb_y) {
1644 case DIAG_DOWN_LEFT_PRED:
1645 case VERT_LEFT_PRED:
1646 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1654 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1656 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1657 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1658 * as 16x16/8x8 DC */
1659 case DIAG_DOWN_RIGHT_PRED:
1660 case VERT_RIGHT_PRED:
1669 static av_always_inline
/**
 * Perform intra prediction for one macroblock (luma 16x16 or 16 * 4x4,
 * plus chroma 8x8) and add the IDCT residual for the 4x4 path.
 * NOTE(review): interior lines are elided in this excerpt; comments cover
 * the visible code only.
 */
1670 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1671 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1673 int x, y, mode, nnz;
1676 /* for the first row, we need to run xchg_mb_border to init the top edge
1677 * to 127 otherwise, skip it if we aren't going to deblock */
/* NOTE(review): the "|| !mb_y" term is dead here — mb_y is already
 * required to be non-zero by the leading "mb_y &&". */
1678 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1679 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1680 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1681 s->filter.simple, 1);
/* Whole-MB 16x16 prediction. */
1683 if (mb->mode < MODE_I4x4) {
1684 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1685 s->hpc.pred16x16[mode](dst[0], s->linesize);
/* I4x4 path: predict and reconstruct each 4x4 block in turn. */
1687 uint8_t *ptr = dst[0];
1688 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
/* Flat fill values for missing edges; VP7 always uses 128. */
1689 const uint8_t lo = is_vp7 ? 128 : 127;
1690 const uint8_t hi = is_vp7 ? 128 : 129;
1691 uint8_t tr_top[4] = { lo, lo, lo, lo };
// all blocks on the right edge of the macroblock use the bottom edge
// of the top macroblock for their topright edge
1695 uint8_t *tr_right = ptr - s->linesize + 16;
1697 // if we're on the right edge of the frame, said edge is extended
1698 // from the top macroblock
1699 if (mb_y && mb_x == s->mb_width - 1) {
1700 tr = tr_right[-1] * 0x01010101u;
1701 tr_right = (uint8_t *) &tr;
1705 AV_ZERO128(td->non_zero_count_cache);
1707 for (y = 0; y < 4; y++) {
1708 uint8_t *topright = ptr + 4 - s->linesize;
1709 for (x = 0; x < 4; x++) {
1711 ptrdiff_t linesize = s->linesize;
1712 uint8_t *dst = ptr + 4 * x;
/* 5 rows x 8 bytes scratch: row 0 = top edge, col 3 = left edge. */
1713 LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);
1715 if ((y == 0 || x == 3) && mb_y == 0) {
1718 topright = tr_right;
/* NOTE(review): "©" below is mojibake for "&copy," (HTML entity
 * corruption) — restore before compiling. */
1720 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1721 mb_y + y, ©, is_vp7);
/* Predict into the scratch buffer when edges must be emulated. */
1723 dst = copy_dst + 12;
1727 AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1729 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1733 copy_dst[3] = ptr[4 * x - s->linesize - 1];
1742 copy_dst[11] = ptr[4 * x - 1];
1743 copy_dst[19] = ptr[4 * x + s->linesize - 1];
1744 copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1745 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1748 s->hpc.pred4x4[mode](dst, topright, linesize);
/* Copy the predicted 4x4 block back from the scratch buffer. */
1750 AV_COPY32(ptr + 4 * x, copy_dst + 12);
1751 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1752 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1753 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
/* Add the residual: DC-only or full IDCT depending on the count. */
1756 nnz = td->non_zero_count_cache[y][x];
1759 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1760 td->block[y][x], s->linesize);
1762 s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1763 td->block[y][x], s->linesize);
1768 ptr += 4 * s->linesize;
/* Chroma 8x8 prediction, shared mode for both planes. */
1773 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1774 mb_x, mb_y, is_vp7);
1775 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1776 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
/* Restore the borders swapped in at the top of the function. */
1778 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1779 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1780 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1781 s->filter.simple, 0);
/* Per-subpel-position lookup for motion compensation: for each of the 8
 * fractional positions, how many extra pixels the interpolation filter
 * needs to the left, in total, and to the right (0 = full-pel, odd
 * positions = bilinear (1/2 taps), even = six-tap). Row 0 doubles as the
 * index into the mc_func tables. */
1784 static const uint8_t subpel_idx[3][8] = {
1785 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1786 // also function pointer index
1787 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1788 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1794 * @param s VP8 decoding context
1795 * @param dst target buffer for block data at block position
1796 * @param ref reference picture buffer at origin (0, 0)
1797 * @param mv motion vector (relative to block position) to get pixel data from
1798 * @param x_off horizontal position of block from origin (0, 0)
1799 * @param y_off vertical position of block from origin (0, 0)
1800 * @param block_w width of block (16, 8 or 4)
1801 * @param block_h height of block (always same as block_w)
1802 * @param width width of src/dst plane data
1803 * @param height height of src/dst plane data
1804 * @param linesize size of a single line of plane data, including padding
1805 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1807 static av_always_inline
/**
 * Luma motion compensation (see the doxygen block above): quarter-pel MV,
 * edge emulation when the filtered source window leaves the frame, and
 * frame-threading synchronization with the reference frame's progress.
 * NOTE(review): interior lines are elided in this excerpt.
 */
1808 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1809 ThreadFrame *ref, const VP56mv *mv,
1810 int x_off, int y_off, int block_w, int block_h,
1811 int width, int height, ptrdiff_t linesize,
1812 vp8_mc_func mc_func[3][3])
1814 uint8_t *src = ref->f->data[0];
1817 ptrdiff_t src_linesize = linesize;
/* Quarter-pel MV -> eighth-pel phase (mv*2) & 7 selects the filter. */
1819 int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
1820 int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];
1822 x_off += mv->x >> 2;
1823 y_off += mv->y >> 2;
/* Wait until the reference row needed (incl. filter margin) is decoded. */
1826 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1827 src += y_off * linesize + x_off;
/* Source window (block + filter margins) leaves the frame: emulate. */
1828 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1829 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1830 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1831 src - my_idx * linesize - mx_idx,
1832 EDGE_EMU_LINESIZE, linesize,
1833 block_w + subpel_idx[1][mx],
1834 block_h + subpel_idx[1][my],
1835 x_off - mx_idx, y_off - my_idx,
1837 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1838 src_linesize = EDGE_EMU_LINESIZE;
1840 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
/* Full-pel fast path: straight copy, no filter margins needed. */
1842 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1843 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1844 linesize, block_h, 0, 0);
1849 * chroma MC function
1851 * @param s VP8 decoding context
1852 * @param dst1 target buffer for block data at block position (U plane)
1853 * @param dst2 target buffer for block data at block position (V plane)
1854 * @param ref reference picture buffer at origin (0, 0)
1855 * @param mv motion vector (relative to block position) to get pixel data from
1856 * @param x_off horizontal position of block from origin (0, 0)
1857 * @param y_off vertical position of block from origin (0, 0)
1858 * @param block_w width of block (16, 8 or 4)
1859 * @param block_h height of block (always same as block_w)
1860 * @param width width of src/dst plane data
1861 * @param height height of src/dst plane data
1862 * @param linesize size of a single line of plane data, including padding
1863 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1865 static av_always_inline
/**
 * Chroma motion compensation for both U and V planes (see the doxygen
 * block above): eighth-pel MV, shared edge-emulation buffer reused for
 * the two planes, same progress synchronization as the luma path.
 * NOTE(review): interior lines are elided in this excerpt.
 */
1866 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1867 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1868 int x_off, int y_off, int block_w, int block_h,
1869 int width, int height, ptrdiff_t linesize,
1870 vp8_mc_func mc_func[3][3])
1872 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
/* Chroma MVs are eighth-pel: low 3 bits select the filter phase. */
1875 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1876 int my = mv->y & 7, my_idx = subpel_idx[0][my];
1878 x_off += mv->x >> 3;
1879 y_off += mv->y >> 3;
1882 src1 += y_off * linesize + x_off;
1883 src2 += y_off * linesize + x_off;
1884 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
/* Out-of-frame window: emulate edges for U, filter, then reuse the
 * buffer for V. */
1885 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1886 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1887 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1888 src1 - my_idx * linesize - mx_idx,
1889 EDGE_EMU_LINESIZE, linesize,
1890 block_w + subpel_idx[1][mx],
1891 block_h + subpel_idx[1][my],
1892 x_off - mx_idx, y_off - my_idx, width, height);
1893 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1894 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1896 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1897 src2 - my_idx * linesize - mx_idx,
1898 EDGE_EMU_LINESIZE, linesize,
1899 block_w + subpel_idx[1][mx],
1900 block_h + subpel_idx[1][my],
1901 x_off - mx_idx, y_off - my_idx, width, height);
1902 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1903 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
/* In-frame subpel path: filter directly from the reference planes. */
1905 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1906 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
/* Full-pel fast path. */
1909 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1910 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1911 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1915 static av_always_inline
/**
 * Motion-compensate one partition of a macroblock: luma at the given
 * sub-block offset, then the derived chroma block (chroma MV handling,
 * incl. the VP8 profile-3 full-pel rounding, is in the elided lines).
 * NOTE(review): interior lines are elided in this excerpt.
 */
1916 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1917 ThreadFrame *ref_frame, int x_off, int y_off,
1918 int bx_off, int by_off, int block_w, int block_h,
1919 int width, int height, VP56mv *mv)
/* Luma: the table index picks the 16-wide vs 8-wide function set. */
1924 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1925 ref_frame, mv, x_off + bx_off, y_off + by_off,
1926 block_w, block_h, width, height, s->linesize,
1927 s->put_pixels_tab[block_w == 8]);
1930 if (s->profile == 3) {
1931 /* this block only applies to VP8; it is safe to check
1932 * only the profile, as VP7 profile <= 1 */
1944 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1945 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1946 &uvmv, x_off + bx_off, y_off + by_off,
1947 block_w, block_h, width, height, s->uvlinesize,
1948 s->put_pixels_tab[1 + (block_w == 4)]);
1951 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1952 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1953 static av_always_inline
/* NOTE(review): interior lines (parameter list tail, braces) are elided
 * in this excerpt. */
1954 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1957 /* Don't prefetch refs that haven't been used very often this frame. */
1958 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1959 int x_off = mb_x << 4, y_off = mb_y << 4;
/* +8: aim at the middle of the macroblock four MBs ahead. */
1960 int mx = (mb->mv.x >> 2) + x_off + 8;
1961 int my = (mb->mv.y >> 2) + y_off;
1962 uint8_t **src = s->framep[ref]->tf.f->data;
1963 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1964 /* For threading, a ff_thread_await_progress here might be useful, but
1965 * it actually slows down the decoder. Since a bad prefetch doesn't
1966 * generate bad decoder output, we don't run it here. */
1967 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
/* Chroma: U and V are assumed adjacent; the distance between the plane
 * pointers is used as the stride for the second prefetch. */
1968 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1969 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1974 * Apply motion vectors to prediction buffer, chapter 18.
1976 static av_always_inline
/* NOTE(review): interior lines are elided in this excerpt; comments cover
 * the visible code only. */
1977 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1978 VP8Macroblock *mb, int mb_x, int mb_y)
1980 int x_off = mb_x << 4, y_off = mb_y << 4;
1981 int width = 16 * s->mb_width, height = 16 * s->mb_height;
1982 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1983 VP56mv *bmv = mb->bmv;
1985 switch (mb->partitioning) {
/* One MV for the whole 16x16 macroblock. */
1986 case VP8_SPLITMVMODE_NONE:
1987 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1988 0, 0, 16, 16, width, height, &mb->mv);
/* 16 independent 4x4 luma MVs; chroma MVs are averaged per 8x8. */
1990 case VP8_SPLITMVMODE_4x4: {
1995 for (y = 0; y < 4; y++) {
1996 for (x = 0; x < 4; x++) {
1997 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1998 ref, &bmv[4 * y + x],
1999 4 * x + x_off, 4 * y + y_off, 4, 4,
2000 width, height, s->linesize,
2001 s->put_pixels_tab[2]);
/* Each 4x4 chroma block averages the four covering luma MVs. */
2010 for (y = 0; y < 2; y++) {
2011 for (x = 0; x < 2; x++) {
2012 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
2013 mb->bmv[2 * y * 4 + 2 * x + 1].x +
2014 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
2015 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
2016 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
2017 mb->bmv[2 * y * 4 + 2 * x + 1].y +
2018 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
2019 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
/* Round-to-nearest with sign correction (divide sum of 4 by 4). */
2020 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
2021 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
2022 if (s->profile == 3) {
2026 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
2027 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
2028 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
2029 width, height, s->uvlinesize,
2030 s->put_pixels_tab[2]);
/* Two 16x8 halves, top/bottom. */
2035 case VP8_SPLITMVMODE_16x8:
2036 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2037 0, 0, 16, 8, width, height, &bmv[0]);
2038 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2039 0, 8, 16, 8, width, height, &bmv[1]);
/* Two 8x16 halves, left/right. */
2041 case VP8_SPLITMVMODE_8x16:
2042 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2043 0, 0, 8, 16, width, height, &bmv[0]);
2044 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2045 8, 0, 8, 16, width, height, &bmv[1]);
/* Four 8x8 quadrants. */
2047 case VP8_SPLITMVMODE_8x8:
2048 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2049 0, 0, 8, 8, width, height, &bmv[0]);
2050 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2051 8, 0, 8, 8, width, height, &bmv[1]);
2052 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2053 0, 8, 8, 8, width, height, &bmv[2]);
2054 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2055 8, 8, 8, 8, width, height, &bmv[3]);
2060 static av_always_inline
/**
 * Add the IDCT residual for an inter-predicted macroblock: 4 luma rows
 * of 4 blocks each, then the 2x2 blocks of each chroma plane. Rows whose
 * four blocks are all DC-only use the batched dc_add4 functions.
 * NOTE(review): interior lines are elided in this excerpt.
 */
2061 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
2065 if (mb->mode != MODE_I4x4) {
2066 uint8_t *y_dst = dst[0];
2067 for (y = 0; y < 4; y++) {
/* nnz4 packs the 4 per-block coefficient counts into one word. */
2068 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
2070 if (nnz4 & ~0x01010101) {
/* At least one block in the row has AC coefficients: handle each
 * block individually, consuming one count byte per iteration. */
2071 for (x = 0; x < 4; x++) {
2072 if ((uint8_t) nnz4 == 1)
2073 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
2076 else if ((uint8_t) nnz4 > 1)
2077 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
/* All four blocks DC-only: one batched call. */
2085 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
2088 y_dst += 4 * s->linesize;
/* Chroma: same scheme per plane over the 2x2 block grid. */
2092 for (ch = 0; ch < 2; ch++) {
2093 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
2095 uint8_t *ch_dst = dst[1 + ch];
2096 if (nnz4 & ~0x01010101) {
2097 for (y = 0; y < 2; y++) {
2098 for (x = 0; x < 2; x++) {
2099 if ((uint8_t) nnz4 == 1)
2100 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2101 td->block[4 + ch][(y << 1) + x],
2103 else if ((uint8_t) nnz4 > 1)
2104 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2105 td->block[4 + ch][(y << 1) + x],
2109 goto chroma_idct_end;
2111 ch_dst += 4 * s->uvlinesize;
2114 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
2122 static av_always_inline
2123 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2124 VP8FilterStrength *f, int is_vp7)
2126 int interior_limit, filter_level;
2128 if (s->segmentation.enabled) {
2129 filter_level = s->segmentation.filter_level[mb->segment];
2130 if (!s->segmentation.absolute_vals)
2131 filter_level += s->filter.level;
2133 filter_level = s->filter.level;
2135 if (s->lf_delta.enabled) {
2136 filter_level += s->lf_delta.ref[mb->ref_frame];
2137 filter_level += s->lf_delta.mode[mb->mode];
2140 filter_level = av_clip_uintp2(filter_level, 6);
2142 interior_limit = filter_level;
2143 if (s->filter.sharpness) {
2144 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2145 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2147 interior_limit = FFMAX(interior_limit, 1);
2149 f->filter_level = filter_level;
2150 f->inner_limit = interior_limit;
2151 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2152 mb->mode == VP8_MVMODE_SPLIT;
2155 static av_always_inline
/**
 * Normal (full) loop filter for one macroblock: filter the left and top
 * macroblock edges with the strong filter, then the three interior luma
 * edges and one interior chroma edge in each direction with the inner
 * filter. hev_thresh selects the high-edge-variance threshold from the
 * keyframe/interframe lookup tables.
 * NOTE(review): interior lines are elided in this excerpt.
 */
2156 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2157 int mb_x, int mb_y, int is_vp7)
2159 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2160 int filter_level = f->filter_level;
2161 int inner_limit = f->inner_limit;
2162 int inner_filter = f->inner_filter;
2163 ptrdiff_t linesize = s->linesize;
2164 ptrdiff_t uvlinesize = s->uvlinesize;
/* Row 0: keyframe thresholds; row 1: interframe thresholds. */
2165 static const uint8_t hev_thresh_lut[2][64] = {
2166 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2167 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2168 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2170 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2171 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2172 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* Edge limits; the two visible variants correspond to the VP7 and VP8
 * limit formulas (some lines elided). */
2180 bedge_lim_y = filter_level;
2181 bedge_lim_uv = filter_level * 2;
2182 mbedge_lim = filter_level + 2;
2185 bedge_lim_uv = filter_level * 2 + inner_limit;
2186 mbedge_lim = bedge_lim_y + 4;
2189 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
/* Left macroblock edge (horizontal filtering). */
2192 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2193 mbedge_lim, inner_limit, hev_thresh);
2194 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2195 mbedge_lim, inner_limit, hev_thresh);
2198 #define H_LOOP_FILTER_16Y_INNER(cond) \
2199 if (cond && inner_filter) { \
2200 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2201 bedge_lim_y, inner_limit, \
2203 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2204 bedge_lim_y, inner_limit, \
2206 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2207 bedge_lim_y, inner_limit, \
2209 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2210 uvlinesize, bedge_lim_uv, \
2211 inner_limit, hev_thresh); \
/* VP8 runs the inner horizontal filter before the vertical pass,
 * VP7 after it — hence the complementary conditions. */
2214 H_LOOP_FILTER_16Y_INNER(!is_vp7)
/* Top macroblock edge (vertical filtering). */
2217 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2218 mbedge_lim, inner_limit, hev_thresh);
2219 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2220 mbedge_lim, inner_limit, hev_thresh);
/* Interior vertical edges. */
2224 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2225 linesize, bedge_lim_y,
2226 inner_limit, hev_thresh);
2227 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2228 linesize, bedge_lim_y,
2229 inner_limit, hev_thresh);
2230 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2231 linesize, bedge_lim_y,
2232 inner_limit, hev_thresh);
2233 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2234 dst[2] + 4 * uvlinesize,
2235 uvlinesize, bedge_lim_uv,
2236 inner_limit, hev_thresh);
2239 H_LOOP_FILTER_16Y_INNER(is_vp7)
2242 static av_always_inline
2243 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2246 int mbedge_lim, bedge_lim;
2247 int filter_level = f->filter_level;
2248 int inner_limit = f->inner_limit;
2249 int inner_filter = f->inner_filter;
2250 ptrdiff_t linesize = s->linesize;
2255 bedge_lim = 2 * filter_level + inner_limit;
2256 mbedge_lim = bedge_lim + 4;
2259 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2261 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2262 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2263 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2267 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2269 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2270 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2271 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2275 #define MARGIN (16 << 2)
2276 static av_always_inline
/**
 * First pass over the frame when frame-threading splits mode/MV parsing
 * from reconstruction: decode the mode and motion information of every
 * macroblock, maintaining the MV clamping bounds per row/column.
 * NOTE(review): interior lines are elided in this excerpt.
 */
2277 int vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2278 VP8Frame *prev_frame, int is_vp7)
2280 VP8Context *s = avctx->priv_data;
/* MVs may point up to MARGIN quarter-pels outside the frame. */
2283 s->mv_bounds.mv_min.y = -MARGIN;
2284 s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2285 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
/* +1 column of padding on the left of the macroblock array. */
2286 VP8Macroblock *mb = s->macroblocks_base +
2287 ((s->mb_width + 1) * (mb_y + 1) + 1);
2288 int mb_xy = mb_y * s->mb_width;
/* Reset the left intra-mode predictors to DC at each row start. */
2290 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2292 s->mv_bounds.mv_min.x = -MARGIN;
2293 s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
/* Bail out if the bitstream ran dry — truncated input. */
2295 if (vpX_rac_is_end(&s->c)) {
2296 return AVERROR_INVALIDDATA;
2298 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
/* First row: seed the top predictors with DC. */
2300 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2301 DC_PRED * 0x01010101);
2302 decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2303 prev_frame && prev_frame->seg_map ?
2304 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
/* Slide the clamping window one macroblock (64 quarter-pels). */
2305 s->mv_bounds.mv_min.x -= 64;
2306 s->mv_bounds.mv_max.x -= 64;
2308 s->mv_bounds.mv_min.y -= 64;
2309 s->mv_bounds.mv_max.y -= 64;
2314 static int vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2315 VP8Frame *prev_frame)
2317 return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
2320 static int vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2321 VP8Frame *prev_frame)
2323 return vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
2327 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2329 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2330 if (atomic_load(&otd->thread_mb_pos) < tmp) { \
2331 pthread_mutex_lock(&otd->lock); \
2332 atomic_store(&td->wait_mb_pos, tmp); \
2334 if (atomic_load(&otd->thread_mb_pos) >= tmp) \
2336 pthread_cond_wait(&otd->cond, &otd->lock); \
2338 atomic_store(&td->wait_mb_pos, INT_MAX); \
2339 pthread_mutex_unlock(&otd->lock); \
2343 #define update_pos(td, mb_y, mb_x) \
2345 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2346 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2348 int is_null = !next_td || !prev_td; \
2349 int pos_check = (is_null) ? 1 : \
2350 (next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) || \
2351 (prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos)); \
2352 atomic_store(&td->thread_mb_pos, pos); \
2353 if (sliced_threading && pos_check) { \
2354 pthread_mutex_lock(&td->lock); \
2355 pthread_cond_broadcast(&td->cond); \
2356 pthread_mutex_unlock(&td->lock); \
2360 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
2361 #define update_pos(td, mb_y, mb_x) while(0)
/* Decode (reconstruct, but do not loop-filter) one macroblock row.
 * Runs as a slice-threading job: td->thread_mb_pos encodes (mb_y << 16),
 * and check_thread_pos/update_pos synchronize against the neighbouring
 * rows' threads. Returns 0 on success or AVERROR_INVALIDDATA on a
 * truncated bitstream. */
2364 static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2365 int jobnr, int threadnr, int is_vp7)
2367 VP8Context *s = avctx->priv_data;
2368 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
/* Row index was stashed in the high 16 bits of thread_mb_pos by the
 * sliced-decode driver. */
2369 int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
2370 int mb_x, mb_xy = mb_y * s->mb_width;
2371 int num_jobs = s->num_jobs;
2372 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
/* Coefficient partitions are assigned to rows round-robin; the mask works
 * because num_coeff_partitions is a power of two. */
2373 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
/* Destination pointers for luma (16 px/MB row) and both chroma planes
 * (8 px/MB row). */
2376 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2377 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2378 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2381 if (vpX_rac_is_end(c))
2382 return AVERROR_INVALIDDATA;
/* Neighbouring jobs decode the rows above/below this one. */
2387 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2388 if (mb_y == s->mb_height - 1)
2391 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2392 if (s->mb_layout == 1)
2393 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2395 // Make sure the previous frame has read its segmentation map,
2396 // if we re-use the same map.
2397 if (prev_frame && s->segmentation.enabled &&
2398 !s->segmentation.update_map)
2399 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2400 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2401 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2402 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
/* VP7 carries left_nnz across rows; VP8 resets it every row. */
2405 if (!is_vp7 || mb_y == 0)
2406 memset(td->left_nnz, 0, sizeof(td->left_nnz));
2408 td->mv_bounds.mv_min.x = -MARGIN;
2409 td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2411 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2412 if (vpX_rac_is_end(c))
2413 return AVERROR_INVALIDDATA;
2414 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2415 if (prev_td != td) {
2416 if (threadnr != 0) {
2417 check_thread_pos(td, prev_td,
2418 mb_x + (is_vp7 ? 2 : 1),
2419 mb_y - (is_vp7 ? 2 : 1));
/* thread 0: previous row's thread must also be past its filtering
 * stage, hence the extra mb_width + 3 offset. */
2421 check_thread_pos(td, prev_td,
2422 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2423 mb_y - (is_vp7 ? 2 : 1));
/* Prefetch upcoming destination lines to warm the cache. */
2427 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2429 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2430 dst[2] - dst[1], 2);
2433 decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2434 prev_frame && prev_frame->seg_map ?
2435 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2437 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2440 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
/* Reconstruct: intra or inter prediction, then add the residual. */
2442 if (mb->mode <= MODE_I4x4)
2443 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2445 inter_predict(s, td, dst, mb, mb_x, mb_y);
2447 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2450 idct_mb(s, td, dst, mb);
2452 AV_ZERO64(td->left_nnz);
2453 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2455 /* Reset DC block predictors if they would exist
2456 * if the mb had coefficients */
2457 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2458 td->left_nnz[8] = 0;
2459 s->top_nnz[mb_x][8] = 0;
/* Precompute per-MB filter strength for the later filter pass. */
2463 if (s->deblock_filter)
2464 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
/* Only the last job backs up the MB border here; with one job the
 * filter pass does it itself (see filter_mb_row). */
2466 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2467 if (s->filter.simple)
2468 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2469 NULL, NULL, s->linesize, 0, 1);
2471 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2472 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2475 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2480 td->mv_bounds.mv_min.x -= 64;
2481 td->mv_bounds.mv_max.x -= 64;
/* Publish progress so dependent threads can advance. */
2483 if (mb_x == s->mb_width + 1) {
2484 update_pos(td, mb_y, s->mb_width + 3);
2486 update_pos(td, mb_y, mb_x);
/* VP7 entry point for the per-row reconstruction pass. */
2492 static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2493 int jobnr, int threadnr)
2495 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
/* VP8 entry point for the per-row reconstruction pass. */
2498 static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2499 int jobnr, int threadnr)
2501 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
/* Apply the in-loop deblocking filter to one already-reconstructed
 * macroblock row, using the per-MB strengths computed during
 * decode_mb_row_no_filter. Synchronizes with neighbouring row jobs via
 * check_thread_pos/update_pos. */
2504 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2505 int jobnr, int threadnr, int is_vp7)
2507 VP8Context *s = avctx->priv_data;
2508 VP8ThreadData *td = &s->thread_data[threadnr];
2509 int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs;
2510 AVFrame *curframe = s->curframe->tf.f;
2512 VP8ThreadData *prev_td, *next_td;
/* Row base pointers: 16 luma / 8 chroma pixel rows per MB row. */
2514 curframe->data[0] + 16 * mb_y * s->linesize,
2515 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2516 curframe->data[2] + 8 * mb_y * s->uvlinesize
2519 if (s->mb_layout == 1)
2520 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2522 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2527 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2528 if (mb_y == s->mb_height - 1)
2531 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2533 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2534 VP8FilterStrength *f = &td->filter_strength[mb_x];
/* Wait until the row above has been filtered one MB ahead
 * (mb_width + 3 marks the filter stage in the progress counter). */
2536 check_thread_pos(td, prev_td,
2537 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
/* And until the row below has been decoded past this column. */
2539 if (next_td != &s->thread_data[0])
2540 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
/* Single-job case: back up the unfiltered top border here, since the
 * decode pass skipped it (see decode_mb_row_no_filter). */
2542 if (num_jobs == 1) {
2543 if (s->filter.simple)
2544 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2545 NULL, NULL, s->linesize, 0, 1);
2547 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2548 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
/* Simple filter touches luma only; the normal filter does all planes. */
2551 if (s->filter.simple)
2552 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2554 filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
/* Publish filter progress for dependent threads. */
2559 update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
/* VP7 entry point for the per-row loop-filter pass. */
2563 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2564 int jobnr, int threadnr)
2566 filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
/* VP8 entry point for the per-row loop-filter pass. */
2569 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2570 int jobnr, int threadnr)
2572 filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
/* Sliced-threading job driver: each job decodes every num_jobs-th MB row
 * (decode pass, then optional filter pass), interleaved with the other
 * jobs. Also reports frame-threading progress per row. */
2575 static av_always_inline
2576 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2577 int threadnr, int is_vp7)
2579 VP8Context *s = avctx->priv_data;
2580 VP8ThreadData *td = &s->thread_data[jobnr];
2581 VP8ThreadData *next_td = NULL, *prev_td = NULL;
2582 VP8Frame *curframe = s->curframe;
2583 int mb_y, num_jobs = s->num_jobs;
2586 td->thread_nr = threadnr;
/* Pre-offset the vertical MV clamp by this job's starting row. */
2587 td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr;
2588 td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
2589 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
/* Row index is published in the high 16 bits of thread_mb_pos; the
 * row workers read it back from there. */
2590 atomic_store(&td->thread_mb_pos, mb_y << 16);
2591 ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
/* On error, mark this job as fully done so waiters don't deadlock. */
2593 update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
2596 if (s->deblock_filter)
2597 s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2598 update_pos(td, mb_y, INT_MAX & 0xFFFF);
/* Advance the clamp window by the rows all jobs consumed together. */
2600 td->mv_bounds.mv_min.y -= 64 * num_jobs;
2601 td->mv_bounds.mv_max.y -= 64 * num_jobs;
2603 if (avctx->active_thread_type == FF_THREAD_FRAME)
2604 ff_thread_report_progress(&curframe->tf, mb_y, 0);
/* VP7 sliced-threading job entry point (passed to execute2). */
2610 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2611 int jobnr, int threadnr)
2613 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
/* VP8 sliced-threading job entry point (passed to execute2). */
2616 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2617 int jobnr, int threadnr)
2619 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
/* Top-level per-packet decode for VP7/VP8: parses the frame header,
 * manages the four reference slots (CURRENT/PREVIOUS/GOLDEN/GOLDEN2),
 * dispatches either the hwaccel path or the software sliced decode, and
 * outputs the frame unless it is marked invisible.
 * Returns a negative AVERROR on failure. */
2622 static av_always_inline
2623 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2624 AVPacket *avpkt, int is_vp7)
2626 VP8Context *s = avctx->priv_data;
2627 int ret, i, referenced, num_jobs;
2628 enum AVDiscard skip_thresh;
2629 VP8Frame *av_uninit(curframe), *prev_frame;
2632 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2634 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
/* Resolve the output pixel format once (VP8 only; WebP callers have
 * already set avctx->pix_fmt). */
2639 if (s->actually_webp) {
2640 // avctx->pix_fmt already set in caller.
2641 } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
2642 s->pix_fmt = get_pixel_format(s);
2643 if (s->pix_fmt < 0) {
2644 ret = AVERROR(EINVAL);
2647 avctx->pix_fmt = s->pix_fmt;
2650 prev_frame = s->framep[VP56_FRAME_CURRENT];
/* A frame is "referenced" if any reference slot will be updated from it. */
2652 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2653 s->update_altref == VP56_FRAME_CURRENT;
2655 skip_thresh = !referenced ? AVDISCARD_NONREF
2656 : !s->keyframe ? AVDISCARD_NONKEY
/* Honor avctx->skip_frame: keep reference bookkeeping consistent even
 * when the frame itself is skipped. */
2659 if (avctx->skip_frame >= skip_thresh) {
2661 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2664 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2666 // release no longer referenced frames
2667 for (i = 0; i < 5; i++)
2668 if (s->frames[i].tf.f->buf[0] &&
2669 &s->frames[i] != prev_frame &&
2670 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2671 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2672 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2673 vp8_release_frame(s, &s->frames[i]);
2675 curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2678 avctx->colorspace = AVCOL_SPC_BT470BG;
2680 avctx->color_range = AVCOL_RANGE_JPEG;
2682 avctx->color_range = AVCOL_RANGE_MPEG;
2684 /* Given that arithmetic probabilities are updated every frame, it's quite
2685 * likely that the values we have on a random interframe are complete
2686 * junk if we didn't start decode on a keyframe. So just don't display
2687 * anything rather than junk. */
2688 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2689 !s->framep[VP56_FRAME_GOLDEN] ||
2690 !s->framep[VP56_FRAME_GOLDEN2])) {
2691 av_log(avctx, AV_LOG_WARNING,
2692 "Discarding interframe without a prior keyframe!\n");
2693 ret = AVERROR_INVALIDDATA;
2697 curframe->tf.f->key_frame = s->keyframe;
2698 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2699 : AV_PICTURE_TYPE_P;
2700 if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2703 // check if golden and altref are swapped
2704 if (s->update_altref != VP56_FRAME_NONE)
2705 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2707 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2709 if (s->update_golden != VP56_FRAME_NONE)
2710 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2712 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2715 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2717 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2719 s->next_framep[VP56_FRAME_CURRENT] = curframe;
/* Reference bookkeeping is done; frame threads may copy our context. */
2721 if (avctx->codec->update_thread_context)
2722 ff_thread_finish_setup(avctx);
/* Hardware decode path: hand the whole packet to the hwaccel. */
2724 if (avctx->hwaccel) {
2725 ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
2729 ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
2733 ret = avctx->hwaccel->end_frame(avctx);
/* Software decode path begins here. */
2738 s->linesize = curframe->tf.f->linesize[0];
2739 s->uvlinesize = curframe->tf.f->linesize[1];
2741 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2742 /* Zero macroblock structures for top/top-left prediction
2743 * from outside the frame. */
2745 memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2746 (s->mb_width + 1) * sizeof(*s->macroblocks));
2747 if (!s->mb_layout && s->keyframe)
2748 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2750 memset(s->ref_count, 0, sizeof(s->ref_count));
/* Alternate layout: parse all modes/MVs for the frame up front. */
2752 if (s->mb_layout == 1) {
2753 // Make sure the previous frame has read its segmentation map,
2754 // if we re-use the same map.
2755 if (prev_frame && s->segmentation.enabled &&
2756 !s->segmentation.update_map)
2757 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2759 ret = vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2761 ret = vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2766 if (avctx->active_thread_type == FF_THREAD_FRAME)
/* One job per coefficient partition, capped at the thread count. */
2769 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2770 s->num_jobs = num_jobs;
2771 s->curframe = curframe;
2772 s->prev_frame = prev_frame;
2773 s->mv_bounds.mv_min.y = -MARGIN;
2774 s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2775 for (i = 0; i < MAX_THREADS; i++) {
2776 VP8ThreadData *td = &s->thread_data[i];
2777 atomic_init(&td->thread_mb_pos, 0);
2778 atomic_init(&td->wait_mb_pos, INT_MAX);
2781 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2784 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2788 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2789 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2792 // if future frames don't use the updated probabilities,
2793 // reset them to the values we saved
2794 if (!s->update_probabilities)
2795 s->prob[0] = s->prob[1];
/* Invisible frames update references but are never output. */
2797 if (!s->invisible) {
2798 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
/* Error path: keep the reference array consistent before returning. */
2805 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
/* Public VP8 decode entry point (also used by the WebP decoder). */
2809 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2812 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2815 #if CONFIG_VP7_DECODER
/* VP7 decode entry point registered in ff_vp7_decoder below. */
2816 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2819 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2821 #endif /* CONFIG_VP7_DECODER */
/* Codec close: flush/free decoder state and release all frame buffers. */
2823 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2825 VP8Context *s = avctx->priv_data;
/* free_mem=1: vp8_decode_flush_impl also frees the per-thread buffers. */
2831 vp8_decode_flush_impl(avctx, 1);
2832 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2833 av_frame_free(&s->frames[i].tf.f);
/* Allocate the AVFrame shells for the internal frame pool.
 * Returns 0 or AVERROR(ENOMEM); caller is responsible for cleanup
 * (vp78_decode_init calls ff_vp8_decode_free on failure). */
2838 static av_cold int vp8_init_frames(VP8Context *s)
2841 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2842 s->frames[i].tf.f = av_frame_alloc();
2843 if (!s->frames[i].tf.f)
2844 return AVERROR(ENOMEM);
/* Shared VP7/VP8 decoder init: sets up DSP/prediction function tables,
 * selects the codec-specific row workers, and allocates the frame pool. */
2849 static av_always_inline
2850 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2852 VP8Context *s = avctx->priv_data;
2856 s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
/* Actual pix_fmt is resolved per-frame from the header (see
 * vp78_decode_frame); YUV420P is the provisional default. */
2857 s->pix_fmt = AV_PIX_FMT_NONE;
2858 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2860 ff_videodsp_init(&s->vdsp, 8);
/* Common VP7/VP8 DSP first; codec-specific init below overrides parts. */
2862 ff_vp78dsp_init(&s->vp8dsp);
2863 if (CONFIG_VP7_DECODER && is_vp7) {
2864 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2865 ff_vp7dsp_init(&s->vp8dsp);
2866 s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2867 s->filter_mb_row = vp7_filter_mb_row;
2868 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2869 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2870 ff_vp8dsp_init(&s->vp8dsp);
2871 s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2872 s->filter_mb_row = vp8_filter_mb_row;
2875 /* does not change for VP8 */
2876 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
/* On allocation failure, release anything partially initialized. */
2878 if ((ret = vp8_init_frames(s)) < 0) {
2879 ff_vp8_decode_free(avctx);
2886 #if CONFIG_VP7_DECODER
/* VP7 init entry point registered in ff_vp7_decoder below. */
2887 static int vp7_decode_init(AVCodecContext *avctx)
2889 return vp78_decode_init(avctx, IS_VP7);
2891 #endif /* CONFIG_VP7_DECODER */
/* Public VP8 init entry point (also used by the WebP decoder). */
2893 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2895 return vp78_decode_init(avctx, IS_VP8);
2898 #if CONFIG_VP8_DECODER
2900 #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
/* Frame-threading context copy: propagate dimensions, probabilities,
 * segmentation/loop-filter state and reference-frame pointers from the
 * source thread's context into this one. Frame pointers are rebased into
 * this context's own frames[] array via REBASE. */
2902 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2903 const AVCodecContext *src)
2905 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
/* Dimensions changed mid-stream: per-MB buffers must be reallocated. */
2908 if (s->macroblocks_base &&
2909 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2911 s->mb_width = s_src->mb_width;
2912 s->mb_height = s_src->mb_height;
2915 s->pix_fmt = s_src->pix_fmt;
/* Pick the entropy probabilities future frames will actually use. */
2916 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2917 s->segmentation = s_src->segmentation;
2918 s->lf_delta = s_src->lf_delta;
2919 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
/* Take references on every buffer the source context still holds. */
2921 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2922 if (s_src->frames[i].tf.f->buf[0]) {
2923 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
/* next_framep is the source's post-frame state; rebase into ours. */
2929 s->framep[0] = REBASE(s_src->next_framep[0]);
2930 s->framep[1] = REBASE(s_src->next_framep[1]);
2931 s->framep[2] = REBASE(s_src->next_framep[2]);
2932 s->framep[3] = REBASE(s_src->next_framep[3]);
2936 #endif /* HAVE_THREADS */
2937 #endif /* CONFIG_VP8_DECODER */
2939 #if CONFIG_VP7_DECODER
/* Codec registration table for the VP7 decoder (no threading caps). */
2940 AVCodec ff_vp7_decoder = {
2942 .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2943 .type = AVMEDIA_TYPE_VIDEO,
2944 .id = AV_CODEC_ID_VP7,
2945 .priv_data_size = sizeof(VP8Context),
2946 .init = vp7_decode_init,
2947 .close = ff_vp8_decode_free,
2948 .decode = vp7_decode_frame,
2949 .capabilities = AV_CODEC_CAP_DR1,
2950 .flush = vp8_decode_flush,
2952 #endif /* CONFIG_VP7_DECODER */
2954 #if CONFIG_VP8_DECODER
/* Codec registration table for the VP8 decoder: supports direct
 * rendering, frame and slice threading, and optional hw acceleration. */
2955 AVCodec ff_vp8_decoder = {
2957 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2958 .type = AVMEDIA_TYPE_VIDEO,
2959 .id = AV_CODEC_ID_VP8,
2960 .priv_data_size = sizeof(VP8Context),
2961 .init = ff_vp8_decode_init,
2962 .close = ff_vp8_decode_free,
2963 .decode = ff_vp8_decode_frame,
2964 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
2965 AV_CODEC_CAP_SLICE_THREADS,
2966 .flush = vp8_decode_flush,
2967 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2968 .hw_configs = (const AVCodecHWConfigInternal*[]) {
2969 #if CONFIG_VP8_VAAPI_HWACCEL
2972 #if CONFIG_VP8_NVDEC_HWACCEL
2977 .caps_internal = FF_CODEC_CAP_ALLOCATE_PROGRESS,
2979 #endif /* CONFIG_VP8_DECODER */