2 * VP7/VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Fiona Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/imgutils.h"
33 #include "rectangle.h"
/* VPX(vp7, f): select the VP7- or VP8-specific variant of symbol f.
 * When both decoders are compiled in, the choice is made at runtime from
 * the vp7 flag; otherwise it collapses to the only compiled-in variant. */
42 #if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
43 #define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
44 #elif CONFIG_VP7_DECODER
45 #define VPX(vp7, f) vp7_ ## f
46 #else // CONFIG_VP8_DECODER
47 #define VPX(vp7, f) vp8_ ## f
/* Free all per-context scratch buffers: each thread's condition variable,
 * mutex and filter-strength array, the thread-data array itself, and the
 * macroblock / intra-prediction / top-row arrays. Resets s->macroblocks
 * (an alias into macroblocks_base) to NULL afterwards. */
50 static void free_buffers(VP8Context *s)
54     for (i = 0; i < MAX_THREADS; i++) {
56         pthread_cond_destroy(&s->thread_data[i].cond);
57         pthread_mutex_destroy(&s->thread_data[i].lock);
59         av_freep(&s->thread_data[i].filter_strength);
61     av_freep(&s->thread_data);
62     av_freep(&s->macroblocks_base);
63     av_freep(&s->intra4x4_pred_mode_top);
64     av_freep(&s->top_nnz);
65     av_freep(&s->top_border);
/* macroblocks points into macroblocks_base, which was just freed */
67     s->macroblocks = NULL;
/* Allocate the picture buffer, the per-macroblock segment map and, when a
 * hwaccel is active, its per-frame private buffer for frame f.
 * 'ref' requests a reference-counted buffer (AV_GET_BUFFER_FLAG_REF).
 * On allocation failure the partially built frame is torn down and
 * AVERROR(ENOMEM) is returned (error path visible at the bottom). */
70 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
73     if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
74 ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
/* one seg_map byte per macroblock */
76     if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
78     if (s->avctx->hwaccel) {
79         const AVHWAccel *hwaccel = s->avctx->hwaccel;
80         if (hwaccel->frame_priv_data_size) {
81             f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
82             if (!f->hwaccel_priv_buf)
84             f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
/* error path: undo the allocations made above */
90     av_buffer_unref(&f->seg_map);
91     ff_thread_release_buffer(s->avctx, &f->tf);
92     return AVERROR(ENOMEM);
/* Release everything vp8_alloc_frame() attached to f: segment map,
 * hwaccel private buffer (and its data pointer) and the picture buffer. */
95 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
97     av_buffer_unref(&f->seg_map);
98     av_buffer_unref(&f->hwaccel_priv_buf);
99     f->hwaccel_picture_private = NULL;
100     ff_thread_release_buffer(s->avctx, &f->tf);
103 #if CONFIG_VP8_DECODER
/* Make dst a new reference to src: releases dst's current contents, then
 * refs src's picture buffer, segment map and (if present) hwaccel private
 * buffer. Returns 0 on success or a negative AVERROR on failure. */
104 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
108     vp8_release_frame(s, dst);
110     if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
113         !(dst->seg_map = av_buffer_ref(src->seg_map))) {
114         vp8_release_frame(s, dst);
115         return AVERROR(ENOMEM);
117     if (src->hwaccel_picture_private) {
118         dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
119         if (!dst->hwaccel_priv_buf)
120             return AVERROR(ENOMEM);
121         dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
126 #endif /* CONFIG_VP8_DECODER */
/* Release every frame in the pool and clear the framep reference table.
 * free_mem additionally frees the context buffers — TODO confirm, the
 * branch using it is not visible in this excerpt. */
128 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
130     VP8Context *s = avctx->priv_data;
133     for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
134         vp8_release_frame(s, &s->frames[i]);
135     memset(s->framep, 0, sizeof(s->framep));
/* Codec flush callback: drop all frames but keep allocated memory. */
141 static void vp8_decode_flush(AVCodecContext *avctx)
143     vp8_decode_flush_impl(avctx, 0);
/* Return a frame from s->frames that is not currently referenced as the
 * current, previous, golden or altref (golden2) frame. If the chosen slot
 * still holds picture data it is released first. Logs a fatal error when
 * all five slots are in use (recovery path not visible here). */
146 static VP8Frame *vp8_find_free_buffer(VP8Context *s)
148     VP8Frame *frame = NULL;
151     // find a free buffer
152     for (i = 0; i < 5; i++)
153         if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
154 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
155 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
156 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
157             frame = &s->frames[i];
161     av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
/* recycle a slot that still holds an old picture */
164     if (frame->tf.f->buf[0])
165         vp8_release_frame(s, frame);
/* Negotiate the output pixel format with the caller via ff_get_format(),
 * offering hwaccel formats (VAAPI/NVDEC when compiled in) ahead of the
 * software format. */
170 static enum AVPixelFormat get_pixel_format(VP8Context *s)
172     enum AVPixelFormat pix_fmts[] = {
173 #if CONFIG_VP8_VAAPI_HWACCEL
176 #if CONFIG_VP8_NVDEC_HWACCEL
183     return ff_get_format(s->avctx, pix_fmts);
/* (Re)configure the decoder for a new frame size.
 * Flushes and reallocates all size-dependent buffers when the dimensions
 * or the derived macroblock counts change. is_vp7 selects VP7 semantics.
 * Returns 0 on success or a negative AVERROR. */
186 static av_always_inline
187 int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
189     AVCodecContext *avctx = s->avctx;
192     if (width != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
193 height != s->avctx->height) {
194         vp8_decode_flush_impl(s->avctx, 1);
196         ret = ff_set_dimensions(s->avctx, width, height);
/* pixel format negotiation is skipped for VP7 and for WebP carriers,
 * which handle it themselves */
201     if (!s->actually_webp && !is_vp7) {
202         s->pix_fmt = get_pixel_format(s);
204             return AVERROR(EINVAL);
205         avctx->pix_fmt = s->pix_fmt;
208     s->mb_width = (s->avctx->coded_width + 15) / 16;
209     s->mb_height = (s->avctx->coded_height + 15) / 16;
/* mb_layout == 1 for VP7 or slice threading; it changes how macroblock
 * rows are laid out in memory (see the two allocations below) */
211     s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
212 avctx->thread_count > 1;
213     if (!s->mb_layout) { // Frame threading and one thread
214         s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
215 sizeof(*s->macroblocks));
216         s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
217     } else // Sliced threading
218         s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
219 sizeof(*s->macroblocks));
220     s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
221     s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
222     s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
/* intra4x4_pred_mode_top is only allocated in the !mb_layout branch */
224     if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
225 !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
227         return AVERROR(ENOMEM);
230     for (i = 0; i < MAX_THREADS; i++) {
231         s->thread_data[i].filter_strength =
232             av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
233         if (!s->thread_data[i].filter_strength) {
235             return AVERROR(ENOMEM);
238         pthread_mutex_init(&s->thread_data[i].lock, NULL);
239         pthread_cond_init(&s->thread_data[i].cond, NULL);
243     s->macroblocks = s->macroblocks_base + 1;
/* VP7 wrapper around update_dimensions(). */
248 static int vp7_update_dimensions(VP8Context *s, int width, int height)
250     return update_dimensions(s, width, height, IS_VP7);
/* VP8 wrapper around update_dimensions(). */
253 static int vp8_update_dimensions(VP8Context *s, int width, int height)
255     return update_dimensions(s, width, height, IS_VP8);
/* Parse segmentation header data from the range coder: the update flags,
 * per-segment quantizer and loop-filter deltas (signed), and the segment-id
 * tree probabilities (255 = default when the update bit is unset). */
259 static void parse_segment_info(VP8Context *s)
261     VP56RangeCoder *c = &s->c;
264     s->segmentation.update_map = vp8_rac_get(c);
265     s->segmentation.update_feature_data = vp8_rac_get(c);
267     if (s->segmentation.update_feature_data) {
/* absolute_vals: values replace the base rather than being deltas */
268         s->segmentation.absolute_vals = vp8_rac_get(c);
270         for (i = 0; i < 4; i++)
271             s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
273         for (i = 0; i < 4; i++)
274             s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
276     if (s->segmentation.update_map)
277         for (i = 0; i < 3; i++)
278             s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/* Read loop-filter delta updates: four per-reference-frame deltas and the
 * per-prediction-mode deltas (MODE_I4x4..VP8_MVMODE_SPLIT). Each value is
 * coded as a 6-bit magnitude followed by a sign (sign handling partially
 * elided in this excerpt). */
281 static void update_lf_deltas(VP8Context *s)
283     VP56RangeCoder *c = &s->c;
286     for (i = 0; i < 4; i++) {
287         if (vp8_rac_get(c)) {
288             s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
291                 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
295     for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
296         if (vp8_rac_get(c)) {
297             s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
300                 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
/* Set up the DCT coefficient partitions: read the partition count (a power
 * of two, 1..8), then for each partition but the last read its 24-bit
 * little-endian size from the header and init a range decoder over it.
 * The last partition takes whatever buffer remains. Returns 0 on success,
 * negative on truncated input (error paths partially elided). */
305 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
307     const uint8_t *sizes = buf;
311     s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
/* skip the size table (3 bytes per partition except the last) */
313     buf += 3 * (s->num_coeff_partitions - 1);
314     buf_size -= 3 * (s->num_coeff_partitions - 1);
318     for (i = 0; i < s->num_coeff_partitions - 1; i++) {
319         int size = AV_RL24(sizes + 3 * i);
320         if (buf_size - size < 0)
322         s->coeff_partition_size[i] = size;
324         ret = ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
/* last partition: the rest of the buffer */
331     s->coeff_partition_size[i] = buf_size;
332     ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
/* Parse VP7 quantizer indices. yac is always present; each of the other
 * five indices is optionally coded and defaults to yac_qi. The indices are
 * mapped to multipliers through the VP7 lookup tables; chroma DC is
 * clamped to 132 as in the luma-derived table. */
337 static void vp7_get_quants(VP8Context *s)
339     VP56RangeCoder *c = &s->c;
341     int yac_qi = vp8_rac_get_uint(c, 7);
342     int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
343     int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
344     int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
345     int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
346     int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
348     s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
349     s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
350     s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
351     s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
352     s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
353     s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
/* Parse VP8 quantizer header: a base yac index plus five signed 4-bit
 * deltas, then build the per-segment quant matrices. When segmentation is
 * enabled, each segment's base index comes from the segmentation data
 * (either absolute or relative to yac_qi). Indices are clipped to 7 bits
 * before the table lookups. */
356 static void vp8_get_quants(VP8Context *s)
358     VP56RangeCoder *c = &s->c;
361     s->quant.yac_qi = vp8_rac_get_uint(c, 7);
362     s->quant.ydc_delta = vp8_rac_get_sint(c, 4);
363     s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
364     s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
365     s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
366     s->quant.uvac_delta = vp8_rac_get_sint(c, 4);
368     for (i = 0; i < 4; i++) {
369         if (s->segmentation.enabled) {
370             base_qi = s->segmentation.base_quant[i];
371             if (!s->segmentation.absolute_vals)
372                 base_qi += s->quant.yac_qi;
374             base_qi = s->quant.yac_qi;
376         s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta, 7)];
377         s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
378         s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
379         /* 101581>>16 is equivalent to 155/100 */
380         s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
381         s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
382         s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];
/* spec-mandated floor/ceiling on the second-order DC and chroma DC */
384         s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
385         s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
390 * Determine which buffers golden and altref should be updated with after this frame.
391 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
393 * Intra frames update all 3 references
394 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
395 * If the update (golden|altref) flag is set, it's updated with the current frame
396 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
397 * If the flag is not set, the number read means:
399 * 1: VP56_FRAME_PREVIOUS
400 * 2: update golden with altref, or update altref with golden
/* See the block comment above: decide which frame buffer the golden or
 * altref reference ('ref') should be updated from after this frame. */
402 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
404     VP56RangeCoder *c = &s->c;
407         return VP56_FRAME_CURRENT;
409     switch (vp8_rac_get_uint(c, 2)) {
411         return VP56_FRAME_PREVIOUS;
/* cross-copy: golden gets altref, altref gets golden */
413         return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
415     return VP56_FRAME_NONE;
/* Reset the DCT token probabilities to the spec defaults, indexed through
 * vp8_coeff_band to map the 16 coefficient positions onto bands. */
418 static void vp78_reset_probability_tables(VP8Context *s)
421     for (i = 0; i < 4; i++)
422         for (j = 0; j < 16; j++)
423             memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
424 sizeof(s->prob->token[i][j]));
/* Parse DCT token probability updates (13.3). For each (plane, band,
 * context, token) position, a conditional bit selects whether a new 8-bit
 * probability follows; it is then fanned out to every coefficient index
 * belonging to that band via vp8_coeff_band_indexes. */
427 static void vp78_update_probability_tables(VP8Context *s)
429     VP56RangeCoder *c = &s->c;
432     for (i = 0; i < 4; i++)
433         for (j = 0; j < 8; j++)
434             for (k = 0; k < 3; k++)
435                 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
436                     if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
437                         int prob = vp8_rac_get_uint(c, 8);
438                         for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
439                             s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
/* Number of per-component MV probabilities: 17 for VP7, 19 for VP8. */
443 #define VP7_MVC_SIZE 17
444 #define VP8_MVC_SIZE 19
/* Parse inter-frame probability updates: 16x16 luma mode, 8x8 chroma mode,
 * and the conditional motion-vector probability updates (17.2).
 * mvc_size is VP7_MVC_SIZE or VP8_MVC_SIZE. */
446 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
449     VP56RangeCoder *c = &s->c;
453         for (i = 0; i < 4; i++)
454             s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
456         for (i = 0; i < 3; i++)
457             s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
459     // 17.2 MV probability update
460     for (i = 0; i < 2; i++)
461         for (j = 0; j < mvc_size; j++)
462             if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
463                 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
/* Read the golden/altref update flags and resolve them to concrete source
 * frames through ref_to_update(). */
466 static void update_refs(VP8Context *s)
468     VP56RangeCoder *c = &s->c;
470     int update_golden = vp8_rac_get(c);
471     int update_altref = vp8_rac_get(c);
473     s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
474     s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
/* Copy both chroma planes (data[1], data[2]) from src to dst, row by row.
 * width/height are luma dimensions; chroma is half-size (4:2:0). */
477 static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
481     for (j = 1; j < 3; j++) {
482         for (i = 0; i < height / 2; i++)
483             memcpy(dst->data[j] + i * dst->linesize[j],
484 src->data[j] + i * src->linesize[j], width / 2);
/* Apply a VP7 fade to the luma plane: out = clip(y + y*beta/256 + alpha).
 * alpha is the additive offset, beta the multiplicative slope (parameter
 * declarations elided in this excerpt). */
488 static void fade(uint8_t *dst, ptrdiff_t dst_linesize,
489 const uint8_t *src, ptrdiff_t src_linesize,
490 int width, int height,
494     for (j = 0; j < height; j++) {
495         for (i = 0; i < width; i++) {
496             uint8_t y = src[j * src_linesize + i];
497             dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
/* Parse VP7 fade parameters (signed 8-bit alpha/beta) and, on interframes
 * with a non-trivial fade, apply the fade to the previous frame. If the
 * previous frame is also the golden frame it must be preserved, so a new
 * previous-frame buffer is allocated and the faded/copied data written
 * there. Returns 0 on success or a negative AVERROR. */
502 static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
504     int alpha = (int8_t) vp8_rac_get_uint(c, 8);
505     int beta = (int8_t) vp8_rac_get_uint(c, 8);
508     if (!s->keyframe && (alpha || beta)) {
509         int width = s->mb_width * 16;
510         int height = s->mb_height * 16;
513         if (!s->framep[VP56_FRAME_PREVIOUS] ||
514 !s->framep[VP56_FRAME_GOLDEN]) {
515             av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
516             return AVERROR_INVALIDDATA;
520         src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
522         /* preserve the golden frame, write a new previous frame */
523         if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
524             s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
525             if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
528             dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
/* chroma is copied unchanged; only luma is faded */
530             copy_chroma(dst, src, width, height);
533         fade(dst->data[0], dst->linesize[0],
534 src->data[0], src->linesize[0],
535 width, height, alpha, beta);
/* Parse the complete VP7 frame header: uncompressed prefix (profile,
 * keyframe flag, 24-bit part1 size), then the range-coded sections A-J.
 * Returns 0 on success or a negative AVERROR on invalid/truncated data. */
541 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
543     VP56RangeCoder *c = &s->c;
544     int part1_size, hscale, vscale, i, j, ret;
545     int width = s->avctx->width;
546     int height = s->avctx->height;
549         return AVERROR_INVALIDDATA;
552     s->profile = (buf[0] >> 1) & 7;
553     if (s->profile > 1) {
554         avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
555         return AVERROR_INVALIDDATA;
558     s->keyframe = !(buf[0] & 1);
560     part1_size = AV_RL24(buf) >> 4;
/* header prefix is 4 bytes for profile 0, 3 for profile 1 */
562     if (buf_size < 4 - s->profile + part1_size) {
563         av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
564         return AVERROR_INVALIDDATA;
567     buf += 4 - s->profile;
568     buf_size -= 4 - s->profile;
/* VP7 always uses the 6-tap epel filters */
570     memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
572     ret = ff_vp56_init_range_decoder(c, buf, part1_size);
576     buf_size -= part1_size;
578     /* A. Dimension information (keyframes only) */
580         width = vp8_rac_get_uint(c, 12);
581         height = vp8_rac_get_uint(c, 12);
582         hscale = vp8_rac_get_uint(c, 2);
583         vscale = vp8_rac_get_uint(c, 2);
584         if (hscale || vscale)
585             avpriv_request_sample(s->avctx, "Upscaling");
/* keyframe: reset all references and probability tables to defaults */
587         s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
588         vp78_reset_probability_tables(s);
589         memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
590 sizeof(s->prob->pred16x16));
591         memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
592 sizeof(s->prob->pred8x8c));
593         for (i = 0; i < 2; i++)
594             memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
595 sizeof(vp7_mv_default_prob[i]));
596         memset(&s->segmentation, 0, sizeof(s->segmentation));
597         memset(&s->lf_delta, 0, sizeof(s->lf_delta));
598         memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
601     if (s->keyframe || s->profile > 0)
602         memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));
604     /* B. Decoding information for all four macroblock-level features */
605     for (i = 0; i < 4; i++) {
606         s->feature_enabled[i] = vp8_rac_get(c);
607         if (s->feature_enabled[i]) {
608             s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
610             for (j = 0; j < 3; j++)
611                 s->feature_index_prob[i][j] =
612                     vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
614             if (vp7_feature_value_size[s->profile][i])
615                 for (j = 0; j < 4; j++)
616                     s->feature_value[i][j] =
617                         vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
/* VP7 has no segmentation/LF-delta syntax; keep them disabled */
621     s->segmentation.enabled = 0;
622     s->segmentation.update_map = 0;
623     s->lf_delta.enabled = 0;
/* VP7 always has a single coefficient partition */
625     s->num_coeff_partitions = 1;
626     ret = ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
630     if (!s->macroblocks_base || /* first frame */
631 width != s->avctx->width || height != s->avctx->height ||
632 (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
633         if ((ret = vp7_update_dimensions(s, width, height)) < 0)
637     /* C. Dequantization indices */
640     /* D. Golden frame update flag (a Flag) for interframes only */
642         s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
643         s->sign_bias[VP56_FRAME_GOLDEN] = 0;
647     s->update_probabilities = 1;
650     if (s->profile > 0) {
651         s->update_probabilities = vp8_rac_get(c);
/* if not persisting updates, snapshot current probs for restoration */
652         if (!s->update_probabilities)
653             s->prob[1] = s->prob[0];
656             s->fade_present = vp8_rac_get(c);
659     /* E. Fading information for previous frame */
660     if (s->fade_present && vp8_rac_get(c)) {
661         if ((ret = vp7_fade_frame(s ,c)) < 0)
665     /* F. Loop filter type */
667         s->filter.simple = vp8_rac_get(c);
669     /* G. DCT coefficient ordering specification */
671         for (i = 1; i < 16; i++)
672             s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];
674     /* H. Loop filter levels */
676         s->filter.simple = vp8_rac_get(c);
677     s->filter.level = vp8_rac_get_uint(c, 6);
678     s->filter.sharpness = vp8_rac_get_uint(c, 3);
680     /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
681     vp78_update_probability_tables(s);
683     s->mbskip_enabled = 0;
685     /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
687         s->prob->intra = vp8_rac_get_uint(c, 8);
688         s->prob->last = vp8_rac_get_uint(c, 8);
689         vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
/* Parse the complete VP8 frame header: the 3-byte uncompressed tag
 * (keyframe/profile/show flags + 19-bit first-partition size), the
 * keyframe start code and dimensions, then the range-coded header fields
 * (segmentation, loop filter, partitions, quantizers, reference updates,
 * probability updates). Finishes by recording the entropy coder state for
 * hwaccels. Returns 0 on success or a negative AVERROR. */
695 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
697     VP56RangeCoder *c = &s->c;
698     int header_size, hscale, vscale, ret;
699     int width = s->avctx->width;
700     int height = s->avctx->height;
703         av_log(s->avctx, AV_LOG_ERROR, "Insufficent data (%d) for header\n", buf_size);
704         return AVERROR_INVALIDDATA;
707     s->keyframe = !(buf[0] & 1);
708     s->profile = (buf[0]>>1) & 7;
709     s->invisible = !(buf[0] & 0x10);
710     header_size = AV_RL24(buf) >> 5;
714     s->header_partition_size = header_size;
717         av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
/* profile 0 uses the 6-tap epel filters */
720         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
721 sizeof(s->put_pixels_tab));
722     else // profile 1-3 use bilinear, 4+ aren't defined so whatever
723         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
724 sizeof(s->put_pixels_tab));
/* keyframes carry a 7-byte uncompressed chunk after the tag */
726     if (header_size > buf_size - 7 * s->keyframe) {
727         av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
728         return AVERROR_INVALIDDATA;
732         if (AV_RL24(buf) != 0x2a019d) {
733             av_log(s->avctx, AV_LOG_ERROR,
734 "Invalid start code 0x%x\n", AV_RL24(buf));
735             return AVERROR_INVALIDDATA;
737         width = AV_RL16(buf + 3) & 0x3fff;
738         height = AV_RL16(buf + 5) & 0x3fff;
739         hscale = buf[4] >> 6;
740         vscale = buf[6] >> 6;
744         if (hscale || vscale)
745             avpriv_request_sample(s->avctx, "Upscaling");
/* keyframe: reset references and probability tables to spec defaults */
747         s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
748         vp78_reset_probability_tables(s);
749         memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
750 sizeof(s->prob->pred16x16));
751         memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
752 sizeof(s->prob->pred8x8c));
753         memcpy(s->prob->mvc, vp8_mv_default_prob,
754 sizeof(s->prob->mvc));
755         memset(&s->segmentation, 0, sizeof(s->segmentation));
756         memset(&s->lf_delta, 0, sizeof(s->lf_delta));
759     ret = ff_vp56_init_range_decoder(c, buf, header_size);
763     buf_size -= header_size;
766         s->colorspace = vp8_rac_get(c);
768             av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
769         s->fullrange = vp8_rac_get(c);
772     if ((s->segmentation.enabled = vp8_rac_get(c)))
773         parse_segment_info(s);
775         s->segmentation.update_map = 0; // FIXME: move this to some init function?
777     s->filter.simple = vp8_rac_get(c);
778     s->filter.level = vp8_rac_get_uint(c, 6);
779     s->filter.sharpness = vp8_rac_get_uint(c, 3);
781     if ((s->lf_delta.enabled = vp8_rac_get(c))) {
782         s->lf_delta.update = vp8_rac_get(c);
783         if (s->lf_delta.update)
787     if (setup_partitions(s, buf, buf_size)) {
788         av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
789         return AVERROR_INVALIDDATA;
792     if (!s->macroblocks_base || /* first frame */
793 width != s->avctx->width || height != s->avctx->height ||
794 (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
795         if ((ret = vp8_update_dimensions(s, width, height)) < 0)
802         s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
803         s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
806     // if we aren't saving this frame's probabilities for future frames,
807     // make a copy of the current probabilities
808     if (!(s->update_probabilities = vp8_rac_get(c)))
809         s->prob[1] = s->prob[0];
811         s->update_last = s->keyframe || vp8_rac_get(c);
813     vp78_update_probability_tables(s);
815     if ((s->mbskip_enabled = vp8_rac_get(c)))
816         s->prob->mbskip = vp8_rac_get_uint(c, 8);
/* interframe-only reference-selection probabilities */
819         s->prob->intra = vp8_rac_get_uint(c, 8);
820         s->prob->last = vp8_rac_get_uint(c, 8);
821         s->prob->golden = vp8_rac_get_uint(c, 8);
822         vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
825     // Record the entropy coder state here so that hwaccels can use it.
826     s->c.code_word = vp56_rac_renorm(&s->c);
827     s->coder_state_at_header_end.input = s->c.buffer - (-s->c.bits / 8);
828     s->coder_state_at_header_end.range = s->c.high;
829     s->coder_state_at_header_end.value = s->c.code_word >> 16;
830     s->coder_state_at_header_end.bit_count = -s->c.bits % 8;
/* Clamp src into dst against the macroblock's MV bounds; the bounds
 * themselves are first clipped into int16 range before use. */
835 static av_always_inline
836 void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src)
838     dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
839 av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
840     dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
841 av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
845 * Motion vector coding, 17.1.
/* Decode one motion-vector component. The first bit chooses between the
 * "long" form (explicit bits, low 3 plus high bits in VP7/VP8-specific
 * order, with a conditional bit-3) and a small tree-coded "short" form.
 * A trailing sign bit negates nonzero magnitudes. */
847 static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
851     if (vp56_rac_get_prob_branchy(c, p[0])) {
854         for (i = 0; i < 3; i++)
855             x += vp56_rac_get_prob(c, p[9 + i]) << i;
/* VP7 codes fewer high-order bits than VP8 (7 vs 9) */
856         for (i = (vp7 ? 7 : 9); i > 3; i--)
857             x += vp56_rac_get_prob(c, p[9 + i]) << i;
858         if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
862         const uint8_t *ps = p + 2;
863         bit = vp56_rac_get_prob(c, *ps);
866         bit = vp56_rac_get_prob(c, *ps);
869         x += vp56_rac_get_prob(c, *ps);
/* sign bit, only read for nonzero magnitude */
872     return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
/* VP7 wrapper around read_mv_component(). */
875 static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
877     return read_mv_component(c, p, 1);
/* VP8 wrapper around read_mv_component(). */
880 static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
882     return read_mv_component(c, p, 0);
/* Select the sub-MV probability set from the left/top neighbour sub-MVs.
 * VP7 uses a single table; VP8 picks among five contexts based on whether
 * the neighbours are zero/equal (selection logic partially elided here). */
885 static av_always_inline
886 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
889         return vp7_submv_prob;
892         return vp8_submv_prob[4 - !!left];
894             return vp8_submv_prob[2];
895         return vp8_submv_prob[1 - !!left];
899 * Split motion vector prediction, 16.4.
900 * @returns the number of motion vectors parsed (2, 4 or 16)
902 static av_always_inline
903 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
904 int layout, int is_vp7)
908     VP8Macroblock *top_mb;
909     VP8Macroblock *left_mb = &mb[-1];
910     const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
911     const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
913     VP56mv *left_mv = left_mb->bmv;
914     VP56mv *cur_mv = mb->bmv;
916     if (!layout) // layout is inlined, s->mb_layout is not
/* alternate layout: top macroblock is one row back in the array */
919         top_mb = &mb[-s->mb_width - 1];
920     mbsplits_top = vp8_mbsplits[top_mb->partitioning];
921     top_mv = top_mb->bmv;
/* tree-decode the partition type: 16x8 / 8x16 / 8x8 / 4x4 */
923     if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
924         if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
925             part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
927             part_idx = VP8_SPLITMVMODE_8x8;
929         part_idx = VP8_SPLITMVMODE_4x4;
932     num = vp8_mbsplit_count[part_idx];
933     mbsplits_cur = vp8_mbsplits[part_idx],
934     firstidx = vp8_mbfirstidx[part_idx];
935     mb->partitioning = part_idx;
937     for (n = 0; n < num; n++) {
939         uint32_t left, above;
940         const uint8_t *submv_prob;
/* gather left/above neighbour sub-MVs, falling back to the left/top
 * macroblock when the subblock sits on the macroblock edge */
943             left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
945             left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
947             above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
949             above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
951         submv_prob = get_submv_prob(left, above, is_vp7);
/* sub-MV mode tree: NEW (read components) / ZERO / ABOVE / LEFT */
953         if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
954             if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
955                 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
956                     mb->bmv[n].y = mb->mv.y +
957                         read_mv_component(c, s->prob->mvc[0], is_vp7);
958                     mb->bmv[n].x = mb->mv.x +
959                         read_mv_component(c, s->prob->mvc[1], is_vp7);
961                     AV_ZERO32(&mb->bmv[n]);
964                 AV_WN32A(&mb->bmv[n], above);
967                 AV_WN32A(&mb->bmv[n], left);
975 * The vp7 reference decoder uses a padding macroblock column (added to right
976 * edge of the frame) to guard against illegal macroblock offsets. The
977 * algorithm has bugs that permit offsets to straddle the padding column.
978 * This function replicates those bugs.
980 * @param[out] edge_x macroblock x address
981 * @param[out] edge_y macroblock y address
983 * @return macroblock offset legal (boolean)
985 static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
986 int xoffset, int yoffset, int boundary,
987 int *edge_x, int *edge_y)
/* vwidth includes the padding column */
989     int vwidth = mb_width + 1;
990     int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
/* reject offsets before the boundary or landing in the padding column */
991     if (new < boundary || new % vwidth == vwidth - 1)
993     *edge_y = new / vwidth;
994     *edge_x = new % vwidth;
/* Return the sub-MV of 'subblock' for split-MV macroblocks, otherwise the
 * macroblock's single MV stored in bmv[0]. */
998 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
1000     return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
/* Decode the motion vector(s) for one VP7 inter macroblock: build the
 * zero/nearest/near candidate list by scanning the VP7 predictor
 * positions (with the reference decoder's buggy offset handling), then
 * tree-decode the MV mode and read new components or split MVs as
 * required. */
1003 static av_always_inline
1004 void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
1005 int mb_x, int mb_y, int layout)
1007     VP8Macroblock *mb_edge[12];
1008     enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
1009     enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1012     uint8_t cnt[3] = { 0 };
1013     VP56RangeCoder *c = &s->c;
1016     AV_ZERO32(&near_mv[0]);
1017     AV_ZERO32(&near_mv[1]);
1018     AV_ZERO32(&near_mv[2]);
/* scan the fixed VP7 predictor positions, scoring each candidate MV */
1020     for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
1021         const VP7MVPred * pred = &vp7_mv_pred[i];
1024         if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
1025 pred->yoffset, !s->profile, &edge_x, &edge_y)) {
1026             VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
1027 ? s->macroblocks_base + 1 + edge_x +
1028 (s->mb_width + 1) * (edge_y + 1)
1029 : s->macroblocks + edge_x +
1030 (s->mb_height - edge_y - 1) * 2;
1031             uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
/* classify the candidate into nearest / near buckets */
1033             if (AV_RN32A(&near_mv[CNT_NEAREST])) {
1034                 if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
1036                 } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
1037                     if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
1041                     AV_WN32A(&near_mv[CNT_NEAR], mv);
1045                 AV_WN32A(&near_mv[CNT_NEAREST], mv);
1054             cnt[idx] += vp7_mv_pred[i].score;
1057     mb->partitioning = VP8_SPLITMVMODE_NONE;
/* MV mode tree: ZERO / NEAREST / NEAR / NEW / SPLIT */
1059     if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
1060         mb->mode = VP8_MVMODE_MV;
1062         if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
1064             if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
1066                 if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
1067                     AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
1069                     AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
1071                 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
1072                     mb->mode = VP8_MVMODE_SPLIT;
1073                     mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
1075                     mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
1076                     mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
1077                     mb->bmv[0] = mb->mv;
1080             mb->mv = near_mv[CNT_NEAR];
1081             mb->bmv[0] = mb->mv;
1084         mb->mv = near_mv[CNT_NEAREST];
1085         mb->bmv[0] = mb->mv;
1088         mb->mode = VP8_MVMODE_ZERO;
1090         mb->bmv[0] = mb->mv;
/* Decode the motion vector(s) for one VP8 inter macroblock: survey the
 * top, left and top-left neighbours to build zero/nearest/near candidates
 * (with sign-bias correction per reference frame), then tree-decode the
 * MV mode and read new components or split MVs. Chosen MVs are clamped to
 * mv_bounds. */
1094 static av_always_inline
1095 void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb,
1096 int mb_x, int mb_y, int layout)
1098     VP8Macroblock *mb_edge[3] = { 0 /* top */,
1101     enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1102     enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1104     int cur_sign_bias = s->sign_bias[mb->ref_frame];
1105     int8_t *sign_bias = s->sign_bias;
1107     uint8_t cnt[4] = { 0 };
1108     VP56RangeCoder *c = &s->c;
/* top/top-left macroblock location depends on the memory layout */
1110     if (!layout) { // layout is inlined (s->mb_layout is not)
1111         mb_edge[0] = mb + 2;
1112         mb_edge[2] = mb + 1;
1114         mb_edge[0] = mb - s->mb_width - 1;
1115         mb_edge[2] = mb - s->mb_width - 2;
1118     AV_ZERO32(&near_mv[0]);
1119     AV_ZERO32(&near_mv[1]);
1120     AV_ZERO32(&near_mv[2]);
1122     /* Process MB on top, left and top-left */
1123 #define MV_EDGE_CHECK(n) \
1125 VP8Macroblock *edge = mb_edge[n]; \
1126 int edge_ref = edge->ref_frame; \
1127 if (edge_ref != VP56_FRAME_CURRENT) { \
1128 uint32_t mv = AV_RN32A(&edge->mv); \
1130 if (cur_sign_bias != sign_bias[edge_ref]) { \
1131 /* SWAR negate of the values in mv. */ \
1133 mv = ((mv & 0x7fff7fff) + \
1134 0x00010001) ^ (mv & 0x80008000); \
1136 if (!n || mv != AV_RN32A(&near_mv[idx])) \
1137 AV_WN32A(&near_mv[++idx], mv); \
1138 cnt[idx] += 1 + (n != 2); \
1140 cnt[CNT_ZERO] += 1 + (n != 2); \
1148     mb->partitioning = VP8_SPLITMVMODE_NONE;
1149     if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1150         mb->mode = VP8_MVMODE_MV;
1152         /* If we have three distinct MVs, merge first and last if they're the same */
1153         if (cnt[CNT_SPLITMV] &&
1154 AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1155             cnt[CNT_NEAREST] += 1;
1157         /* Swap near and nearest if necessary */
1158         if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1159             FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
1160             FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1163         if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1164             if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1165                 /* Choose the best mv out of 0,0 and the nearest mv */
1166                 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
/* reuse cnt[CNT_SPLITMV] as the split-mode context from neighbours */
1167                 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
1168 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
1169 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1171                 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1172                     mb->mode = VP8_MVMODE_SPLIT;
1173                     mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1175                     mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
1176                     mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
1177                     mb->bmv[0] = mb->mv;
1180                 clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]);
1181                 mb->bmv[0] = mb->mv;
1184             clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]);
1185             mb->bmv[0] = mb->mv;
1188         mb->mode = VP8_MVMODE_ZERO;
1190         mb->bmv[0] = mb->mv;
/* Decode the 16 intra 4x4 prediction modes of a macroblock. On keyframes
 * each mode is context-coded from the modes above and to the left; on
 * interframes a single context-free tree is used. The decoded top row is
 * kept either per-macroblock (alternate layout) or in the shared
 * intra4x4_pred_mode_top array. */
1194 static av_always_inline
1195 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1196 int mb_x, int keyframe, int layout)
1198     uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
/* alternate layout: copy the top predictor row from the MB above */
1201         VP8Macroblock *mb_top = mb - s->mb_width - 1;
1202         memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1207         uint8_t *const left = s->intra4x4_pred_mode_left;
1209             top = mb->intra4x4_pred_mode_top;
1211             top = s->intra4x4_pred_mode_top + 4 * mb_x;
1212         for (y = 0; y < 4; y++) {
1213             for (x = 0; x < 4; x++) {
1215                 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
1216                 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
1217                 left[y] = top[x] = *intra4x4;
/* interframe: context-free tree for all 16 subblocks */
1223         for (i = 0; i < 16; i++)
1224             intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
1225 vp8_pred4x4_prob_inter);
1229 static av_always_inline
1230 void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds,
1231 VP8Macroblock *mb, int mb_x, int mb_y,
1232 uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
1234 VP56RangeCoder *c = &s->c;
1235 static const char * const vp7_feature_name[] = { "q-index",
1237 "partial-golden-update",
1242 for (i = 0; i < 4; i++) {
1243 if (s->feature_enabled[i]) {
1244 if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
1245 int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
1246 s->feature_index_prob[i]);
1247 av_log(s->avctx, AV_LOG_WARNING,
1248 "Feature %s present in macroblock (value 0x%x)\n",
1249 vp7_feature_name[i], s->feature_value[i][index]);
1253 } else if (s->segmentation.update_map) {
1254 int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
1255 *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
1256 } else if (s->segmentation.enabled)
1257 *segment = ref ? *ref : *segment;
1258 mb->segment = *segment;
1260 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
1263 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
1264 vp8_pred16x16_prob_intra);
1266 if (mb->mode == MODE_I4x4) {
1267 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
1269 const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1270 : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1272 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1274 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1275 AV_WN32A(s->intra4x4_pred_mode_left, modes);
1278 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1279 vp8_pred8x8c_prob_intra);
1280 mb->ref_frame = VP56_FRAME_CURRENT;
1281 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
1283 if (vp56_rac_get_prob_branchy(c, s->prob->last))
1285 (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
1286 : VP56_FRAME_GOLDEN;
1288 mb->ref_frame = VP56_FRAME_PREVIOUS;
1289 s->ref_count[mb->ref_frame - 1]++;
1291 // motion vectors, 16.3
1293 vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1295 vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout);
1298 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1300 if (mb->mode == MODE_I4x4)
1301 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1303 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1305 mb->ref_frame = VP56_FRAME_CURRENT;
1306 mb->partitioning = VP8_SPLITMVMODE_NONE;
1307 AV_ZERO32(&mb->bmv[0]);
/**
 * @param r      arithmetic bitstream reader context
 * @param block  destination for block coefficients
 * @param probs  probabilities to use when reading trees from the bitstream
 * @param i      initial coeff index, 0 unless a separate DC block is coded
 * @param qmul   array holding the dc/ac dequant factor at position 0/1
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
1321 static av_always_inline
1322 int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1323 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1324 int i, uint8_t *token_prob, int16_t qmul[2],
1325 const uint8_t scan[16], int vp7)
1327 VP56RangeCoder c = *r;
1332 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
1336 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1338 break; // invalid input; blocks should end with EOB
1339 token_prob = probs[i][0];
1345 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1347 token_prob = probs[i + 1][1];
1349 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1350 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1352 coeff += vp56_rac_get_prob(&c, token_prob[5]);
1356 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1357 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1358 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1359 } else { // DCT_CAT2
1361 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1362 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1364 } else { // DCT_CAT3 and up
1365 int a = vp56_rac_get_prob(&c, token_prob[8]);
1366 int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
1367 int cat = (a << 1) + b;
1368 coeff = 3 + (8 << cat);
1369 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
1372 token_prob = probs[i + 1][2];
1374 block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
1381 static av_always_inline
1382 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1384 int16_t dc = block[0];
1392 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1393 block[0] = pred[0] = dc;
1398 block[0] = pred[0] = dc;
1404 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1406 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1407 int i, uint8_t *token_prob,
1409 const uint8_t scan[16])
1411 return decode_block_coeffs_internal(r, block, probs, i,
1412 token_prob, qmul, scan, IS_VP7);
1415 #ifndef vp8_decode_block_coeffs_internal
1416 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1418 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1419 int i, uint8_t *token_prob,
1422 return decode_block_coeffs_internal(r, block, probs, i,
1423 token_prob, qmul, ff_zigzag_scan, IS_VP8);
1428 * @param c arithmetic bitstream reader context
1429 * @param block destination for block coefficients
1430 * @param probs probabilities to use when reading trees from the bitstream
1431 * @param i initial coeff index, 0 unless a separate DC block is coded
1432 * @param zero_nhood the initial prediction context for number of surrounding
1433 * all-zero blocks (only left/top, so 0-2)
1434 * @param qmul array holding the dc/ac dequant factor at position 0/1
1435 * @param scan scan pattern (VP7 only)
1437 * @return 0 if no coeffs were decoded
1438 * otherwise, the index of the last coeff decoded plus one
1440 static av_always_inline
1441 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1442 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1443 int i, int zero_nhood, int16_t qmul[2],
1444 const uint8_t scan[16], int vp7)
1446 uint8_t *token_prob = probs[i][zero_nhood];
1447 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1449 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1450 token_prob, qmul, scan)
1451 : vp8_decode_block_coeffs_internal(c, block, probs, i,
1455 static av_always_inline
1456 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1457 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1460 int i, x, y, luma_start = 0, luma_ctx = 3;
1461 int nnz_pred, nnz, nnz_total = 0;
1462 int segment = mb->segment;
1465 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1466 nnz_pred = t_nnz[8] + l_nnz[8];
1468 // decode DC values and do hadamard
1469 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1470 nnz_pred, s->qmat[segment].luma_dc_qmul,
1471 ff_zigzag_scan, is_vp7);
1472 l_nnz[8] = t_nnz[8] = !!nnz;
1474 if (is_vp7 && mb->mode > MODE_I4x4) {
1475 nnz |= inter_predict_dc(td->block_dc,
1476 s->inter_dc_pred[mb->ref_frame - 1]);
1483 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1485 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
1492 for (y = 0; y < 4; y++)
1493 for (x = 0; x < 4; x++) {
1494 nnz_pred = l_nnz[y] + t_nnz[x];
1495 nnz = decode_block_coeffs(c, td->block[y][x],
1496 s->prob->token[luma_ctx],
1497 luma_start, nnz_pred,
1498 s->qmat[segment].luma_qmul,
1499 s->prob[0].scan, is_vp7);
1500 /* nnz+block_dc may be one more than the actual last index,
1501 * but we don't care */
1502 td->non_zero_count_cache[y][x] = nnz + block_dc;
1503 t_nnz[x] = l_nnz[y] = !!nnz;
1508 // TODO: what to do about dimensions? 2nd dim for luma is x,
1509 // but for chroma it's (y<<1)|x
1510 for (i = 4; i < 6; i++)
1511 for (y = 0; y < 2; y++)
1512 for (x = 0; x < 2; x++) {
1513 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1514 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1515 s->prob->token[2], 0, nnz_pred,
1516 s->qmat[segment].chroma_qmul,
1517 s->prob[0].scan, is_vp7);
1518 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1519 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1523 // if there were no coded coeffs despite the macroblock not being marked skip,
1524 // we MUST not do the inner loop filter and should not do IDCT
1525 // Since skip isn't used for bitstream prediction, just manually set it.
1530 static av_always_inline
1531 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1532 uint8_t *src_cb, uint8_t *src_cr,
1533 ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
1535 AV_COPY128(top_border, src_y + 15 * linesize);
1537 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1538 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1542 static av_always_inline
1543 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1544 uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
1545 int mb_y, int mb_width, int simple, int xchg)
1547 uint8_t *top_border_m1 = top_border - 32; // for TL prediction
1549 src_cb -= uvlinesize;
1550 src_cr -= uvlinesize;
1552 #define XCHG(a, b, xchg) \
1560 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1561 XCHG(top_border, src_y, xchg);
1562 XCHG(top_border + 8, src_y + 8, 1);
1563 if (mb_x < mb_width - 1)
1564 XCHG(top_border + 32, src_y + 16, 1);
1566 // only copy chroma for normal loop filter
1567 // or to initialize the top row to 127
1568 if (!simple || !mb_y) {
1569 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1570 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1571 XCHG(top_border + 16, src_cb, 1);
1572 XCHG(top_border + 24, src_cr, 1);
1576 static av_always_inline
1577 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1580 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1582 return mb_y ? mode : LEFT_DC_PRED8x8;
1585 static av_always_inline
1586 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1589 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1591 return mb_y ? mode : HOR_PRED8x8;
1594 static av_always_inline
1595 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1599 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1601 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1603 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1604 case PLANE_PRED8x8: /* TM */
1605 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1610 static av_always_inline
1611 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1614 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1616 return mb_y ? mode : HOR_VP8_PRED;
1620 static av_always_inline
1621 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1622 int *copy_buf, int vp7)
1626 if (!mb_x && mb_y) {
1631 case DIAG_DOWN_LEFT_PRED:
1632 case VERT_LEFT_PRED:
1633 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1641 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1643 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1644 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1645 * as 16x16/8x8 DC */
1646 case DIAG_DOWN_RIGHT_PRED:
1647 case VERT_RIGHT_PRED:
1656 static av_always_inline
1657 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1658 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1660 int x, y, mode, nnz;
1663 /* for the first row, we need to run xchg_mb_border to init the top edge
1664 * to 127 otherwise, skip it if we aren't going to deblock */
1665 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1666 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1667 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1668 s->filter.simple, 1);
1670 if (mb->mode < MODE_I4x4) {
1671 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1672 s->hpc.pred16x16[mode](dst[0], s->linesize);
1674 uint8_t *ptr = dst[0];
1675 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1676 const uint8_t lo = is_vp7 ? 128 : 127;
1677 const uint8_t hi = is_vp7 ? 128 : 129;
1678 uint8_t tr_top[4] = { lo, lo, lo, lo };
1680 // all blocks on the right edge of the macroblock use bottom edge
1681 // the top macroblock for their topright edge
1682 uint8_t *tr_right = ptr - s->linesize + 16;
1684 // if we're on the right edge of the frame, said edge is extended
1685 // from the top macroblock
1686 if (mb_y && mb_x == s->mb_width - 1) {
1687 tr = tr_right[-1] * 0x01010101u;
1688 tr_right = (uint8_t *) &tr;
1692 AV_ZERO128(td->non_zero_count_cache);
1694 for (y = 0; y < 4; y++) {
1695 uint8_t *topright = ptr + 4 - s->linesize;
1696 for (x = 0; x < 4; x++) {
1698 ptrdiff_t linesize = s->linesize;
1699 uint8_t *dst = ptr + 4 * x;
1700 LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);
1702 if ((y == 0 || x == 3) && mb_y == 0) {
1705 topright = tr_right;
1707 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1708 mb_y + y, ©, is_vp7);
1710 dst = copy_dst + 12;
1714 AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1716 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1720 copy_dst[3] = ptr[4 * x - s->linesize - 1];
1729 copy_dst[11] = ptr[4 * x - 1];
1730 copy_dst[19] = ptr[4 * x + s->linesize - 1];
1731 copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1732 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1735 s->hpc.pred4x4[mode](dst, topright, linesize);
1737 AV_COPY32(ptr + 4 * x, copy_dst + 12);
1738 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1739 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1740 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
1743 nnz = td->non_zero_count_cache[y][x];
1746 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1747 td->block[y][x], s->linesize);
1749 s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1750 td->block[y][x], s->linesize);
1755 ptr += 4 * s->linesize;
1760 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1761 mb_x, mb_y, is_vp7);
1762 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1763 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1765 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1766 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1767 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1768 s->filter.simple, 0);
/* Per-subpel-phase MC metadata, indexed by the 3-bit fractional MV phase. */
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
1781 * @param s VP8 decoding context
1782 * @param dst target buffer for block data at block position
1783 * @param ref reference picture buffer at origin (0, 0)
1784 * @param mv motion vector (relative to block position) to get pixel data from
1785 * @param x_off horizontal position of block from origin (0, 0)
1786 * @param y_off vertical position of block from origin (0, 0)
1787 * @param block_w width of block (16, 8 or 4)
1788 * @param block_h height of block (always same as block_w)
1789 * @param width width of src/dst plane data
1790 * @param height height of src/dst plane data
1791 * @param linesize size of a single line of plane data, including padding
1792 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1794 static av_always_inline
1795 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1796 ThreadFrame *ref, const VP56mv *mv,
1797 int x_off, int y_off, int block_w, int block_h,
1798 int width, int height, ptrdiff_t linesize,
1799 vp8_mc_func mc_func[3][3])
1801 uint8_t *src = ref->f->data[0];
1804 ptrdiff_t src_linesize = linesize;
1806 int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
1807 int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];
1809 x_off += mv->x >> 2;
1810 y_off += mv->y >> 2;
1813 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1814 src += y_off * linesize + x_off;
1815 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1816 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1817 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1818 src - my_idx * linesize - mx_idx,
1819 EDGE_EMU_LINESIZE, linesize,
1820 block_w + subpel_idx[1][mx],
1821 block_h + subpel_idx[1][my],
1822 x_off - mx_idx, y_off - my_idx,
1824 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1825 src_linesize = EDGE_EMU_LINESIZE;
1827 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1829 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1830 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1831 linesize, block_h, 0, 0);
1836 * chroma MC function
1838 * @param s VP8 decoding context
1839 * @param dst1 target buffer for block data at block position (U plane)
1840 * @param dst2 target buffer for block data at block position (V plane)
1841 * @param ref reference picture buffer at origin (0, 0)
1842 * @param mv motion vector (relative to block position) to get pixel data from
1843 * @param x_off horizontal position of block from origin (0, 0)
1844 * @param y_off vertical position of block from origin (0, 0)
1845 * @param block_w width of block (16, 8 or 4)
1846 * @param block_h height of block (always same as block_w)
1847 * @param width width of src/dst plane data
1848 * @param height height of src/dst plane data
1849 * @param linesize size of a single line of plane data, including padding
1850 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1852 static av_always_inline
1853 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1854 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1855 int x_off, int y_off, int block_w, int block_h,
1856 int width, int height, ptrdiff_t linesize,
1857 vp8_mc_func mc_func[3][3])
1859 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1862 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1863 int my = mv->y & 7, my_idx = subpel_idx[0][my];
1865 x_off += mv->x >> 3;
1866 y_off += mv->y >> 3;
1869 src1 += y_off * linesize + x_off;
1870 src2 += y_off * linesize + x_off;
1871 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1872 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1873 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1874 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1875 src1 - my_idx * linesize - mx_idx,
1876 EDGE_EMU_LINESIZE, linesize,
1877 block_w + subpel_idx[1][mx],
1878 block_h + subpel_idx[1][my],
1879 x_off - mx_idx, y_off - my_idx, width, height);
1880 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1881 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1883 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1884 src2 - my_idx * linesize - mx_idx,
1885 EDGE_EMU_LINESIZE, linesize,
1886 block_w + subpel_idx[1][mx],
1887 block_h + subpel_idx[1][my],
1888 x_off - mx_idx, y_off - my_idx, width, height);
1889 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1890 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1892 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1893 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1896 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1897 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1898 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1902 static av_always_inline
1903 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1904 ThreadFrame *ref_frame, int x_off, int y_off,
1905 int bx_off, int by_off, int block_w, int block_h,
1906 int width, int height, VP56mv *mv)
1911 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1912 ref_frame, mv, x_off + bx_off, y_off + by_off,
1913 block_w, block_h, width, height, s->linesize,
1914 s->put_pixels_tab[block_w == 8]);
1917 if (s->profile == 3) {
1918 /* this block only applies VP8; it is safe to check
1919 * only the profile, as VP7 profile <= 1 */
1931 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1932 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1933 &uvmv, x_off + bx_off, y_off + by_off,
1934 block_w, block_h, width, height, s->uvlinesize,
1935 s->put_pixels_tab[1 + (block_w == 4)]);
1938 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1939 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1940 static av_always_inline
1941 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1944 /* Don't prefetch refs that haven't been used very often this frame. */
1945 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1946 int x_off = mb_x << 4, y_off = mb_y << 4;
1947 int mx = (mb->mv.x >> 2) + x_off + 8;
1948 int my = (mb->mv.y >> 2) + y_off;
1949 uint8_t **src = s->framep[ref]->tf.f->data;
1950 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1951 /* For threading, a ff_thread_await_progress here might be useful, but
1952 * it actually slows down the decoder. Since a bad prefetch doesn't
1953 * generate bad decoder output, we don't run it here. */
1954 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
1955 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1956 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1961 * Apply motion vectors to prediction buffer, chapter 18.
1963 static av_always_inline
1964 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1965 VP8Macroblock *mb, int mb_x, int mb_y)
1967 int x_off = mb_x << 4, y_off = mb_y << 4;
1968 int width = 16 * s->mb_width, height = 16 * s->mb_height;
1969 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1970 VP56mv *bmv = mb->bmv;
1972 switch (mb->partitioning) {
1973 case VP8_SPLITMVMODE_NONE:
1974 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1975 0, 0, 16, 16, width, height, &mb->mv);
1977 case VP8_SPLITMVMODE_4x4: {
1982 for (y = 0; y < 4; y++) {
1983 for (x = 0; x < 4; x++) {
1984 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1985 ref, &bmv[4 * y + x],
1986 4 * x + x_off, 4 * y + y_off, 4, 4,
1987 width, height, s->linesize,
1988 s->put_pixels_tab[2]);
1997 for (y = 0; y < 2; y++) {
1998 for (x = 0; x < 2; x++) {
1999 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
2000 mb->bmv[2 * y * 4 + 2 * x + 1].x +
2001 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
2002 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
2003 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
2004 mb->bmv[2 * y * 4 + 2 * x + 1].y +
2005 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
2006 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
2007 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
2008 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
2009 if (s->profile == 3) {
2013 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
2014 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
2015 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
2016 width, height, s->uvlinesize,
2017 s->put_pixels_tab[2]);
2022 case VP8_SPLITMVMODE_16x8:
2023 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2024 0, 0, 16, 8, width, height, &bmv[0]);
2025 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2026 0, 8, 16, 8, width, height, &bmv[1]);
2028 case VP8_SPLITMVMODE_8x16:
2029 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2030 0, 0, 8, 16, width, height, &bmv[0]);
2031 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2032 8, 0, 8, 16, width, height, &bmv[1]);
2034 case VP8_SPLITMVMODE_8x8:
2035 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2036 0, 0, 8, 8, width, height, &bmv[0]);
2037 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2038 8, 0, 8, 8, width, height, &bmv[1]);
2039 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2040 0, 8, 8, 8, width, height, &bmv[2]);
2041 vp8_mc_part(s, td, dst, ref, x_off, y_off,
2042 8, 8, 8, 8, width, height, &bmv[3]);
2047 static av_always_inline
2048 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
2052 if (mb->mode != MODE_I4x4) {
2053 uint8_t *y_dst = dst[0];
2054 for (y = 0; y < 4; y++) {
2055 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
2057 if (nnz4 & ~0x01010101) {
2058 for (x = 0; x < 4; x++) {
2059 if ((uint8_t) nnz4 == 1)
2060 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
2063 else if ((uint8_t) nnz4 > 1)
2064 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
2072 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
2075 y_dst += 4 * s->linesize;
2079 for (ch = 0; ch < 2; ch++) {
2080 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
2082 uint8_t *ch_dst = dst[1 + ch];
2083 if (nnz4 & ~0x01010101) {
2084 for (y = 0; y < 2; y++) {
2085 for (x = 0; x < 2; x++) {
2086 if ((uint8_t) nnz4 == 1)
2087 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2088 td->block[4 + ch][(y << 1) + x],
2090 else if ((uint8_t) nnz4 > 1)
2091 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2092 td->block[4 + ch][(y << 1) + x],
2096 goto chroma_idct_end;
2098 ch_dst += 4 * s->uvlinesize;
2101 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
2109 static av_always_inline
2110 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2111 VP8FilterStrength *f, int is_vp7)
2113 int interior_limit, filter_level;
2115 if (s->segmentation.enabled) {
2116 filter_level = s->segmentation.filter_level[mb->segment];
2117 if (!s->segmentation.absolute_vals)
2118 filter_level += s->filter.level;
2120 filter_level = s->filter.level;
2122 if (s->lf_delta.enabled) {
2123 filter_level += s->lf_delta.ref[mb->ref_frame];
2124 filter_level += s->lf_delta.mode[mb->mode];
2127 filter_level = av_clip_uintp2(filter_level, 6);
2129 interior_limit = filter_level;
2130 if (s->filter.sharpness) {
2131 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2132 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2134 interior_limit = FFMAX(interior_limit, 1);
2136 f->filter_level = filter_level;
2137 f->inner_limit = interior_limit;
2138 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2139 mb->mode == VP8_MVMODE_SPLIT;
2142 static av_always_inline
2143 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2144 int mb_x, int mb_y, int is_vp7)
2146 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2147 int filter_level = f->filter_level;
2148 int inner_limit = f->inner_limit;
2149 int inner_filter = f->inner_filter;
2150 ptrdiff_t linesize = s->linesize;
2151 ptrdiff_t uvlinesize = s->uvlinesize;
2152 static const uint8_t hev_thresh_lut[2][64] = {
2153 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2154 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2155 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2157 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2158 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2159 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2167 bedge_lim_y = filter_level;
2168 bedge_lim_uv = filter_level * 2;
2169 mbedge_lim = filter_level + 2;
2172 bedge_lim_uv = filter_level * 2 + inner_limit;
2173 mbedge_lim = bedge_lim_y + 4;
2176 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2179 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2180 mbedge_lim, inner_limit, hev_thresh);
2181 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2182 mbedge_lim, inner_limit, hev_thresh);
2185 #define H_LOOP_FILTER_16Y_INNER(cond) \
2186 if (cond && inner_filter) { \
2187 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2188 bedge_lim_y, inner_limit, \
2190 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2191 bedge_lim_y, inner_limit, \
2193 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2194 bedge_lim_y, inner_limit, \
2196 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2197 uvlinesize, bedge_lim_uv, \
2198 inner_limit, hev_thresh); \
2201 H_LOOP_FILTER_16Y_INNER(!is_vp7)
2204 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2205 mbedge_lim, inner_limit, hev_thresh);
2206 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2207 mbedge_lim, inner_limit, hev_thresh);
2211 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2212 linesize, bedge_lim_y,
2213 inner_limit, hev_thresh);
2214 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2215 linesize, bedge_lim_y,
2216 inner_limit, hev_thresh);
2217 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2218 linesize, bedge_lim_y,
2219 inner_limit, hev_thresh);
2220 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2221 dst[2] + 4 * uvlinesize,
2222 uvlinesize, bedge_lim_uv,
2223 inner_limit, hev_thresh);
2226 H_LOOP_FILTER_16Y_INNER(is_vp7)
2229 static av_always_inline
2230 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2233 int mbedge_lim, bedge_lim;
2234 int filter_level = f->filter_level;
2235 int inner_limit = f->inner_limit;
2236 int inner_filter = f->inner_filter;
2237 ptrdiff_t linesize = s->linesize;
2242 bedge_lim = 2 * filter_level + inner_limit;
2243 mbedge_lim = bedge_lim + 4;
2246 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2248 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2249 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2250 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2254 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2256 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2257 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2258 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2262 #define MARGIN (16 << 2)
2263 static av_always_inline
2264 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2265 VP8Frame *prev_frame, int is_vp7)
2267 VP8Context *s = avctx->priv_data;
2270 s->mv_bounds.mv_min.y = -MARGIN;
2271 s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2272 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2273 VP8Macroblock *mb = s->macroblocks_base +
2274 ((s->mb_width + 1) * (mb_y + 1) + 1);
2275 int mb_xy = mb_y * s->mb_width;
2277 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2279 s->mv_bounds.mv_min.x = -MARGIN;
2280 s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2281 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2283 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2284 DC_PRED * 0x01010101);
2285 decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2286 prev_frame && prev_frame->seg_map ?
2287 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
2288 s->mv_bounds.mv_min.x -= 64;
2289 s->mv_bounds.mv_max.x -= 64;
2291 s->mv_bounds.mv_min.y -= 64;
2292 s->mv_bounds.mv_max.y -= 64;
2296 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2297 VP8Frame *prev_frame)
2299 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
2302 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2303 VP8Frame *prev_frame)
2305 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
/* Sliced threading: block until thread `otd` has decoded past macroblock
 * (mb_x_check, mb_y_check). Positions are packed as (mb_y << 16) | mb_x so a
 * single atomic integer compare orders rows before columns. The waiting
 * thread publishes its wait position in td->wait_mb_pos (INT_MAX = not
 * waiting) so update_pos() below knows whether a broadcast is needed.
 * NOTE(review): the listing drops some macro continuation lines (do{,
 * break, }while(0)); the visible lines are kept verbatim. */
2309 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2311     int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2312     if (atomic_load(&otd->thread_mb_pos) < tmp) { \
2313         pthread_mutex_lock(&otd->lock); \
2314         atomic_store(&td->wait_mb_pos, tmp); \
2316             if (atomic_load(&otd->thread_mb_pos) >= tmp) \
2318             pthread_cond_wait(&otd->cond, &otd->lock); \
2320         atomic_store(&td->wait_mb_pos, INT_MAX); \
2321         pthread_mutex_unlock(&otd->lock); \
/* Sliced threading: publish this thread's decode progress as
 * (mb_y << 16) | mb_x and wake neighbouring threads that are blocked in
 * check_thread_pos() waiting for a position we have now reached. The
 * broadcast is skipped when no neighbour can be waiting (pos_check). */
2325 #define update_pos(td, mb_y, mb_x) \
2327     int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2328     int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2330     int is_null = !next_td || !prev_td; \
2331     int pos_check = (is_null) ? 1 : \
2332         (next_td != td && pos >= atomic_load(&next_td->wait_mb_pos)) || \
2333         (prev_td != td && pos >= atomic_load(&prev_td->wait_mb_pos)); \
2334     atomic_store(&td->thread_mb_pos, pos); \
2335     if (sliced_threading && pos_check) { \
2336         pthread_mutex_lock(&td->lock); \
2337         pthread_cond_broadcast(&td->cond); \
2338         pthread_mutex_unlock(&td->lock); \
/* No-threads fallback (the HAVE_THREADS branch above is compiled out):
 * expand to "while(0)" so the caller's trailing ';' forms an empty loop. */
2342 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
2343 #define update_pos(td, mb_y, mb_x) while(0)
/* Decode one macroblock row (modes, coefficients, prediction, IDCT) without
 * loop filtering; filtering is done separately by filter_mb_row(). Runs as
 * an execute2() job under sliced threading — the check_thread_pos /
 * update_pos calls enforce the inter-row dependency order.
 * Returns 0 on success or AVERROR_INVALIDDATA on coder overread.
 * NOTE(review): this listing is missing brace/declaration lines (e.g. the
 * dst[3] declaration, else branches); comments annotate visible code only. */
2346 static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2347 int jobnr, int threadnr, int is_vp7)
2349 VP8Context *s = avctx->priv_data;
2350 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
/* Row to decode was stored in the packed progress word by the sliced driver. */
2351 int mb_y = atomic_load(&td->thread_mb_pos) >> 16;
2352 int mb_x, mb_xy = mb_y * s->mb_width;
2353 int num_jobs = s->num_jobs;
2354 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
/* Coefficient partitions are assigned to rows round-robin (count is a power of 2). */
2355 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
/* Destination pointers for luma (16 rows/MB) and both chroma planes (8 rows/MB). */
2358 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2359 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2360 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
/* Bail out if the range coder has already consumed past its buffer. */
2363 if (c->end <= c->buffer && c->bits >= 0)
2364 return AVERROR_INVALIDDATA;
2369 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2370 if (mb_y == s->mb_height - 1)
2373 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2374 if (s->mb_layout == 1)
2375 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2377 // Make sure the previous frame has read its segmentation map,
2378 // if we re-use the same map.
2379 if (prev_frame && s->segmentation.enabled &&
2380 !s->segmentation.update_map)
2381 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
/* mb_layout == 0: two-row ping-pong layout indexed from the bottom. */
2382 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2383 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2384 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
/* VP7 carries left_nnz across rows; VP8 resets it every row. */
2387 if (!is_vp7 || mb_y == 0)
2388 memset(td->left_nnz, 0, sizeof(td->left_nnz));
2390 td->mv_bounds.mv_min.x = -MARGIN;
2391 td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2393 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2394 if (c->end <= c->buffer && c->bits >= 0)
2395 return AVERROR_INVALIDDATA;
2396 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2397 if (prev_td != td) {
2398 if (threadnr != 0) {
2399 check_thread_pos(td, prev_td,
2400 mb_x + (is_vp7 ? 2 : 1),
2401 mb_y - (is_vp7 ? 2 : 1));
/* Thread 0 additionally waits past the filter-progress offset (mb_width+3). */
2403 check_thread_pos(td, prev_td,
2404 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2405 mb_y - (is_vp7 ? 2 : 1));
2409 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2411 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2412 dst[2] - dst[1], 2);
2415 decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2416 prev_frame && prev_frame->seg_map ?
2417 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2419 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2422 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2424 if (mb->mode <= MODE_I4x4)
2425 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2427 inter_predict(s, td, dst, mb, mb_x, mb_y);
2429 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2432 idct_mb(s, td, dst, mb);
/* No residual: clear nnz context for this column instead of running IDCT. */
2434 AV_ZERO64(td->left_nnz);
2435 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2437 /* Reset DC block predictors if they would exist
2438 * if the mb had coefficients */
2439 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2440 td->left_nnz[8] = 0;
2441 s->top_nnz[mb_x][8] = 0;
2445 if (s->deblock_filter)
2446 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
/* Last sliced job backs up the border row before the filter pass clobbers it. */
2448 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2449 if (s->filter.simple)
2450 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2451 NULL, NULL, s->linesize, 0, 1);
2453 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2454 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2457 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2462 td->mv_bounds.mv_min.x -= 64;
2463 td->mv_bounds.mv_max.x -= 64;
/* Publish progress; the end-of-row position jumps past the filter offset. */
2465 if (mb_x == s->mb_width + 1) {
2466 update_pos(td, mb_y, s->mb_width + 3);
2468 update_pos(td, mb_y, mb_x);
2474 static int vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2475 int jobnr, int threadnr)
2477 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
2480 static int vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2481 int jobnr, int threadnr)
2483 return decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
/* Loop-filter one macroblock row that decode_mb_row_no_filter() already
 * reconstructed. Under sliced threading it must stay behind the previous
 * thread's decode progress and ahead of the next thread's filter needs.
 * NOTE(review): some brace/else/declaration lines are missing from this
 * listing; comments annotate visible code only. */
2486 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2487 int jobnr, int threadnr, int is_vp7)
2489 VP8Context *s = avctx->priv_data;
2490 VP8ThreadData *td = &s->thread_data[threadnr];
2491 int mb_x, mb_y = atomic_load(&td->thread_mb_pos) >> 16, num_jobs = s->num_jobs;
2492 AVFrame *curframe = s->curframe->tf.f;
2494 VP8ThreadData *prev_td, *next_td;
/* Plane base pointers for this row (16 luma / 8 chroma lines per MB). */
2496 curframe->data[0] + 16 * mb_y * s->linesize,
2497 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2498 curframe->data[2] + 8 * mb_y * s->uvlinesize
2501 if (s->mb_layout == 1)
2502 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2504 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2509 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2510 if (mb_y == s->mb_height - 1)
2513 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2515 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
/* Per-MB filter strength computed during the decode pass. */
2516 VP8FilterStrength *f = &td->filter_strength[mb_x];
/* Filter progress uses the +（mb_width+3) offset to stay behind decode. */
2518 check_thread_pos(td, prev_td,
2519 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2521 if (next_td != &s->thread_data[0])
2522 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
/* Single job: border backup was not done in the decode pass, do it here. */
2524 if (num_jobs == 1) {
2525 if (s->filter.simple)
2526 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2527 NULL, NULL, s->linesize, 0, 1);
2529 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2530 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
/* Simple filter touches luma only; full filter touches all three planes. */
2533 if (s->filter.simple)
2534 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2536 filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2541 update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
2545 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2546 int jobnr, int threadnr)
2548 filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
2551 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2552 int jobnr, int threadnr)
2554 filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
/* execute2() worker: each job decodes (and optionally filters) every
 * num_jobs-th macroblock row, interleaved with the other jobs. Also reports
 * row progress for frame threading.
 * NOTE(review): missing lines in this listing include the `int ret;`
 * declaration and error-path braces; comments annotate visible code only. */
2557 static av_always_inline
2558 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2559 int threadnr, int is_vp7)
2561 VP8Context *s = avctx->priv_data;
2562 VP8ThreadData *td = &s->thread_data[jobnr];
2563 VP8ThreadData *next_td = NULL, *prev_td = NULL;
2564 VP8Frame *curframe = s->curframe;
2565 int mb_y, num_jobs = s->num_jobs;
2568 td->thread_nr = threadnr;
/* Stagger each job's vertical MV clamp by its starting row. */
2569 td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr;
2570 td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr;
2571 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
/* Publish the row this job is about to decode (packed mb_y<<16 | mb_x). */
2572 atomic_store(&td->thread_mb_pos, mb_y << 16);
2573 ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
/* On error mark this job "done with everything" so peers never block on it. */
2575 update_pos(td, s->mb_height, INT_MAX & 0xFFFF);
2578 if (s->deblock_filter)
2579 s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2580 update_pos(td, mb_y, INT_MAX & 0xFFFF);
/* Advance the clamp window by the stride of rows this job skips. */
2582 td->mv_bounds.mv_min.y -= 64 * num_jobs;
2583 td->mv_bounds.mv_max.y -= 64 * num_jobs;
/* Frame threading: let consumers of this reference frame proceed per row. */
2585 if (avctx->active_thread_type == FF_THREAD_FRAME)
2586 ff_thread_report_progress(&curframe->tf, mb_y, 0);
2592 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2593 int jobnr, int threadnr)
2595 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2598 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2599 int jobnr, int threadnr)
2601 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
/* Shared VP7/VP8 frame decode: parse header, manage the reference-frame
 * pointer sets (last/golden/altref), dispatch either the hwaccel path or the
 * sliced software decode, then output the frame unless it is invisible.
 * NOTE(review): many brace/else/goto/label lines are missing from this
 * listing (err:/skip_decode: labels, else branches); comments annotate
 * visible code only. */
2604 static av_always_inline
2605 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2606 AVPacket *avpkt, int is_vp7)
2608 VP8Context *s = avctx->priv_data;
2609 int ret, i, referenced, num_jobs;
2610 enum AVDiscard skip_thresh;
2611 VP8Frame *av_uninit(curframe), *prev_frame;
2614 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2616 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2621 if (s->actually_webp) {
2622 // avctx->pix_fmt already set in caller.
2623 } else if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
2624 s->pix_fmt = get_pixel_format(s);
2625 if (s->pix_fmt < 0) {
2626 ret = AVERROR(EINVAL);
2629 avctx->pix_fmt = s->pix_fmt;
2632 prev_frame = s->framep[VP56_FRAME_CURRENT];
/* A frame is "referenced" if any of the three reference slots will keep it. */
2634 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2635 s->update_altref == VP56_FRAME_CURRENT;
2637 skip_thresh = !referenced ? AVDISCARD_NONREF
2638 : !s->keyframe ? AVDISCARD_NONKEY
/* Honour avctx->skip_frame: keep the reference state but decode nothing. */
2641 if (avctx->skip_frame >= skip_thresh) {
2643 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2646 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2648 // release no longer referenced frames
2649 for (i = 0; i < 5; i++)
2650 if (s->frames[i].tf.f->buf[0] &&
2651 &s->frames[i] != prev_frame &&
2652 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2653 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2654 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2655 vp8_release_frame(s, &s->frames[i]);
2657 curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2660 avctx->colorspace = AVCOL_SPC_BT470BG;
2662 avctx->color_range = AVCOL_RANGE_JPEG;
2664 avctx->color_range = AVCOL_RANGE_MPEG;
2666 /* Given that arithmetic probabilities are updated every frame, it's quite
2667 * likely that the values we have on a random interframe are complete
2668 * junk if we didn't start decode on a keyframe. So just don't display
2669 * anything rather than junk. */
2670 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2671 !s->framep[VP56_FRAME_GOLDEN] ||
2672 !s->framep[VP56_FRAME_GOLDEN2])) {
2673 av_log(avctx, AV_LOG_WARNING,
2674 "Discarding interframe without a prior keyframe!\n");
2675 ret = AVERROR_INVALIDDATA;
2679 curframe->tf.f->key_frame = s->keyframe;
2680 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2681 : AV_PICTURE_TYPE_P;
2682 if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2685 // check if golden and altref are swapped
2686 if (s->update_altref != VP56_FRAME_NONE)
2687 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2689 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2691 if (s->update_golden != VP56_FRAME_NONE)
2692 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2694 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2697 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2699 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2701 s->next_framep[VP56_FRAME_CURRENT] = curframe;
/* Frame threading: reference bookkeeping done, the next frame may start. */
2703 ff_thread_finish_setup(avctx);
2705 if (avctx->hwaccel) {
2706 ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
2710 ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
2714 ret = avctx->hwaccel->end_frame(avctx);
/* Software path from here on. */
2719 s->linesize = curframe->tf.f->linesize[0];
2720 s->uvlinesize = curframe->tf.f->linesize[1];
2722 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2723 /* Zero macroblock structures for top/top-left prediction
2724 * from outside the frame. */
2726 memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2727 (s->mb_width + 1) * sizeof(*s->macroblocks));
2728 if (!s->mb_layout && s->keyframe)
2729 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2731 memset(s->ref_count, 0, sizeof(s->ref_count));
/* mb_layout 1: pre-parse all modes/MVs before the sliced pixel decode. */
2733 if (s->mb_layout == 1) {
2734 // Make sure the previous frame has read its segmentation map,
2735 // if we re-use the same map.
2736 if (prev_frame && s->segmentation.enabled &&
2737 !s->segmentation.update_map)
2738 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2740 vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2742 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2745 if (avctx->active_thread_type == FF_THREAD_FRAME)
2748 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2749 s->num_jobs = num_jobs;
2750 s->curframe = curframe;
2751 s->prev_frame = prev_frame;
2752 s->mv_bounds.mv_min.y = -MARGIN;
2753 s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
/* Reset per-thread progress words before launching the jobs. */
2754 for (i = 0; i < MAX_THREADS; i++) {
2755 VP8ThreadData *td = &s->thread_data[i];
2756 atomic_init(&td->thread_mb_pos, 0);
2757 atomic_init(&td->wait_mb_pos, INT_MAX);
2760 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2763 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2767 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2768 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2771 // if future frames don't use the updated probabilities,
2772 // reset them to the values we saved
2773 if (!s->update_probabilities)
2774 s->prob[0] = s->prob[1];
/* Invisible (altref-only) frames update references but are never output. */
2776 if (!s->invisible) {
2777 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
/* Error path: keep the old reference set so state stays consistent. */
2784 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2788 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2791 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2794 #if CONFIG_VP7_DECODER
2795 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2798 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2800 #endif /* CONFIG_VP7_DECODER */
2802 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2804 VP8Context *s = avctx->priv_data;
2810 vp8_decode_flush_impl(avctx, 1);
2811 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2812 av_frame_free(&s->frames[i].tf.f);
2817 static av_cold int vp8_init_frames(VP8Context *s)
2820 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2821 s->frames[i].tf.f = av_frame_alloc();
2822 if (!s->frames[i].tf.f)
2823 return AVERROR(ENOMEM);
/* Shared VP7/VP8 init: set up DSP contexts, intra predictors and the
 * per-codec row-decode/filter function pointers, then allocate frames.
 * NOTE(review): some brace/declaration lines (e.g. `int ret;`, the error
 * return and final `return 0;`) are missing from this listing. */
2828 static av_always_inline
2829 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2831 VP8Context *s = avctx->priv_data;
2835 s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
/* Actual pix_fmt is resolved lazily from the first frame header. */
2836 s->pix_fmt = AV_PIX_FMT_NONE;
2837 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2838 avctx->internal->allocate_progress = 1;
2840 ff_videodsp_init(&s->vdsp, 8);
/* Common VP7/VP8 DSP first; the codec-specific init below overrides parts. */
2842 ff_vp78dsp_init(&s->vp8dsp);
2843 if (CONFIG_VP7_DECODER && is_vp7) {
2844 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2845 ff_vp7dsp_init(&s->vp8dsp);
2846 s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2847 s->filter_mb_row = vp7_filter_mb_row;
2848 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2849 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2850 ff_vp8dsp_init(&s->vp8dsp);
2851 s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2852 s->filter_mb_row = vp8_filter_mb_row;
2855 /* does not change for VP8 */
2856 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
/* Frame allocation failure tears down everything already initialized. */
2858 if ((ret = vp8_init_frames(s)) < 0) {
2859 ff_vp8_decode_free(avctx);
2866 #if CONFIG_VP7_DECODER
2867 static int vp7_decode_init(AVCodecContext *avctx)
2869 return vp78_decode_init(avctx, IS_VP7);
2871 #endif /* CONFIG_VP7_DECODER */
2873 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2875 return vp78_decode_init(avctx, IS_VP8);
2878 #if CONFIG_VP8_DECODER
/* Frame-threading worker init: allocate per-thread frame shells only; full
 * codec state is copied later by update_thread_context.
 * NOTE(review): the `int ret;` declaration and return lines are missing
 * from this listing. */
2880 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2882 VP8Context *s = avctx->priv_data;
2887 if ((ret = vp8_init_frames(s)) < 0) {
2888 ff_vp8_decode_free(avctx);
/* Translate a frame pointer from the source context's frames[] array to the
 * corresponding slot in this context's frames[] array (NULL stays NULL). */
2895 #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
/* Frame-threading state copy: sync header-derived state and reference-frame
 * pointers from the source (previous) thread context into this one.
 * NOTE(review): several brace/declaration lines (e.g. `int i;`, the
 * free_buffers call on dimension change) are missing from this listing. */
2897 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2898 const AVCodecContext *src)
2900 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
/* Dimension change: per-MB buffers must be re-allocated for the new size. */
2903 if (s->macroblocks_base &&
2904 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2906 s->mb_width = s_src->mb_width;
2907 s->mb_height = s_src->mb_height;
2910 s->pix_fmt = s_src->pix_fmt;
/* Take the probability set future frames will actually use. */
2911 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2912 s->segmentation = s_src->segmentation;
2913 s->lf_delta = s_src->lf_delta;
2914 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
/* Re-reference every frame buffer the source context still holds. */
2916 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2917 if (s_src->frames[i].tf.f->buf[0]) {
2918 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
/* Adopt the source's post-frame reference set, rebased into our arrays. */
2924 s->framep[0] = REBASE(s_src->next_framep[0]);
2925 s->framep[1] = REBASE(s_src->next_framep[1]);
2926 s->framep[2] = REBASE(s_src->next_framep[2]);
2927 s->framep[3] = REBASE(s_src->next_framep[3]);
2931 #endif /* HAVE_THREADS */
2932 #endif /* CONFIG_VP8_DECODER */
2934 #if CONFIG_VP7_DECODER
/* VP7 decoder registration. No threading capabilities: VP7 decoding here is
 * single-threaded (only VP8 advertises FRAME/SLICE threads below).
 * NOTE(review): the .name line and closing "};" are missing from this
 * listing. */
2935 AVCodec ff_vp7_decoder = {
2937 .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2938 .type = AVMEDIA_TYPE_VIDEO,
2939 .id = AV_CODEC_ID_VP7,
2940 .priv_data_size = sizeof(VP8Context),
2941 .init = vp7_decode_init,
2942 .close = ff_vp8_decode_free,
2943 .decode = vp7_decode_frame,
2944 .capabilities = AV_CODEC_CAP_DR1,
2945 .flush = vp8_decode_flush,
2947 #endif /* CONFIG_VP7_DECODER */
2949 #if CONFIG_VP8_DECODER
/* VP8 decoder registration: supports direct rendering plus frame- and
 * slice-threading; thread callbacks are compiled in only when threads are
 * enabled. NOTE(review): the .name line, hwaccel entries and closing
 * "};" are missing from this listing. */
2950 AVCodec ff_vp8_decoder = {
2952 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2953 .type = AVMEDIA_TYPE_VIDEO,
2954 .id = AV_CODEC_ID_VP8,
2955 .priv_data_size = sizeof(VP8Context),
2956 .init = ff_vp8_decode_init,
2957 .close = ff_vp8_decode_free,
2958 .decode = ff_vp8_decode_frame,
2959 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
2960 AV_CODEC_CAP_SLICE_THREADS,
2961 .flush = vp8_decode_flush,
2962 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2963 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2964 .hw_configs = (const AVCodecHWConfigInternal*[]) {
2965 #if CONFIG_VP8_VAAPI_HWACCEL
2968 #if CONFIG_VP8_NVDEC_HWACCEL
2974 #endif /* CONFIG_VP8_DECODER */