2 * VP7/VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Jason Garrett-Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/imgutils.h"
31 #include "rectangle.h"
40 #if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
41 #define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
42 #elif CONFIG_VP7_DECODER
43 #define VPX(vp7, f) vp7_ ## f
44 #else // CONFIG_VP8_DECODER
45 #define VPX(vp7, f) vp8_ ## f
/* Tear down all heap-allocated decoder state: per-thread synchronization
 * primitives and filter-strength arrays, then the shared macroblock and
 * top-row scratch buffers. av_freep() also NULLs each pointer, so a
 * subsequent reallocation pass starts from a clean slate. */
48 static void free_buffers(VP8Context *s)
52     for (i = 0; i < MAX_THREADS; i++) {
54         pthread_cond_destroy(&s->thread_data[i].cond);
55         pthread_mutex_destroy(&s->thread_data[i].lock);
57         av_freep(&s->thread_data[i].filter_strength);
59     av_freep(&s->thread_data);
60     av_freep(&s->macroblocks_base);
61     av_freep(&s->intra4x4_pred_mode_top);
62     av_freep(&s->top_nnz);
63     av_freep(&s->top_border);
    /* macroblocks is a derived pointer into macroblocks_base (see
     * update_dimensions), so it is cleared rather than freed */
65     s->macroblocks = NULL;
/* Allocate the pixel buffer for frame f via the thread-aware buffer
 * getter, plus a zero-initialized per-macroblock segment map.
 * @param ref  non-zero if the frame may be referenced by later frames
 *             (passes AV_GET_BUFFER_FLAG_REF to the allocator)
 * @return 0 on success, a negative AVERROR on failure; on segment-map
 *         allocation failure the pixel buffer is released again so the
 *         frame is left fully unallocated. */
68 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
71     if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
72                                     ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
74     if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
75         ff_thread_release_buffer(s->avctx, &f->tf);
76         return AVERROR(ENOMEM);
/* Release both resources owned by a VP8Frame: the refcounted segment
 * map and the thread-managed pixel buffer. Counterpart of
 * vp8_alloc_frame(). */
81 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
83     av_buffer_unref(&f->seg_map);
84     ff_thread_release_buffer(s->avctx, &f->tf);
87 #if CONFIG_VP8_DECODER
/* Make dst a new reference to src (pixel buffer and segment map).
 * Any previous contents of dst are released first; on failure dst is
 * released again and AVERROR(ENOMEM) is returned, so dst is never left
 * half-referenced. Used by frame-threading state propagation. */
88 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
92     vp8_release_frame(s, dst);
94     if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
97         !(dst->seg_map = av_buffer_ref(src->seg_map))) {
98         vp8_release_frame(s, dst);
99         return AVERROR(ENOMEM);
104 #endif /* CONFIG_VP8_DECODER */
/* Drop all held frames and clear the reference-frame pointer table.
 * @param free_mem  when non-zero, additionally frees the decoder's
 *                  scratch buffers (used on close / reinit). */
106 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
108     VP8Context *s = avctx->priv_data;
111     for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
112         vp8_release_frame(s, &s->frames[i]);
    /* forget which slots were CURRENT/PREVIOUS/GOLDEN/GOLDEN2 */
113     memset(s->framep, 0, sizeof(s->framep));
/* AVCodec.flush callback: flush frame state without freeing the
 * decoder's scratch buffers. */
119 static void vp8_decode_flush(AVCodecContext *avctx)
121     vp8_decode_flush_impl(avctx, 0);
/* Find a frame slot that is not currently serving as any of the four
 * reference roles (CURRENT, PREVIOUS, GOLDEN, GOLDEN2). If the chosen
 * slot still holds pixel data from an earlier use it is released.
 * Running out of slots is a decoder invariant violation, hence the
 * FATAL log. */
124 static VP8Frame *vp8_find_free_buffer(VP8Context *s)
126     VP8Frame *frame = NULL;
129     // find a free buffer
130     for (i = 0; i < 5; i++)
131         if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
132             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
133             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
134             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
135             frame = &s->frames[i];
139     av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
142     if (frame->tf.f->data[0])
143         vp8_release_frame(s, frame);
/* (Re)configure the decoder for a new coded size.
 * Flushes and reallocates all size-dependent buffers when the
 * dimensions or macroblock counts changed. The macroblock array layout
 * differs between sliced threading (2-D array with a 1-MB border) and
 * frame threading / single thread (one row plus column strip), selected
 * via s->mb_layout. VP7 always uses the sliced layout.
 * @return 0 on success, negative AVERROR on allocation or
 *         dimension-validation failure. */
148 static av_always_inline
149 int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
151     AVCodecContext *avctx = s->avctx;
154     if (width  != s->avctx->width || ((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base ||
155         height != s->avctx->height) {
156         vp8_decode_flush_impl(s->avctx, 1);
158         ret = ff_set_dimensions(s->avctx, width, height);
163     s->mb_width  = (s->avctx->coded_width  + 15) / 16;
164     s->mb_height = (s->avctx->coded_height + 15) / 16;
    /* sliced layout requires >1 coefficient partition so slices can
     * actually run in parallel */
166     s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
167                    FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1;
168     if (!s->mb_layout) { // Frame threading and one thread
169         s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
170                                                sizeof(*s->macroblocks));
171         s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
172     } else // Sliced threading
173         s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
174                                          sizeof(*s->macroblocks));
175     s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
176     s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
177     s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
179     for (i = 0; i < MAX_THREADS; i++) {
180         s->thread_data[i].filter_strength =
181             av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
183         pthread_mutex_init(&s->thread_data[i].lock, NULL);
184         pthread_cond_init(&s->thread_data[i].cond, NULL);
    /* intra4x4_pred_mode_top is only needed (and only allocated) for
     * the non-sliced layout, hence the conditional check */
188     if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
189         (!s->intra4x4_pred_mode_top && !s->mb_layout))
190         return AVERROR(ENOMEM);
192     s->macroblocks = s->macroblocks_base + 1;
/* VP7 wrapper: lets update_dimensions() be inlined with is_vp7
 * statically known. */
197 static int vp7_update_dimensions(VP8Context *s, int width, int height)
199     return update_dimensions(s, width, height, IS_VP7);
/* VP8 wrapper: lets update_dimensions() be inlined with is_vp7
 * statically known. */
202 static int vp8_update_dimensions(VP8Context *s, int width, int height)
204     return update_dimensions(s, width, height, IS_VP8);
/* Parse the segmentation header (VP8 spec section 9.3): optional
 * per-segment quantizer and loop-filter deltas, plus the tree
 * probabilities used to decode the per-MB segment map. A probability
 * of 255 means "not updated" for segmentid entries. */
208 static void parse_segment_info(VP8Context *s)
210     VP56RangeCoder *c = &s->c;
213     s->segmentation.update_map = vp8_rac_get(c);
215     if (vp8_rac_get(c)) { // update segment feature data
        /* absolute_vals: base_quant/filter_level are absolute values
         * rather than deltas against the frame-level settings */
216         s->segmentation.absolute_vals = vp8_rac_get(c);
218         for (i = 0; i < 4; i++)
219             s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);
221         for (i = 0; i < 4; i++)
222             s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
224     if (s->segmentation.update_map)
225         for (i = 0; i < 3; i++)
226             s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/* Read per-reference-frame and per-mode loop-filter level deltas
 * (VP8 spec section 9.4). Each delta is a 6-bit magnitude followed by
 * a sign handled by the negation lines below. */
229 static void update_lf_deltas(VP8Context *s)
231     VP56RangeCoder *c = &s->c;
234     for (i = 0; i < 4; i++) {
235         if (vp8_rac_get(c)) {
236             s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
239                 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
243     for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
244         if (vp8_rac_get(c)) {
245             s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
248                 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
/* Initialize the DCT coefficient partition range decoders (VP8 spec
 * section 9.5). The partition count is 1, 2, 4 or 8; all but the last
 * partition are prefixed by little-endian 24-bit sizes stored at the
 * start of buf. The last partition consumes the remaining buffer.
 * @return 0 on success, non-zero if a declared size overruns buf. */
253 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
255     const uint8_t *sizes = buf;
258     s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
    /* skip past the size table to the first partition's payload */
260     buf      += 3 * (s->num_coeff_partitions - 1);
261     buf_size -= 3 * (s->num_coeff_partitions - 1);
265     for (i = 0; i < s->num_coeff_partitions - 1; i++) {
266         int size = AV_RL24(sizes + 3 * i);
267         if (buf_size - size < 0)
270         ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
274     ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
/* Parse VP7 quantizer indices and resolve them to dequant multipliers.
 * Only the luma AC index is mandatory; each of the other five indices
 * is optionally coded and defaults to yac_qi. The chroma DC multiplier
 * is capped at 132, matching the reference decoder. */
279 static void vp7_get_quants(VP8Context *s)
281     VP56RangeCoder *c = &s->c;
283     int yac_qi  = vp8_rac_get_uint(c, 7);
284     int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
285     int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
286     int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
287     int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
288     int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
290     s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
291     s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
292     s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
293     s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
294     s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
295     s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];
/* Parse VP8 quantizer header (spec section 9.6): a base AC index plus
 * five signed 4-bit deltas, then fill the per-segment dequant tables.
 * With segmentation enabled, each segment's base index comes from the
 * segmentation header (absolute or delta per absolute_vals); indices
 * are clipped to [0,127] before the lookup. */
298 static void vp8_get_quants(VP8Context *s)
300     VP56RangeCoder *c = &s->c;
303     int yac_qi     = vp8_rac_get_uint(c, 7);
304     int ydc_delta  = vp8_rac_get_sint(c, 4);
305     int y2dc_delta = vp8_rac_get_sint(c, 4);
306     int y2ac_delta = vp8_rac_get_sint(c, 4);
307     int uvdc_delta = vp8_rac_get_sint(c, 4);
308     int uvac_delta = vp8_rac_get_sint(c, 4);
310     for (i = 0; i < 4; i++) {
311         if (s->segmentation.enabled) {
312             base_qi = s->segmentation.base_quant[i];
313             if (!s->segmentation.absolute_vals)
318         s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta,  7)];
319         s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,              7)];
320         s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
321         /* 101581>>16 is equivalent to 155/100 */
322         s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
323         s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
324         s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
        /* spec-mandated floors/caps on the second-order luma DC and
         * chroma DC multipliers */
326         s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
327         s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
 332  * Determine which buffers golden and altref should be updated with after this frame.
 333  * The spec isn't clear here, so I'm going by my understanding of what libvpx does
 335  * Intra frames update all 3 references
 336  * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 337  * If the update (golden|altref) flag is set, it's updated with the current frame
 338  * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 339  * If the flag is not set, the number read means:
 341  *     1: VP56_FRAME_PREVIOUS
 342  *     2: update golden with altref, or update altref with golden
/* @param update  value of the update_(golden|altref) bitstream flag
 * @param ref     which reference slot (GOLDEN or GOLDEN2) is being
 *                decided; used to pick "the other one" for case 2 */
344 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
346     VP56RangeCoder *c = &s->c;
349         return VP56_FRAME_CURRENT;
351     switch (vp8_rac_get_uint(c, 2)) {
353         return VP56_FRAME_PREVIOUS;
355         return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
357     return VP56_FRAME_NONE;
/* Restore the DCT token probabilities to their spec-defined defaults,
 * expanding the band-indexed default table into per-coefficient-index
 * entries via vp8_coeff_band. Called at keyframes. */
360 static void vp78_reset_probability_tables(VP8Context *s)
363     for (i = 0; i < 4; i++)
364         for (j = 0; j < 16; j++)
365             memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
366                    sizeof(s->prob->token[i][j]));
/* Token probability updates (VP8 spec section 13.4): for each
 * (plane, band, context, token) position, an update flag gated by
 * vp8_token_update_probs decides whether a new 8-bit probability is
 * read; it is then fanned out to every coefficient index belonging to
 * that band. */
369 static void vp78_update_probability_tables(VP8Context *s)
371     VP56RangeCoder *c = &s->c;
374     for (i = 0; i < 4; i++)
375         for (j = 0; j < 8; j++)
376             for (k = 0; k < 3; k++)
377                 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
378                     if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
379                         int prob = vp8_rac_get_uint(c, 8);
380                         for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
381                             s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
/* Number of per-component MV probabilities: VP7 and VP8 use different
 * MV coding trees. */
385 #define VP7_MVC_SIZE 17
386 #define VP8_MVC_SIZE 19
/* Optionally refresh the inter-frame 16x16 / chroma prediction-mode
 * probabilities and the motion-vector component probabilities
 * (VP8 spec section 17.2). mvc_size selects the VP7 or VP8 table
 * length. */
388 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
391     VP56RangeCoder *c = &s->c;
395         for (i = 0; i < 4; i++)
396             s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
398         for (i = 0; i < 3; i++)
399             s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);
401     // 17.2 MV probability update
402     for (i = 0; i < 2; i++)
403         for (j = 0; j < mvc_size; j++)
404             if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
405                 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
/* Read the golden/altref update flags from the bitstream and resolve
 * them to concrete source frames via ref_to_update(). */
408 static void update_refs(VP8Context *s)
410     VP56RangeCoder *c = &s->c;
412     int update_golden = vp8_rac_get(c);
413     int update_altref = vp8_rac_get(c);
415     s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
416     s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
/* Copy the two chroma planes (data[1] and data[2]) from src to dst.
 * width/height are luma dimensions; chroma is 4:2:0, hence the /2.
 * Used by vp7_fade_frame, which regenerates only the luma plane. */
419 static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
423     for (j = 1; j < 3; j++) {
424         for (i = 0; i < height / 2; i++)
425             memcpy(dst->data[j] + i * dst->linesize[j],
426                    src->data[j] + i * src->linesize[j], width / 2);
/* Apply the VP7 fade transform to a luma plane:
 *   dst = clip(src + src * beta / 256 + alpha)
 * i.e. a per-pixel multiplicative (beta) and additive (alpha)
 * brightness adjustment, clamped to 8 bits. */
430 static void fade(uint8_t *dst, int dst_linesize,
431                  const uint8_t *src, int src_linesize,
432                  int width, int height,
436     for (j = 0; j < height; j++) {
437         for (i = 0; i < width; i++) {
438             uint8_t y = src[j * src_linesize + i];
439             dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
/* Parse the VP7 fading parameters (signed 8-bit alpha/beta) and, on
 * inter frames with a non-trivial fade, apply them to the previous
 * frame so it can serve as the faded prediction reference. When the
 * previous frame is also the golden frame it must be preserved, so a
 * fresh buffer is allocated for the faded copy (chroma copied as-is,
 * luma faded).
 * @return 0 on success, negative AVERROR if references are missing or
 *         allocation fails. */
444 static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
446     int alpha = (int8_t) vp8_rac_get_uint(c, 8);
447     int beta  = (int8_t) vp8_rac_get_uint(c, 8);
450     if (!s->keyframe && (alpha || beta)) {
451         int width  = s->mb_width * 16;
452         int height = s->mb_height * 16;
455         if (!s->framep[VP56_FRAME_PREVIOUS] ||
456             !s->framep[VP56_FRAME_GOLDEN]) {
457             av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
458             return AVERROR_INVALIDDATA;
462         src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
464         /* preserve the golden frame, write a new previous frame */
465         if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
466             s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
467             if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
470             dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
472             copy_chroma(dst, src, width, height);
475         fade(dst->data[0], dst->linesize[0],
476              src->data[0], src->linesize[0],
477              width, height, alpha, beta);
/* Parse a complete VP7 frame header (sections A-J below), initializing
 * range decoders, probabilities, dimensions, quantizers and loop-filter
 * settings on the context. VP7 has no coefficient partitioning, so a
 * single coefficient range decoder covers the rest of the buffer.
 * @return 0 on success, negative AVERROR on invalid or truncated data. */
483 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
485     VP56RangeCoder *c = &s->c;
486     int part1_size, hscale, vscale, i, j, ret;
487     int width  = s->avctx->width;
488     int height = s->avctx->height;
490     s->profile = (buf[0] >> 1) & 7;
491     if (s->profile > 1) {
492         avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
493         return AVERROR_INVALIDDATA;
496     s->keyframe  = !(buf[0] & 1);
498     part1_size   = AV_RL24(buf) >> 4;
500     if (buf_size < 4 - s->profile + part1_size) {
501         av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed : %d\n", buf_size, 4 - s->profile + part1_size);
502         return AVERROR_INVALIDDATA;
    /* the fixed header is 4 bytes for profile 0, 3 bytes for profile 1 */
505     buf      += 4 - s->profile;
506     buf_size -= 4 - s->profile;
508     memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
510     ff_vp56_init_range_decoder(c, buf, part1_size);
512     buf_size -= part1_size;
514     /* A. Dimension information (keyframes only) */
516         width  = vp8_rac_get_uint(c, 12);
517         height = vp8_rac_get_uint(c, 12);
518         hscale = vp8_rac_get_uint(c, 2);
519         vscale = vp8_rac_get_uint(c, 2);
520         if (hscale || vscale)
521             avpriv_request_sample(s->avctx, "Upscaling");
        /* keyframes reset every reference and all probability tables */
523         s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
524         vp78_reset_probability_tables(s);
525         memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
526                sizeof(s->prob->pred16x16));
527         memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
528                sizeof(s->prob->pred8x8c));
529         for (i = 0; i < 2; i++)
530             memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
531                    sizeof(vp7_mv_default_prob[i]));
532         memset(&s->segmentation, 0, sizeof(s->segmentation));
533         memset(&s->lf_delta, 0, sizeof(s->lf_delta));
534         memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
537     if (s->keyframe || s->profile > 0)
538         memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));
540     /* B. Decoding information for all four macroblock-level features */
541     for (i = 0; i < 4; i++) {
542         s->feature_enabled[i] = vp8_rac_get(c);
543         if (s->feature_enabled[i]) {
544             s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
546             for (j = 0; j < 3; j++)
547                 s->feature_index_prob[i][j] =
548                     vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
550             if (vp7_feature_value_size[s->profile][i])
551                 for (j = 0; j < 4; j++)
552                     s->feature_value[i][j] =
553                         vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
    /* VP7 has no VP8-style segmentation / loop-filter deltas */
557     s->segmentation.enabled    = 0;
558     s->segmentation.update_map = 0;
559     s->lf_delta.enabled        = 0;
561     s->num_coeff_partitions = 1;
562     ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
564     if (!s->macroblocks_base || /* first frame */
565         width != s->avctx->width || height != s->avctx->height ||
566         (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
567         if ((ret = vp7_update_dimensions(s, width, height)) < 0)
571     /* C. Dequantization indices */
574     /* D. Golden frame update flag (a Flag) for interframes only */
576         s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
577         s->sign_bias[VP56_FRAME_GOLDEN] = 0;
581     s->update_probabilities = 1;
584     if (s->profile > 0) {
585         s->update_probabilities = vp8_rac_get(c);
        /* snapshot the current probabilities so this frame's updates
         * can be rolled back afterwards */
586         if (!s->update_probabilities)
587             s->prob[1] = s->prob[0];
590             s->fade_present = vp8_rac_get(c);
593     /* E. Fading information for previous frame */
594     if (s->fade_present && vp8_rac_get(c)) {
595         if ((ret = vp7_fade_frame(s ,c)) < 0)
599     /* F. Loop filter type */
601         s->filter.simple = vp8_rac_get(c);
603     /* G. DCT coefficient ordering specification */
605         for (i = 1; i < 16; i++)
606             s->prob[0].scan[i] = zigzag_scan[vp8_rac_get_uint(c, 4)];
608     /* H. Loop filter levels */
610         s->filter.simple    = vp8_rac_get(c);
611     s->filter.level     = vp8_rac_get_uint(c, 6);
612     s->filter.sharpness = vp8_rac_get_uint(c, 3);
614     /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
615     vp78_update_probability_tables(s);
617     s->mbskip_enabled = 0;
619     /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
621         s->prob->intra  = vp8_rac_get_uint(c, 8);
622         s->prob->last   = vp8_rac_get_uint(c, 8);
623         vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
/* Parse a complete VP8 frame header (RFC 6386 section 9): uncompressed
 * 3-byte tag (plus 7-byte keyframe start code / dimensions), then the
 * compressed header carrying segmentation, loop filter, partitioning,
 * quantizers, reference-update flags and probability updates.
 * @return 0 on success, negative AVERROR on invalid or truncated data. */
629 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
631     VP56RangeCoder *c = &s->c;
632     int header_size, hscale, vscale, ret;
633     int width  = s->avctx->width;
634     int height = s->avctx->height;
636     s->keyframe  = !(buf[0] & 1);
637     s->profile   =  (buf[0]>>1) & 7;
    /* "show_frame" bit inverted: invisible frames are decoded but not output */
638     s->invisible = !(buf[0] & 0x10);
639     header_size  = AV_RL24(buf) >> 5;
644         av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
647         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
648                sizeof(s->put_pixels_tab));
649     else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
650         memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
651                sizeof(s->put_pixels_tab));
653     if (header_size > buf_size - 7 * s->keyframe) {
654         av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
655         return AVERROR_INVALIDDATA;
    /* keyframe start code (9d 01 2a stored little-endian) */
659         if (AV_RL24(buf) != 0x2a019d) {
660             av_log(s->avctx, AV_LOG_ERROR,
661                    "Invalid start code 0x%x\n", AV_RL24(buf));
662             return AVERROR_INVALIDDATA;
664         width  = AV_RL16(buf + 3) & 0x3fff;
665         height = AV_RL16(buf + 5) & 0x3fff;
666         hscale = buf[4] >> 6;
667         vscale = buf[6] >> 6;
671         if (hscale || vscale)
672             avpriv_request_sample(s->avctx, "Upscaling");
        /* keyframes reset every reference and all probability tables */
674         s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
675         vp78_reset_probability_tables(s);
676         memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
677                sizeof(s->prob->pred16x16));
678         memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
679                sizeof(s->prob->pred8x8c));
680         memcpy(s->prob->mvc, vp8_mv_default_prob,
681                sizeof(s->prob->mvc));
682         memset(&s->segmentation, 0, sizeof(s->segmentation));
683         memset(&s->lf_delta, 0, sizeof(s->lf_delta));
686     ff_vp56_init_range_decoder(c, buf, header_size);
688     buf_size -= header_size;
692             av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
693         vp8_rac_get(c); // whether we can skip clamping in dsp functions
696     if ((s->segmentation.enabled = vp8_rac_get(c)))
697         parse_segment_info(s);
699         s->segmentation.update_map = 0; // FIXME: move this to some init function?
701     s->filter.simple    = vp8_rac_get(c);
702     s->filter.level     = vp8_rac_get_uint(c, 6);
703     s->filter.sharpness = vp8_rac_get_uint(c, 3);
705     if ((s->lf_delta.enabled = vp8_rac_get(c)))
709     if (setup_partitions(s, buf, buf_size)) {
710         av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
711         return AVERROR_INVALIDDATA;
714     if (!s->macroblocks_base || /* first frame */
715         width != s->avctx->width || height != s->avctx->height ||
716         (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
717         if ((ret = vp8_update_dimensions(s, width, height)) < 0)
724         s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
725         s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
728     // if we aren't saving this frame's probabilities for future frames,
729     // make a copy of the current probabilities
730     if (!(s->update_probabilities = vp8_rac_get(c)))
731         s->prob[1] = s->prob[0];
733     s->update_last = s->keyframe || vp8_rac_get(c);
735     vp78_update_probability_tables(s);
737     if ((s->mbskip_enabled = vp8_rac_get(c)))
738         s->prob->mbskip = vp8_rac_get_uint(c, 8);
741         s->prob->intra  = vp8_rac_get_uint(c, 8);
742         s->prob->last   = vp8_rac_get_uint(c, 8);
743         s->prob->golden = vp8_rac_get_uint(c, 8);
744         vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
/* Clamp a motion vector to the per-macroblock legal range stored in
 * s->mv_min / s->mv_max, writing the result to dst. */
750 static av_always_inline
751 void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
753     dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
754     dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
 758  * Motion vector coding, 17.1.
/* Decode one signed MV component. The first branch selects between the
 * "long" encoding (bitwise magnitude: low 3 bits, then high bits top-down;
 * VP7 uses 8 magnitude bits, VP8 uses 10) and the "short" tree-coded
 * small-magnitude path. The sign bit p[1] is only read for non-zero
 * magnitudes. */
760 static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
764     if (vp56_rac_get_prob_branchy(c, p[0])) {
767         for (i = 0; i < 3; i++)
768             x += vp56_rac_get_prob(c, p[9 + i]) << i;
769         for (i = (vp7 ? 7 : 9); i > 3; i--)
770             x += vp56_rac_get_prob(c, p[9 + i]) << i;
        /* p[12] codes bit 3 only when it cannot be inferred from the
         * already-decoded high bits */
771         if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
775         const uint8_t *ps = p + 2;
776         bit = vp56_rac_get_prob(c, *ps);
779         bit = vp56_rac_get_prob(c, *ps);
782         x += vp56_rac_get_prob(c, *ps);
785     return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
/* VP7 wrapper: inlines read_mv_component() with vp7 statically true. */
788 static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
790     return read_mv_component(c, p, 1);
/* VP8 wrapper: inlines read_mv_component() with vp7 statically false. */
793 static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
795     return read_mv_component(c, p, 0);
/* Select the probability set for decoding a sub-block MV mode, based on
 * whether the left/top neighbour sub-MVs are zero or equal (VP8 only;
 * VP7 uses a single fixed table). left/top are the neighbour MVs packed
 * into 32 bits for cheap comparison. */
798 static av_always_inline
799 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
802         return vp7_submv_prob;
805             return vp8_submv_prob[4 - !!left];
807             return vp8_submv_prob[2];
808     return vp8_submv_prob[1 - !!left];
 812  * Split motion vector prediction, 16.4.
 813  * @returns the number of motion vectors parsed (2, 4 or 16)
/* Decodes the partition shape (16x8/8x16/8x8/4x4) and then, for each
 * partition, either a new MV (predicted from mb->mv), a zero MV, or a
 * copy of the left/above neighbour sub-MV, chosen by the sub-MV mode
 * tree. Neighbour sub-MVs come from the left macroblock, the top
 * macroblock (layout-dependent addressing) or earlier partitions of
 * this macroblock. */
815 static av_always_inline
816 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
817                     int layout, int is_vp7)
821     VP8Macroblock *top_mb;
822     VP8Macroblock *left_mb = &mb[-1];
823     const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
824     const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
826     VP56mv *left_mv  = left_mb->bmv;
827     VP56mv *cur_mv   = mb->bmv;
829     if (!layout) // layout is inlined, s->mb_layout is not
832         top_mb = &mb[-s->mb_width - 1];
833     mbsplits_top = vp8_mbsplits[top_mb->partitioning];
834     top_mv       = top_mb->bmv;
836     if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
837         if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
838             part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
840             part_idx = VP8_SPLITMVMODE_8x8;
842         part_idx = VP8_SPLITMVMODE_4x4;
845     num              = vp8_mbsplit_count[part_idx];
846     mbsplits_cur     = vp8_mbsplits[part_idx],
847     firstidx         = vp8_mbfirstidx[part_idx];
848     mb->partitioning = part_idx;
850     for (n = 0; n < num; n++) {
852         uint32_t left, above;
853         const uint8_t *submv_prob;
        /* fetch the neighbouring sub-MVs as packed 32-bit words:
         * column 0 reads from the left MB, row 0 from the top MB,
         * everything else from already-decoded partitions of this MB */
856             left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
858             left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
860             above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
862             above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
864         submv_prob = get_submv_prob(left, above, is_vp7);
866         if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
867             if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
868                 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
869                     mb->bmv[n].y = mb->mv.y +
870                                    read_mv_component(c, s->prob->mvc[0], is_vp7);
871                     mb->bmv[n].x = mb->mv.x +
872                                    read_mv_component(c, s->prob->mvc[1], is_vp7);
874                     AV_ZERO32(&mb->bmv[n]);
877                 AV_WN32A(&mb->bmv[n], above);
880             AV_WN32A(&mb->bmv[n], left);
 888  * The vp7 reference decoder uses a padding macroblock column (added to right
 889  * edge of the frame) to guard against illegal macroblock offsets. The
 890  * algorithm has bugs that permit offsets to straddle the padding column.
 891  * This function replicates those bugs.
 893  * @param[out] edge_x macroblock x address
 894  * @param[out] edge_y macroblock y address
 896  * @return macroblock offset legal (boolean)
898 static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
899                                    int xoffset, int yoffset, int boundary,
900                                    int *edge_x, int *edge_y)
    /* vwidth includes the reference decoder's padding column; the
     * offset is computed in this padded linear address space */
902     int vwidth = mb_width + 1;
903     int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    /* reject addresses before the boundary or landing exactly on the
     * padding column (last column of the padded grid) */
904     if (new < boundary || new % vwidth == vwidth - 1)
906     *edge_y = new / vwidth;
907     *edge_x = new % vwidth;
/* Return the MV for a given sub-block: the per-partition bmv entry for
 * SPLIT macroblocks, otherwise the single whole-MB vector bmv[0]. */
911 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
913     return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
/* VP7 motion-vector decoding: survey the fixed candidate-predictor
 * positions (vp7_mv_pred), accumulate score-weighted counts for the
 * zero / nearest / near candidates, then use those counts to pick the
 * context for the MV-mode decisions (ZERO, NEAREST, NEAR, new MV, or
 * SPLIT). Finally stores the chosen vector in mb->mv and bmv[0]. */
916 static av_always_inline
917 void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
918                     int mb_x, int mb_y, int layout)
920     VP8Macroblock *mb_edge[12];
921     enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
922     enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
925     uint8_t cnt[3] = { 0 };
926     VP56RangeCoder *c = &s->c;
929     AV_ZERO32(&near_mv[0]);
930     AV_ZERO32(&near_mv[1]);
931     AV_ZERO32(&near_mv[2]);
933     for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
934         const VP7MVPred * pred = &vp7_mv_pred[i];
        /* validate the candidate position, replicating the reference
         * decoder's padded-grid addressing (see vp7_calculate_mb_offset) */
937         if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
938                                     pred->yoffset, !s->profile, &edge_x, &edge_y)) {
939             VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
940                                              ? s->macroblocks_base + 1 + edge_x +
941                                                (s->mb_width + 1) * (edge_y + 1)
942                                              : s->macroblocks + edge_x +
943                                                (s->mb_height - edge_y - 1) * 2;
944             uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            /* sort the candidate into the nearest/near slots: first
             * distinct non-zero MV becomes NEAREST, second becomes NEAR */
946             if (AV_RN32A(&near_mv[CNT_NEAREST])) {
947                 if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
949                 } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
950                     if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
954                     AV_WN32A(&near_mv[CNT_NEAR], mv);
958                 AV_WN32A(&near_mv[CNT_NEAREST], mv);
967             cnt[idx] += vp7_mv_pred[i].score;
970     mb->partitioning = VP8_SPLITMVMODE_NONE;
972     if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
973         mb->mode = VP8_MVMODE_MV;
975         if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
977             if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
979                 if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
980                     AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
982                     AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
984                 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
985                     mb->mode = VP8_MVMODE_SPLIT;
986                     mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
988                     mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
989                     mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
993             mb->mv = near_mv[CNT_NEAR];
997             mb->mv = near_mv[CNT_NEAREST];
1001         mb->mode = VP8_MVMODE_ZERO;
1003     mb->bmv[0] = mb->mv;
/* VP8 motion-vector decoding (RFC 6386 section 16/18): gather the
 * top / left / top-left neighbour MVs (with sign-bias flipping when the
 * neighbour references a differently-biased frame), rank them into
 * zero / nearest / near counts, and use those counts as contexts for
 * the MV-mode tree (ZERO, NEAREST, NEAR, new MV, SPLIT). Chosen MVs
 * are clamped to the legal range before use. */
1007 static av_always_inline
1008 void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
1009                     int mb_x, int mb_y, int layout)
1011     VP8Macroblock *mb_edge[3] = { 0      /* top */,
1014     enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1015     enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1017     int cur_sign_bias = s->sign_bias[mb->ref_frame];
1018     int8_t *sign_bias = s->sign_bias;
1020     uint8_t cnt[4] = { 0 };
1021     VP56RangeCoder *c = &s->c;
1023     if (!layout) { // layout is inlined (s->mb_layout is not)
1024         mb_edge[0] = mb + 2;
1025         mb_edge[2] = mb + 1;
1027         mb_edge[0] = mb - s->mb_width - 1;
1028         mb_edge[2] = mb - s->mb_width - 2;
1031     AV_ZERO32(&near_mv[0]);
1032     AV_ZERO32(&near_mv[1]);
1033     AV_ZERO32(&near_mv[2]);
1035     /* Process MB on top, left and top-left */
1036 #define MV_EDGE_CHECK(n)                                                  \
1038         VP8Macroblock *edge = mb_edge[n];                                 \
1039         int edge_ref = edge->ref_frame;                                   \
1040         if (edge_ref != VP56_FRAME_CURRENT) {                             \
1041             uint32_t mv = AV_RN32A(&edge->mv);                            \
1043                 if (cur_sign_bias != sign_bias[edge_ref]) {               \
1044                     /* SWAR negate of the values in mv. */                \
1046                     mv = ((mv & 0x7fff7fff) +                             \
1047                           0x00010001) ^ (mv & 0x80008000);                \
1049                 if (!n || mv != AV_RN32A(&near_mv[idx]))                  \
1050                     AV_WN32A(&near_mv[++idx], mv);                        \
1051                 cnt[idx] += 1 + (n != 2);                                 \
1053                 cnt[CNT_ZERO] += 1 + (n != 2);                            \
1061     mb->partitioning = VP8_SPLITMVMODE_NONE;
1062     if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1063         mb->mode = VP8_MVMODE_MV;
1065         /* If we have three distinct MVs, merge first and last if they're the same */
1066         if (cnt[CNT_SPLITMV] &&
1067             AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1068             cnt[CNT_NEAREST] += 1;
1070         /* Swap near and nearest if necessary */
1071         if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1072             FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
1073             FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1076         if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1077             if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1078                 /* Choose the best mv out of 0,0 and the nearest mv */
1079                 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                /* repurpose cnt[CNT_SPLITMV] as the SPLIT-mode context:
                 * weighted count of neighbours coded with SPLIT */
1080                 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
1081                                     (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
1082                                     (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1084                 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1085                     mb->mode = VP8_MVMODE_SPLIT;
1086                     mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1088                     mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
1089                     mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
1090                     mb->bmv[0] = mb->mv;
1093                 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
1094                 mb->bmv[0] = mb->mv;
1097             clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
1098             mb->bmv[0] = mb->mv;
1101         mb->mode = VP8_MVMODE_ZERO;
1103         mb->bmv[0] = mb->mv;
/* Decode the 16 B-prediction (4x4 intra) modes of a macroblock. On
 * keyframes the mode tree is context-coded from the modes directly
 * above and to the left of each sub-block (top row taken from the
 * layout-dependent storage); on inter frames a single context-free
 * probability set is used. */
1107 static av_always_inline
1108 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1109                            int mb_x, int keyframe, int layout)
1111     uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        /* sliced layout: pull the top-neighbour modes from the MB row above */
1114         VP8Macroblock *mb_top = mb - s->mb_width - 1;
1115         memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1120         uint8_t *const left = s->intra4x4_pred_mode_left;
1122             top = mb->intra4x4_pred_mode_top;
1124             top = s->intra4x4_pred_mode_top + 4 * mb_x;
1125         for (y = 0; y < 4; y++) {
1126             for (x = 0; x < 4; x++) {
1128                 ctx       = vp8_pred4x4_prob_intra[top[x]][left[y]];
1129                 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                /* decoded mode becomes the context for the next
                 * sub-block to the right and below */
1130                 left[y]   = top[x] = *intra4x4;
1136         for (i = 0; i < 16; i++)
1137             intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
1138                                            vp8_pred4x4_prob_inter);
/* Decode per-macroblock mode information: VP7 feature flags or VP8
 * segment id, the skip flag, then either intra prediction modes
 * (16x16/4x4 + chroma) or the reference frame and motion vectors.
 * @param segment  in/out: persists the segment id across MBs when the
 *                 map is not being updated this frame
 * @param ref      optional propagated segment id from a previous frame
 *                 (frame threading); NULL otherwise */
1142 static av_always_inline
1143 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1144                     uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
1146     VP56RangeCoder *c = &s->c;
1147     const char *vp7_feature_name[] = { "q-index",
1149                                        "partial-golden-update",
    /* VP7: features are parsed but only reported, not applied */
1154         for (i = 0; i < 4; i++) {
1155             if (s->feature_enabled[i]) {
1156                 if (vp56_rac_get_prob(c, s->feature_present_prob[i])) {
1157                       int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
1158                                                    s->feature_index_prob[i]);
1159                       av_log(s->avctx, AV_LOG_WARNING,
1160                              "Feature %s present in macroblock (value 0x%x)\n",
1161                              vp7_feature_name[i], s->feature_value[i][index]);
    /* VP8: segment id coded as a 2-level tree when the map is updated */
1165     } else if (s->segmentation.update_map) {
1166         int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
1167         *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
1168     } else if (s->segmentation.enabled)
1169         *segment = ref ? *ref : *segment;
1170     mb->segment = *segment;
1172     mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
1175         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
1176                                     vp8_pred16x16_prob_intra);
1178         if (mb->mode == MODE_I4x4) {
1179             decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
1181             const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1182                                            : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            /* replicate the whole-MB mode into all four 4x4 context slots */
1184                 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1186                 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1187             AV_WN32A(s->intra4x4_pred_mode_left, modes);
1190         mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1191                                                 vp8_pred8x8c_prob_intra);
1192         mb->ref_frame        = VP56_FRAME_CURRENT;
1193     } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        /* inter MB: pick previous / golden / altref (VP7 has no altref) */
1195         if (vp56_rac_get_prob_branchy(c, s->prob->last))
1197                 (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
1198                                                                    : VP56_FRAME_GOLDEN;
1200             mb->ref_frame = VP56_FRAME_PREVIOUS;
1201         s->ref_count[mb->ref_frame - 1]++;
1203         // motion vectors, 16.3
1205             vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1207             vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
        /* intra MB in inter frame, coded with inter-frame mode probs */
1210         mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1212         if (mb->mode == MODE_I4x4)
1213             decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1215         mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1217         mb->ref_frame        = VP56_FRAME_CURRENT;
1218         mb->partitioning     = VP8_SPLITMVMODE_NONE;
1219         AV_ZERO32(&mb->bmv[0]);
1224  * @param r arithmetic bitstream reader context
1225  * @param block destination for block coefficients
1226  * @param probs probabilities to use when reading trees from the bitstream
1227  * @param i initial coeff index, 0 unless a separate DC block is coded
1228  * @param qmul array holding the dc/ac dequant factor at position 0/1
1230  * @return 0 if no coeffs were decoded
1231  *         otherwise, the index of the last coeff decoded plus one
1233 static av_always_inline
1234 int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1235                                  uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1236                                  int i, uint8_t *token_prob, int16_t qmul[2],
1237                                  const uint8_t scan[16], int vp7)
/* Work on a local copy of the range coder; presumably written back to
 * *r after the loop (write-back not visible in this listing). */
1239     VP56RangeCoder c = *r;
1244         if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
1248         if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1250                 break; // invalid input; blocks should end with EOB
/* A zero coefficient: next token uses the "previous was zero" context. */
1251             token_prob = probs[i][0];
1257         if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1259             token_prob = probs[i + 1][1];
1261             if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1262                 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1264                 coeff += vp56_rac_get_prob(&c, token_prob[5]);
1268                 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1269                     if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1270                         coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1271                     } else { // DCT_CAT2
1273                         coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1274                         coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1276                 } else { // DCT_CAT3 and up
/* cat in 0..3 selects DCT_CAT3..CAT6; base value is 3 + 8<<cat, the
 * remainder is read with the per-category probability table. */
1277                     int a = vp56_rac_get_prob(&c, token_prob[8]);
1278                     int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
1279                     int cat = (a << 1) + b;
1280                     coeff = 3 + (8 << cat);
1281                     coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
1284             token_prob = probs[i + 1][2];
/* Sign bit, dequantize (DC uses qmul[0], AC qmul[1]) and store in
 * scan order. */
1286         block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
/**
 * VP7-only: predict the DC coefficient of inter macroblocks from the
 * previous DC value for the same reference frame (pred[0]), falling
 * back when sign changes or either value is zero.
 * NOTE(review): the branch bodies/return values between the visible
 * lines are elided in this listing — confirm against the full file.
 */
1293 static av_always_inline
1294 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1296     int16_t dc = block[0];
/* Branch-free test: either value zero, or signs differ (XOR sign bit
 * arithmetic-shifted down); '|' instead of '||' is intentional. */
1304     if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1305         block[0] = pred[0] = dc;
1310         block[0] = pred[0] = dc;
/* Thin VP7 wrapper: forwards to the shared coefficient decoder with the
 * caller-supplied (VP7 per-frame) scan order. */
1316 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1318                                             uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1319                                             int i, uint8_t *token_prob,
1321                                             const uint8_t scan[16])
1323     return decode_block_coeffs_internal(r, block, probs, i,
1324                                         token_prob, qmul, scan, IS_VP7);
/* Thin VP8 wrapper: fixed zigzag scan. Guarded so an arch-specific
 * (asm) implementation can override it via the same macro name. */
1327 #ifndef vp8_decode_block_coeffs_internal
1328 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1330                                             uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1331                                             int i, uint8_t *token_prob,
1334     return decode_block_coeffs_internal(r, block, probs, i,
1335                                         token_prob, qmul, zigzag_scan, IS_VP8);
1340  * @param c arithmetic bitstream reader context
1341  * @param block destination for block coefficients
1342  * @param probs probabilities to use when reading trees from the bitstream
1343  * @param i initial coeff index, 0 unless a separate DC block is coded
1344  * @param zero_nhood the initial prediction context for number of surrounding
1345  *                   all-zero blocks (only left/top, so 0-2)
1346  * @param qmul array holding the dc/ac dequant factor at position 0/1
1347  * @param scan scan pattern (VP7 only)
1349  * @return 0 if no coeffs were decoded
1350  *         otherwise, the index of the last coeff decoded plus one
1352 static av_always_inline
1353 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1354                         uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1355                         int i, int zero_nhood, int16_t qmul[2],
1356                         const uint8_t scan[16], int vp7)
/* Fast path: check the very first EOB token here so the all-zero block
 * case returns without entering the decode loop. */
1358     uint8_t *token_prob = probs[i][zero_nhood];
1359     if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1361     return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1362                                                   token_prob, qmul, scan)
1363                : vp8_decode_block_coeffs_internal(c, block, probs, i,
/**
 * Decode all residual coefficients of one macroblock: optional luma DC
 * (WHT) block, 16 luma 4x4 blocks, and 2x4 chroma blocks. Updates the
 * top/left non-zero context arrays (t_nnz/l_nnz) used for probability
 * selection of neighbouring blocks.
 */
1367 static av_always_inline
1368 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1369 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1372     int i, x, y, luma_start = 0, luma_ctx = 3;
1373     int nnz_pred, nnz, nnz_total = 0;
1374     int segment = mb->segment;
/* Modes other than I4x4/SPLITMV code a separate luma DC block that is
 * transformed with the WHT; element 8 of the nnz arrays tracks it. */
1377     if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1378         nnz_pred = t_nnz[8] + l_nnz[8];
1380         // decode DC values and do hadamard
1381         nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1382                                   nnz_pred, s->qmat[segment].luma_dc_qmul,
1383                                   zigzag_scan, is_vp7);
1384         l_nnz[8] = t_nnz[8] = !!nnz;
/* VP7 additionally predicts the DC from the previous inter MB. */
1386         if (is_vp7 && mb->mode > MODE_I4x4) {
1387             nnz |= inter_predict_dc(td->block_dc,
1388                                     s->inter_dc_pred[mb->ref_frame - 1]);
/* DC-only vs full inverse WHT (choice condition elided here). */
1395                 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1397                 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
/* 16 luma 4x4 blocks; luma_start/luma_ctx differ when DC was coded
 * separately (AC-only decode starting at coeff 1). */
1404     for (y = 0; y < 4; y++)
1405         for (x = 0; x < 4; x++) {
1406             nnz_pred = l_nnz[y] + t_nnz[x];
1407             nnz = decode_block_coeffs(c, td->block[y][x],
1408                                       s->prob->token[luma_ctx],
1409                                       luma_start, nnz_pred,
1410                                       s->qmat[segment].luma_qmul,
1411                                       s->prob[0].scan, is_vp7);
1412             /* nnz+block_dc may be one more than the actual last index,
1413              * but we don't care */
1414             td->non_zero_count_cache[y][x] = nnz + block_dc;
1415             t_nnz[x] = l_nnz[y] = !!nnz;
1420     // TODO: what to do about dimensions? 2nd dim for luma is x,
1421     // but for chroma it's (y<<1)|x
1422     for (i = 4; i < 6; i++)
1423         for (y = 0; y < 2; y++)
1424             for (x = 0; x < 2; x++) {
1425                 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1426                 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1427                                           s->prob->token[2], 0, nnz_pred,
1428                                           s->qmat[segment].chroma_qmul,
1429                                           s->prob[0].scan, is_vp7);
1430                 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1431                 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1435     // if there were no coded coeffs despite the macroblock not being marked skip,
1436     // we MUST not do the inner loop filter and should not do IDCT
1437     // Since skip isn't used for bitstream prediction, just manually set it.
/**
 * Save the bottom row of one macroblock (luma + chroma unless the
 * simple filter is active) into the top_border scratch buffer, so the
 * next MB row can intra-predict from unfiltered pixels.
 */
1442 static av_always_inline
1443 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1444                       uint8_t *src_cb, uint8_t *src_cr,
1445                       int linesize, int uvlinesize, int simple)
1447     AV_COPY128(top_border, src_y + 15 * linesize);
/* Chroma rows are skipped for the simple (luma-only) loop filter —
 * the guarding condition is elided in this listing. */
1449         AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1450         AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
/**
 * Exchange (xchg=1) or copy the saved top border with the row above the
 * current macroblock, so intra prediction sees unfiltered top pixels.
 * Called once before prediction (swap in) and once after (swap back).
 */
1454 static av_always_inline
1455 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1456                     uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
1457                     int mb_y, int mb_width, int simple, int xchg)
1459     uint8_t *top_border_m1 = top_border - 32; // for TL prediction
1461     src_cb -= uvlinesize;
1462     src_cr -= uvlinesize;
/* XCHG either swaps or one-way copies 8 bytes depending on 'xchg'
 * (macro body elided in this listing). */
1464 #define XCHG(a, b, xchg) \
1472     XCHG(top_border_m1 + 8, src_y - 8, xchg);
1473     XCHG(top_border, src_y, xchg);
1474     XCHG(top_border + 8, src_y + 8, 1);
/* Top-right pixels come from the next MB's border slot. */
1475     if (mb_x < mb_width - 1)
1476         XCHG(top_border + 32, src_y + 16, 1);
1478     // only copy chroma for normal loop filter
1479     // or to initialize the top row to 127
1480     if (!simple || !mb_y) {
1481         XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1482         XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1483         XCHG(top_border + 16, src_cb, 1);
1484         XCHG(top_border + 24, src_cr, 1);
/* Clamp an 8x8/16x16 DC prediction mode at frame edges: with no left
 * column use TOP_DC (or 128 with no top row either); with no top row
 * use LEFT_DC. NOTE(review): the mb_x guard lines are elided here. */
1488 static av_always_inline
1489 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1492         return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1494         return mb_y ? mode : LEFT_DC_PRED8x8;
/* Clamp TM (TrueMotion) 8x8 mode at frame edges: degrade to VERT when
 * the left column is missing, HOR when the top row is missing, and a
 * flat DC (128 for VP7, 129 for VP8) in the corner. */
1497 static av_always_inline
1498 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1501         return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1503         return mb_y ? mode : HOR_PRED8x8;
/* Dispatch edge-mode fixups for 8x8/16x16 intra prediction: DC and TM
 * get dedicated handlers; VERT/HOR fall back to flat DC when their
 * required neighbour row/column is outside the frame. */
1506 static av_always_inline
1507 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1511         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1513         return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1515         return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1516     case PLANE_PRED8x8: /* TM */
1517         return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
/* 4x4 analogue of check_tm_pred8x8_mode: degrade TM at frame edges to
 * VERT/HOR, or flat DC (128 VP7 / 129 VP8) at the top-left corner. */
1522 static av_always_inline
1523 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1526         return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1528         return mb_y ? mode : HOR_VP8_PRED;
/* Edge fixups for 4x4 intra modes. Modes that need pixels outside the
 * frame are replaced, or *copy_buf is set so the caller predicts into
 * a padded scratch buffer instead (switch skeleton elided here). */
1532 static av_always_inline
1533 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1534                                      int *copy_buf, int vp7)
1538         if (!mb_x && mb_y) {
1543     case DIAG_DOWN_LEFT_PRED:
1544     case VERT_LEFT_PRED:
1545         return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1553         return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1555         return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1556     case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1557                    * as 16x16/8x8 DC */
1558     case DIAG_DOWN_RIGHT_PRED:
1559     case VERT_RIGHT_PRED:
/**
 * Perform intra prediction for one macroblock (luma 16x16 or 16 x 4x4
 * sub-blocks, plus chroma 8x8) and, for I4x4, add the residual per
 * sub-block as it is predicted (later blocks predict from earlier
 * reconstructed ones).
 */
1568 static av_always_inline
1569 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1570                    VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1572     int x, y, mode, nnz;
1575     /* for the first row, we need to run xchg_mb_border to init the top edge
1576      * to 127 otherwise, skip it if we aren't going to deblock */
1577     if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1578         xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1579                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1580                        s->filter.simple, 1);
1582     if (mb->mode < MODE_I4x4) {
1583         mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1584         s->hpc.pred16x16[mode](dst[0], s->linesize);
/* I4x4 path: 16 independent 4x4 predictions with top-right handling. */
1586         uint8_t *ptr = dst[0];
1587         uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
/* Off-frame fill values differ between codecs (127/129 for VP8). */
1588         const uint8_t lo = is_vp7 ? 128 : 127;
1589         const uint8_t hi = is_vp7 ? 128 : 129;
1590         uint8_t tr_top[4] = { lo, lo, lo, lo };
1592         // all blocks on the right edge of the macroblock use the bottom edge
1593         // of the top macroblock for their topright edge
1594         uint8_t *tr_right = ptr - s->linesize + 16;
1596         // if we're on the right edge of the frame, said edge is extended
1597         // from the top macroblock
1598         if (mb_y && mb_x == s->mb_width - 1) {
1599             tr = tr_right[-1] * 0x01010101u;
1600             tr_right = (uint8_t *) &tr;
1604             AV_ZERO128(td->non_zero_count_cache);
1606         for (y = 0; y < 4; y++) {
1607             uint8_t *topright = ptr + 4 - s->linesize;
1608             for (x = 0; x < 4; x++) {
1609                 int copy = 0, linesize = s->linesize;
1610                 uint8_t *dst = ptr + 4 * x;
/* 5 rows x 8 bytes scratch for edge-emulated prediction: row 0 holds
 * top/top-left pixels, rows 1-4 the 4x4 block plus left column. */
1611                 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];
1613                 if ((y == 0 || x == 3) && mb_y == 0) {
1616                     topright = tr_right;
/* NOTE(review): '©' below is an HTML-entity mangling of '&copy'
 * (address of the local 'copy' flag) — fix the file encoding. */
1618                 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1619 mb_y + y, ©, is_vp7);
/* Predict into the scratch buffer when edges must be emulated... */
1621                     dst = copy_dst + 12;
1625                         AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1627                         AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1631                         copy_dst[3] = ptr[4 * x - s->linesize - 1];
1640                     copy_dst[11] = ptr[4 * x - 1];
1641                     copy_dst[19] = ptr[4 * x + s->linesize - 1];
1642                     copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1643                     copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1646                 s->hpc.pred4x4[mode](dst, topright, linesize);
/* ...then copy the predicted 4x4 block back into the frame. */
1648                     AV_COPY32(ptr + 4 * x, copy_dst + 12);
1649                     AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1650                     AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1651                     AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
/* Add residual: nnz==1 means DC-only (cheaper idct_dc_add). */
1654                 nnz = td->non_zero_count_cache[y][x];
1657                         s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1658                                                   td->block[y][x], s->linesize);
1660                         s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1661                                                td->block[y][x], s->linesize);
1666             ptr += 4 * s->linesize;
/* Chroma: one 8x8 prediction per plane with the same edge fixups. */
1671     mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1672                                             mb_x, mb_y, is_vp7);
1673     s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1674     s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
/* Swap the border pixels back (xchg=0 → restore direction). */
1676     if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1677         xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1678                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1679                        s->filter.simple, 0);
/* Per-subpel-phase (mv & 7) lookup tables for motion compensation:
 * row 0 doubles as the MC function-pointer index. Phase 0 needs no
 * filtering and hence no extra edge pixels. */
1682 static const uint8_t subpel_idx[3][8] = {
1683     { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1684                                 // also function pointer index
1685     { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1686     { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1692  * @param s VP8 decoding context
1693  * @param dst target buffer for block data at block position
1694  * @param ref reference picture buffer at origin (0, 0)
1695  * @param mv motion vector (relative to block position) to get pixel data from
1696  * @param x_off horizontal position of block from origin (0, 0)
1697  * @param y_off vertical position of block from origin (0, 0)
1698  * @param block_w width of block (16, 8 or 4)
1699  * @param block_h height of block (always same as block_w)
1700  * @param width width of src/dst plane data
1701  * @param height height of src/dst plane data
1702  * @param linesize size of a single line of plane data, including padding
1703  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1705 static av_always_inline
1706 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1707                  ThreadFrame *ref, const VP56mv *mv,
1708                  int x_off, int y_off, int block_w, int block_h,
1709                  int width, int height, ptrdiff_t linesize,
1710                  vp8_mc_func mc_func[3][3])
1712     uint8_t *src = ref->f->data[0];
1715         int src_linesize = linesize;
/* Luma mvs are in quarter-pel; <<1 converts to the eighth-pel phase
 * space shared with chroma so subpel_idx can be used for both. */
1717         int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
1718         int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];
1720         x_off += mv->x >> 2;
1721         y_off += mv->y >> 2;
/* Frame-threading: wait until the reference row we read is decoded. */
1724         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1725         src += y_off * linesize + x_off;
/* Filter taps reach outside the frame → materialize padded edges. */
1726         if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1727             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1728             s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1729                                      src - my_idx * linesize - mx_idx,
1730                                      EDGE_EMU_LINESIZE, linesize,
1731                                      block_w + subpel_idx[1][mx],
1732                                      block_h + subpel_idx[1][my],
1733                                      x_off - mx_idx, y_off - my_idx,
1735             src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1736             src_linesize = EDGE_EMU_LINESIZE;
1738         mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
/* Full-pel fallback path (condition elided): plain copy. */
1740         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1741         mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1742                       linesize, block_h, 0, 0);
1747  * chroma MC function
1749  * @param s VP8 decoding context
1750  * @param dst1 target buffer for block data at block position (U plane)
1751  * @param dst2 target buffer for block data at block position (V plane)
1752  * @param ref reference picture buffer at origin (0, 0)
1753  * @param mv motion vector (relative to block position) to get pixel data from
1754  * @param x_off horizontal position of block from origin (0, 0)
1755  * @param y_off vertical position of block from origin (0, 0)
1756  * @param block_w width of block (16, 8 or 4)
1757  * @param block_h height of block (always same as block_w)
1758  * @param width width of src/dst plane data
1759  * @param height height of src/dst plane data
1760  * @param linesize size of a single line of plane data, including padding
1761  * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1763 static av_always_inline
1764 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1765                    uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1766                    int x_off, int y_off, int block_w, int block_h,
1767                    int width, int height, ptrdiff_t linesize,
1768                    vp8_mc_func mc_func[3][3])
1770     uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
/* Chroma mvs are already in eighth-pel units (no <<1 as for luma). */
1773         int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1774         int my = mv->y & 7, my_idx = subpel_idx[0][my];
1776         x_off += mv->x >> 3;
1777         y_off += mv->y >> 3;
1780         src1 += y_off * linesize + x_off;
1781         src2 += y_off * linesize + x_off;
/* >>3: chroma planes are half resolution (4 MB rows per 16 lines). */
1782         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1783         if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1784             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
/* Edge emulation done per plane: the single edge_emu_buffer is reused
 * for U then V, so each plane must be filtered before the next fill. */
1785             s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1786                                      src1 - my_idx * linesize - mx_idx,
1787                                      EDGE_EMU_LINESIZE, linesize,
1788                                      block_w + subpel_idx[1][mx],
1789                                      block_h + subpel_idx[1][my],
1790                                      x_off - mx_idx, y_off - my_idx, width, height);
1791             src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1792             mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1794             s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1795                                      src2 - my_idx * linesize - mx_idx,
1796                                      EDGE_EMU_LINESIZE, linesize,
1797                                      block_w + subpel_idx[1][mx],
1798                                      block_h + subpel_idx[1][my],
1799                                      x_off - mx_idx, y_off - my_idx, width, height);
1800             src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1801             mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1803             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1804             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
/* Full-pel fallback (condition elided): plain copy of both planes. */
1807         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1808         mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1809         mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
/**
 * Motion-compensate one partition of a macroblock: luma with the given
 * mv, then both chroma planes with the derived (averaged/rounded)
 * chroma mv. bx_off/by_off locate the partition inside the MB in luma
 * pixels; chroma offsets are halved by the caller-visible convention.
 */
1813 static av_always_inline
1814 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1815                  ThreadFrame *ref_frame, int x_off, int y_off,
1816                  int bx_off, int by_off, int block_w, int block_h,
1817                  int width, int height, VP56mv *mv)
1822     vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1823                 ref_frame, mv, x_off + bx_off, y_off + by_off,
1824                 block_w, block_h, width, height, s->linesize,
1825                 s->put_pixels_tab[block_w == 8]);
1828     if (s->profile == 3) {
1829         /* this block only applies VP8; it is safe to check
1830          * only the profile, as VP7 profile <= 1 */
/* Profile 3 clears the fractional chroma mv bits (full-pel chroma);
 * the uvmv setup lines are elided in this listing. */
1842     vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1843                   dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1844                   &uvmv, x_off + bx_off, y_off + by_off,
1845                   block_w, block_h, width, height, s->uvlinesize,
1846                   s->put_pixels_tab[1 + (block_w == 4)]);
1849 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1850  * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1851 static av_always_inline
1852 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1855     /* Don't prefetch refs that haven't been used very often this frame. */
1856     if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1857         int x_off = mb_x << 4, y_off = mb_y << 4;
/* +8/+64: bias the address toward where the next MBs will read. */
1858         int mx = (mb->mv.x >> 2) + x_off + 8;
1859         int my = (mb->mv.y >> 2) + y_off;
1860         uint8_t **src = s->framep[ref]->tf.f->data;
1861         int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1862         /* For threading, a ff_thread_await_progress here might be useful, but
1863          * it actually slows down the decoder. Since a bad prefetch doesn't
1864          * generate bad decoder output, we don't run it here. */
1865         s->vdsp.prefetch(src[0] + off, s->linesize, 4);
/* Chroma: U and V are assumed contiguous (stride = V - U pointer). */
1866         off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1867         s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1872  * Apply motion vectors to prediction buffer, chapter 18.
1874 static av_always_inline
1875 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1876                    VP8Macroblock *mb, int mb_x, int mb_y)
1878     int x_off = mb_x << 4, y_off = mb_y << 4;
1879     int width = 16 * s->mb_width, height = 16 * s->mb_height;
1880     ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1881     VP56mv *bmv = mb->bmv;
1883     switch (mb->partitioning) {
1884     case VP8_SPLITMVMODE_NONE:
1885         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1886                     0, 0, 16, 16, width, height, &mb->mv);
/* 4x4 split: 16 luma blocks, each with its own mv from bmv[]. */
1888     case VP8_SPLITMVMODE_4x4: {
1893         for (y = 0; y < 4; y++) {
1894             for (x = 0; x < 4; x++) {
1895                 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1896                             ref, &bmv[4 * y + x],
1897                             4 * x + x_off, 4 * y + y_off, 4, 4,
1898                             width, height, s->linesize,
1899                             s->put_pixels_tab[2]);
/* Chroma mv = rounded average of the 4 covering luma mvs; the shift
 * trick adds the sign bit to round toward zero. */
1908         for (y = 0; y < 2; y++) {
1909             for (x = 0; x < 2; x++) {
1910                 uvmv.x = mb->bmv[2 * y * 4 + 2 * x    ].x +
1911                          mb->bmv[2 * y * 4 + 2 * x + 1].x +
1912                          mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
1913                          mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
1914                 uvmv.y = mb->bmv[2 * y * 4 + 2 * x    ].y +
1915                          mb->bmv[2 * y * 4 + 2 * x + 1].y +
1916                          mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
1917                          mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
1918                 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT - 1))) >> 2;
1919                 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT - 1))) >> 2;
/* Profile 3: full-pel chroma (fractional bits cleared; lines elided). */
1920                 if (s->profile == 3) {
1924                 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
1925                               dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
1926                               &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
1927                               width, height, s->uvlinesize,
1928                               s->put_pixels_tab[2]);
/* Rectangular splits: two or four vp8_mc_part calls. */
1933     case VP8_SPLITMVMODE_16x8:
1934         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1935                     0, 0, 16, 8, width, height, &bmv[0]);
1936         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1937                     0, 8, 16, 8, width, height, &bmv[1]);
1939     case VP8_SPLITMVMODE_8x16:
1940         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1941                     0, 0, 8, 16, width, height, &bmv[0]);
1942         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1943                     8, 0, 8, 16, width, height, &bmv[1]);
1945     case VP8_SPLITMVMODE_8x8:
1946         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1947                     0, 0, 8, 8, width, height, &bmv[0]);
1948         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1949                     8, 0, 8, 8, width, height, &bmv[1]);
1950         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1951                     0, 8, 8, 8, width, height, &bmv[2]);
1952         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1953                     8, 8, 8, 8, width, height, &bmv[3]);
/**
 * Add the inverse-transformed residual to the predicted macroblock.
 * Reads non_zero_count_cache per 4x4 block: 0 = skip, 1 = DC-only
 * (cheap idct_dc_add), >1 = full idct_add; a whole row/plane of
 * DC-only blocks uses the batched dc_add4y/dc_add4uv variants.
 */
1958 static av_always_inline
1959 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
1963     if (mb->mode != MODE_I4x4) {
1964         uint8_t *y_dst = dst[0];
1965         for (y = 0; y < 4; y++) {
1966             uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
/* Any byte >1 in the packed row means at least one full IDCT. */
1968             if (nnz4 & ~0x01010101) {
1969                 for (x = 0; x < 4; x++) {
1970                     if ((uint8_t) nnz4 == 1)
1971                         s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
1974                     else if ((uint8_t) nnz4 > 1)
1975                         s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
/* All-DC row: single batched call. */
1983                 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1986             y_dst += 4 * s->linesize;
1990     for (ch = 0; ch < 2; ch++) {
1991         uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
1993             uint8_t *ch_dst = dst[1 + ch];
1994             if (nnz4 & ~0x01010101) {
1995                 for (y = 0; y < 2; y++) {
1996                     for (x = 0; x < 2; x++) {
1997                         if ((uint8_t) nnz4 == 1)
1998                             s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
1999                                                       td->block[4 + ch][(y << 1) + x],
2001                         else if ((uint8_t) nnz4 > 1)
2002                             s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2003                                                    td->block[4 + ch][(y << 1) + x],
2007                     goto chroma_idct_end;
2009                     ch_dst += 4 * s->uvlinesize;
2012                 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
/**
 * Compute the loop-filter strength for one macroblock (spec section
 * 15.2): base level from segmentation or frame header, adjusted by
 * per-reference/per-mode deltas, then clipped; the interior limit is
 * derived from the level and sharpness.
 */
2020 static av_always_inline
2021 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2022                          VP8FilterStrength *f, int is_vp7)
2024     int interior_limit, filter_level;
2026     if (s->segmentation.enabled) {
2027         filter_level = s->segmentation.filter_level[mb->segment];
2028         if (!s->segmentation.absolute_vals)
2029             filter_level += s->filter.level;
2031         filter_level = s->filter.level;
2033     if (s->lf_delta.enabled) {
2034         filter_level += s->lf_delta.ref[mb->ref_frame];
2035         filter_level += s->lf_delta.mode[mb->mode];
/* Level is a 6-bit quantity: clamp to [0, 63]. */
2038     filter_level = av_clip_uintp2(filter_level, 6);
2040     interior_limit = filter_level;
2041     if (s->filter.sharpness) {
2042         interior_limit >>= (s->filter.sharpness + 3) >> 2;
2043         interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2045     interior_limit = FFMAX(interior_limit, 1);
2047     f->filter_level = filter_level;
2048     f->inner_limit  = interior_limit;
/* Inner (sub-block) edges are filtered unless the MB is a skipped
 * whole-MB prediction; VP7 always filters them. */
2049     f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2050                       mb->mode == VP8_MVMODE_SPLIT;
/**
 * Apply the normal (non-simple) loop filter to one macroblock: MB-edge
 * filters on the left/top boundaries, inner filters on the internal
 * 4-pixel edges, for luma and chroma. VP7 and VP8 differ in the edge
 * limit formulas and in when the horizontal inner luma filter runs.
 */
2053 static av_always_inline
2054 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2055                int mb_x, int mb_y, int is_vp7)
2057     int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2058     int filter_level = f->filter_level;
2059     int inner_limit = f->inner_limit;
2060     int inner_filter = f->inner_filter;
2061     int linesize = s->linesize;
2062     int uvlinesize = s->uvlinesize;
/* High-edge-variance threshold by filter level; row 0 = inter frames,
 * row 1 = keyframes (rows partially elided in this listing). */
2063     static const uint8_t hev_thresh_lut[2][64] = {
2064         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2065           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2066           3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2068         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2069           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2070           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* VP7 vs VP8 edge-limit formulas (selection condition elided). */
2078         bedge_lim_y  = filter_level;
2079         bedge_lim_uv = filter_level * 2;
2080         mbedge_lim   = filter_level + 2;
2083         bedge_lim_uv = filter_level * 2 + inner_limit;
2084         mbedge_lim   = bedge_lim_y + 4;
2087     hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
/* Left MB edge (only when a left neighbour exists — guard elided). */
2090         s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2091                                        mbedge_lim, inner_limit, hev_thresh);
2092         s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2093                                        mbedge_lim, inner_limit, hev_thresh);
2096 #define H_LOOP_FILTER_16Y_INNER(cond) \
2097     if (cond && inner_filter) { \
2098         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2099                                              bedge_lim_y, inner_limit, \
2101         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2102                                              bedge_lim_y, inner_limit, \
2104         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2105                                              bedge_lim_y, inner_limit, \
2107         s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2108                                              uvlinesize, bedge_lim_uv, \
2109                                              inner_limit, hev_thresh); \
/* VP8 filters inner horizontal edges before vertical ones; VP7 after
 * (see the is_vp7 invocation at the end). */
2112     H_LOOP_FILTER_16Y_INNER(!is_vp7)
/* Top MB edge (guard elided). */
2115         s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2116                                        mbedge_lim, inner_limit, hev_thresh);
2117         s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2118                                        mbedge_lim, inner_limit, hev_thresh);
2122         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2123                                              linesize, bedge_lim_y,
2124                                              inner_limit, hev_thresh);
2125         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2126                                              linesize, bedge_lim_y,
2127                                              inner_limit, hev_thresh);
2128         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2129                                              linesize, bedge_lim_y,
2130                                              inner_limit, hev_thresh);
2131         s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2132                                              dst[2] + 4 * uvlinesize,
2133                                              uvlinesize, bedge_lim_uv,
2134                                              inner_limit, hev_thresh);
2137     H_LOOP_FILTER_16Y_INNER(is_vp7)
/**
 * Simple loop filter: luma-only, straight-line filters on the MB edges
 * and the three inner 4-pixel edges in each direction. dst points at
 * the luma plane only; chroma is untouched in this mode.
 */
2140 static av_always_inline
2141 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2144     int mbedge_lim, bedge_lim;
2145     int filter_level = f->filter_level;
2146     int inner_limit = f->inner_limit;
2147     int inner_filter = f->inner_filter;
2148     int linesize = s->linesize;
2153     bedge_lim  = 2 * filter_level + inner_limit;
2154     mbedge_lim = bedge_lim + 4;
/* Left MB edge, then inner vertical edges (guards elided). */
2157         s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2159         s->vp8dsp.vp8_h_loop_filter_simple(dst +  4, linesize, bedge_lim);
2160         s->vp8dsp.vp8_h_loop_filter_simple(dst +  8, linesize, bedge_lim);
2161         s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
/* Top MB edge, then inner horizontal edges (guards elided). */
2165         s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2167         s->vp8dsp.vp8_v_loop_filter_simple(dst +  4 * linesize, linesize, bedge_lim);
2168         s->vp8dsp.vp8_v_loop_filter_simple(dst +  8 * linesize, linesize, bedge_lim);
2169         s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
/* Motion vectors may point up to MARGIN (64 = 16px in quarter-pel)
 * outside the frame. */
2173 #define MARGIN (16 << 2)
/**
 * First pass used with frame threading: walk every macroblock and
 * decode only its mode/mv info (no coefficients), so the next frame
 * can start motion compensation early.
 */
2174 static av_always_inline
2175 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2176                              VP8Frame *prev_frame, int is_vp7)
2178     VP8Context *s = avctx->priv_data;
2181     s->mv_min.y = -MARGIN;
2182     s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2183     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
/* Layout-1 macroblock array has a one-MB guard band on each row. */
2184         VP8Macroblock *mb = s->macroblocks_base +
2185                             ((s->mb_width + 1) * (mb_y + 1) + 1);
2186         int mb_xy = mb_y * s->mb_width;
2188         AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2190         s->mv_min.x = -MARGIN;
2191         s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2192         for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
/* Seed the top prediction-mode context of the row above with DC. */
2194                 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2195                          DC_PRED * 0x01010101);
2196             decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2197                            prev_frame && prev_frame->seg_map ?
2198                            prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
/* VP7 entry point for the mode/mv pre-decode pass. */
2207 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2208                                    VP8Frame *prev_frame)
2210     vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
/* VP8 entry point for the mode/mv pre-decode pass. */
2213 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2214                                    VP8Frame *prev_frame)
2216     vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
/* Sliced threading: block until thread 'otd' has advanced to at least
 * (mb_x_check, mb_y_check). Positions are packed (y<<16)|x so a single
 * int compare orders them; waits on otd's condvar under otd's mutex.
 * (No comments inside: the body is a '\'-continued macro.) */
2220 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
2222         int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);                 \
2223         if (otd->thread_mb_pos < tmp) {                                       \
2224             pthread_mutex_lock(&otd->lock);                                   \
2225             td->wait_mb_pos = tmp;                                            \
2227                 if (otd->thread_mb_pos >= tmp)                                \
2229                 pthread_cond_wait(&otd->cond, &otd->lock);                    \
2231             td->wait_mb_pos = INT_MAX;                                        \
2232             pthread_mutex_unlock(&otd->lock);                                 \
/* Publish this thread's progress as (mb_y<<16)|mb_x and wake any
 * neighbour thread whose recorded wait position is now satisfied.
 * (No comments inside: the body is a '\'-continued macro.) */
2236 #define update_pos(td, mb_y, mb_x)                                            \
2238         int pos               = (mb_y << 16) | (mb_x & 0xFFFF);               \
2239         int sliced_threading  = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2241         int is_null           = (next_td == NULL) || (prev_td == NULL);       \
2242         int pos_check         = (is_null) ? 1                                 \
2243                                           : (next_td != td &&                 \
2244                                              pos >= next_td->wait_mb_pos) ||  \
2246                                              pos >= prev_td->wait_mb_pos);    \
2247         td->thread_mb_pos = pos;                                              \
2248         if (sliced_threading && pos_check) {                                  \
2249             pthread_mutex_lock(&td->lock);                                    \
2250             pthread_cond_broadcast(&td->cond);                                \
2251             pthread_mutex_unlock(&td->lock);                                  \
/* Single-threaded builds: synchronization macros compile to nothing. */
2255 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
2256 #define update_pos(td, mb_y, mb_x)
/**
 * Decode one macroblock row (modes unless pre-decoded, coefficients,
 * prediction, IDCT) without loop filtering; filtering runs in a
 * separate pass/thread. With sliced threading, synchronizes with the
 * previous/next row's thread via check_thread_pos/update_pos.
 */
2259 static av_always_inline void decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2260                                                      int jobnr, int threadnr, int is_vp7)
2262     VP8Context *s = avctx->priv_data;
2263     VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2264     int mb_y = td->thread_mb_pos >> 16;
2265     int mb_x, mb_xy = mb_y * s->mb_width;
2266     int num_jobs = s->num_jobs;
2267     VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
/* Coefficient partitions are distributed round-robin over MB rows. */
2268     VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2271         curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2272         curframe->tf.f->data[1] +  8 * mb_y * s->uvlinesize,
2273         curframe->tf.f->data[2] +  8 * mb_y * s->uvlinesize
2278         prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2279     if (mb_y == s->mb_height - 1)
2282         next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2283     if (s->mb_layout == 1)
2284         mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2286         // Make sure the previous frame has read its segmentation map,
2287         // if we re-use the same map.
2288         if (prev_frame && s->segmentation.enabled &&
2289             !s->segmentation.update_map)
2290             ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2291         mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2292         memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2293         AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
/* VP7 carries left_nnz context across rows; VP8 resets per row. */
2296     if (!is_vp7 || mb_y == 0)
2297         memset(td->left_nnz, 0, sizeof(td->left_nnz));
2299     s->mv_min.x = -MARGIN;
2300     s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2302     for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2303         // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2304         if (prev_td != td) {
2305             if (threadnr != 0) {
2306                 check_thread_pos(td, prev_td,
2307                                  mb_x + (is_vp7 ? 2 : 1),
2308                                  mb_y - (is_vp7 ? 2 : 1));
2310                 check_thread_pos(td, prev_td,
2311                                  mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2312                                  mb_y - (is_vp7 ? 2 : 1));
2316         s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2318         s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2319                          dst[2] - dst[1], 2);
/* Modes were pre-decoded when frame threading is active (guard
 * elided); otherwise decode them inline here. */
2322             decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2323                            prev_frame && prev_frame->seg_map ?
2324                            prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2326         prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2329             decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2331         if (mb->mode <= MODE_I4x4)
2332             intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2334             inter_predict(s, td, dst, mb, mb_x, mb_y);
2336         prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
/* Skipped MBs (guard elided) still reset the nnz context. */
2339             idct_mb(s, td, dst, mb);
2341             AV_ZERO64(td->left_nnz);
2342             AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2344             /* Reset DC block predictors if they would exist
2345              * if the mb had coefficients */
2346             if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2347                 td->left_nnz[8]      = 0;
2348                 s->top_nnz[mb_x][8] = 0;
2352         if (s->deblock_filter)
2353             filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
/* The last job backs up borders here because the filter pass for this
 * row may run on a different thread. */
2355         if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2356             if (s->filter.simple)
2357                 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2358                                  NULL, NULL, s->linesize, 0, 1);
2360                 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2361                                  dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2364         prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2372         if (mb_x == s->mb_width + 1) {
2373             update_pos(td, mb_y, s->mb_width + 3);
2375             update_pos(td, mb_y, mb_x);
/* VP7 wrapper installed as the s->decode_mb_row_no_filter callback. */
2380 static void vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2381 int jobnr, int threadnr)
2383 decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
/* VP8 wrapper installed as the s->decode_mb_row_no_filter callback. */
2386 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2387 int jobnr, int threadnr)
2389 decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
/* Apply the loop (deblocking) filter to one macroblock row that was
 * already decoded by decode_mb_row_no_filter(). Filter progress is
 * published at an offset of mb_width+3 columns so waiters can tell the
 * filter pass apart from the decode pass in thread_mb_pos. */
2392 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2393 int jobnr, int threadnr, int is_vp7)
2395 VP8Context *s = avctx->priv_data;
2396 VP8ThreadData *td = &s->thread_data[threadnr];
2397 int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
2398 AVFrame *curframe = s->curframe->tf.f;
2400 VP8ThreadData *prev_td, *next_td;
/* Row base pointers for the luma and both chroma planes. */
2402 curframe->data[0] + 16 * mb_y * s->linesize,
2403 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2404 curframe->data[2] + 8 * mb_y * s->uvlinesize
/* Same macroblock addressing scheme as the decode pass. */
2407 if (s->mb_layout == 1)
2408 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2410 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2415 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2416 if (mb_y == s->mb_height - 1)
2419 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2421 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
/* Strength was precomputed per-MB during the decode pass. */
2422 VP8FilterStrength *f = &td->filter_strength[mb_x];
/* Wait for the previous job to have *filtered* past (mb_x+1, mb_y-1);
 * the mb_width+3 offset selects the filter-pass progress value. */
2424 check_thread_pos(td, prev_td,
2425 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
/* And for the next job to have decoded past (mb_x+1, mb_y+1). */
2427 if (next_td != &s->thread_data[0])
2428 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
/* Single-job mode: the border backup was not done in the decode pass
 * (that path runs only when num_jobs != 1), so save it here before
 * filtering overwrites those pixels. */
2430 if (num_jobs == 1) {
2431 if (s->filter.simple)
2432 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2433 NULL, NULL, s->linesize, 0, 1);
2435 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2436 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
/* Simple filter touches luma only; normal filter touches all planes. */
2439 if (s->filter.simple)
2440 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2442 filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
/* Publish filter progress (offset by mb_width+3 from decode progress). */
2447 update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
/* VP7 wrapper installed as the s->filter_mb_row callback. */
2451 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2452 int jobnr, int threadnr)
2454 filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
/* VP8 wrapper installed as the s->filter_mb_row callback. */
2457 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2458 int jobnr, int threadnr)
2460 filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
/* execute2() job entry point: job `jobnr` processes rows jobnr,
 * jobnr+num_jobs, ... — running the decode pass and, when deblocking is
 * enabled, the filter pass on each row, then reporting per-row progress
 * for frame threading. */
2463 static av_always_inline
2464 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2465 int threadnr, int is_vp7)
2467 VP8Context *s = avctx->priv_data;
2468 VP8ThreadData *td = &s->thread_data[jobnr];
2469 VP8ThreadData *next_td = NULL, *prev_td = NULL;
2470 VP8Frame *curframe = s->curframe;
2471 int mb_y, num_jobs = s->num_jobs;
2473 td->thread_nr = threadnr;
2474 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
/* NOTE(review): appears redundant with the loop condition — confirm. */
2475 if (mb_y >= s->mb_height)
/* Record the row being worked on (high 16 bits of thread_mb_pos). */
2477 td->thread_mb_pos = mb_y << 16;
2478 s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
2479 if (s->deblock_filter)
2480 s->filter_mb_row(avctx, tdata, jobnr, threadnr);
/* Mark the row completely finished so no waiter can block on it. */
2481 update_pos(td, mb_y, INT_MAX & 0xFFFF);
/* Frame threading: let the next frame's decoder reference this row. */
2486 if (avctx->active_thread_type == FF_THREAD_FRAME)
2487 ff_thread_report_progress(&curframe->tf, mb_y, 0);
/* VP7 sliced-decoding job callback passed to avctx->execute2(). */
2493 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2494 int jobnr, int threadnr)
2496 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
/* VP8 sliced-decoding job callback passed to avctx->execute2(). */
2499 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2500 int jobnr, int threadnr)
2502 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
/* Common VP7/VP8 frame decoding: parse the frame header, select and
 * allocate the target frame, rotate the reference-frame pointers
 * (last/golden/altref), run the sliced per-row decode, and return the
 * frame to the caller unless it is invisible or discarded. */
2506 static av_always_inline
2507 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2508 AVPacket *avpkt, int is_vp7)
2510 VP8Context *s = avctx->priv_data;
2511 int ret, i, referenced, num_jobs;
2512 enum AVDiscard skip_thresh;
2513 VP8Frame *av_uninit(curframe), *prev_frame;
/* Parse the codec-specific frame header from the packet. */
2516 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2518 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2523 prev_frame = s->framep[VP56_FRAME_CURRENT];
/* A frame is "referenced" if any later frame may predict from it. */
2525 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2526 s->update_altref == VP56_FRAME_CURRENT;
/* Map the frame's importance onto an AVDiscard threshold. */
2528 skip_thresh = !referenced ? AVDISCARD_NONREF
2529 : !s->keyframe ? AVDISCARD_NONKEY
/* Skipping: keep the reference pointers unchanged and bail out. */
2532 if (avctx->skip_frame >= skip_thresh) {
2534 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2537 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2539 // release no longer referenced frames
2540 for (i = 0; i < 5; i++)
2541 if (s->frames[i].tf.f->data[0] &&
2542 &s->frames[i] != prev_frame &&
2543 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2544 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2545 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2546 vp8_release_frame(s, &s->frames[i]);
2548 curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2550 /* Given that arithmetic probabilities are updated every frame, it's quite
2551 * likely that the values we have on a random interframe are complete
2552 * junk if we didn't start decode on a keyframe. So just don't display
2553 * anything rather than junk. */
2554 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2555 !s->framep[VP56_FRAME_GOLDEN] ||
2556 !s->framep[VP56_FRAME_GOLDEN2])) {
2557 av_log(avctx, AV_LOG_WARNING,
2558 "Discarding interframe without a prior keyframe!\n");
2559 ret = AVERROR_INVALIDDATA;
2563 curframe->tf.f->key_frame = s->keyframe;
2564 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2565 : AV_PICTURE_TYPE_P;
2566 if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2569 // check if golden and altref are swapped
2570 if (s->update_altref != VP56_FRAME_NONE)
2571 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2573 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2575 if (s->update_golden != VP56_FRAME_NONE)
2576 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2578 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
/* "last" reference: the new frame when update_last is set (guard line
 * elided here), otherwise the existing previous frame. */
2581 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2583 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2585 s->next_framep[VP56_FRAME_CURRENT] = curframe;
/* Frame threading: setup is complete, the next frame may start. */
2587 ff_thread_finish_setup(avctx);
2589 s->linesize = curframe->tf.f->linesize[0];
2590 s->uvlinesize = curframe->tf.f->linesize[1];
/* Clear top-row prediction state for the new frame. */
2592 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2593 /* Zero macroblock structures for top/top-left prediction
2594 * from outside the frame. */
2596 memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2597 (s->mb_width + 1) * sizeof(*s->macroblocks));
2598 if (!s->mb_layout && s->keyframe)
2599 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2601 memset(s->ref_count, 0, sizeof(s->ref_count));
2603 if (s->mb_layout == 1) {
2604 // Make sure the previous frame has read its segmentation map,
2605 // if we re-use the same map.
2606 if (prev_frame && s->segmentation.enabled &&
2607 !s->segmentation.update_map)
2608 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2610 vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2612 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
/* NOTE(review): the frame-threading branch body is not visible here;
 * slice threading uses min(coeff partitions, thread count) jobs. */
2615 if (avctx->active_thread_type == FF_THREAD_FRAME)
2618 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2619 s->num_jobs = num_jobs;
2620 s->curframe = curframe;
2621 s->prev_frame = prev_frame;
/* Vertical motion-vector clamping range for this frame height. */
2622 s->mv_min.y = -MARGIN;
2623 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
/* Reset every job's progress before launching the row jobs. */
2624 for (i = 0; i < MAX_THREADS; i++) {
2625 s->thread_data[i].thread_mb_pos = 0;
2626 s->thread_data[i].wait_mb_pos = INT_MAX;
2629 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2632 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
/* Frame fully decoded: release waiters, then rotate the references. */
2635 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2636 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2639 // if future frames don't use the updated probabilities,
2640 // reset them to the values we saved
2641 if (!s->update_probabilities)
2642 s->prob[0] = s->prob[1];
/* Invisible frames are decoded (they may update the references) but
 * never returned to the caller. */
2644 if (!s->invisible) {
2645 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
/* Error path: keep the reference-frame pointers consistent. */
2652 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
/* Public VP8 decode entry point (AVCodec.decode). */
2656 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2659 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2662 #if CONFIG_VP7_DECODER
/* VP7 decode entry point (AVCodec.decode). */
2663 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2666 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2668 #endif /* CONFIG_VP7_DECODER */
/* Codec close callback: release all held frames (free_mem=1) and then
 * free the AVFrames allocated by vp8_init_frames(). */
2670 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2672 VP8Context *s = avctx->priv_data;
2675 vp8_decode_flush_impl(avctx, 1);
2676 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2677 av_frame_free(&s->frames[i].tf.f);
/* Allocate an AVFrame for every slot of the internal frame pool.
 * Returns 0 on success, AVERROR(ENOMEM) on allocation failure (the
 * caller is expected to clean up via ff_vp8_decode_free()). */
2682 static av_cold int vp8_init_frames(VP8Context *s)
2685 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2686 s->frames[i].tf.f = av_frame_alloc();
2687 if (!s->frames[i].tf.f)
2688 return AVERROR(ENOMEM);
/* Common VP7/VP8 decoder initialization: DSP contexts, intra prediction
 * tables, the per-codec row-decode/filter callbacks, and frame pool. */
2693 static av_always_inline
2694 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2696 VP8Context *s = avctx->priv_data;
2700 s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
2701 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2702 avctx->internal->allocate_progress = 1;
2704 ff_videodsp_init(&s->vdsp, 8);
2706 ff_vp78dsp_init(&s->vp8dsp);
/* CONFIG_* are compile-time constants: the unused branch is dropped. */
2707 if (CONFIG_VP7_DECODER && is_vp7) {
2708 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2709 ff_vp7dsp_init(&s->vp8dsp);
2710 s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2711 s->filter_mb_row = vp7_filter_mb_row;
2712 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2713 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2714 ff_vp8dsp_init(&s->vp8dsp);
2715 s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2716 s->filter_mb_row = vp8_filter_mb_row;
2719 /* does not change for VP8 */
2720 memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
/* Free any partially-allocated frames if the pool setup fails. */
2722 if ((ret = vp8_init_frames(s)) < 0) {
2723 ff_vp8_decode_free(avctx);
2730 #if CONFIG_VP7_DECODER
/* VP7 init callback (AVCodec.init). */
2731 static int vp7_decode_init(AVCodecContext *avctx)
2733 return vp78_decode_init(avctx, IS_VP7);
2735 #endif /* CONFIG_VP7_DECODER */
/* VP8 init callback (AVCodec.init). */
2737 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2739 return vp78_decode_init(avctx, IS_VP8);
2742 #if CONFIG_VP8_DECODER
/* Frame-threading worker init: each thread copy only needs its own
 * frame pool; all other state is copied per-frame by
 * vp8_decode_update_thread_context(). */
2743 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2745 VP8Context *s = avctx->priv_data;
2750 if ((ret = vp8_init_frames(s)) < 0) {
2751 ff_vp8_decode_free(avctx);
/* Translate a VP8Frame pointer from the source thread's context into the
 * corresponding slot of this context's frames[] array (NULL stays NULL). */
2758 #define REBASE(pic) pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
/* Frame threading: copy the inter-frame decoder state from the source
 * thread's context into this one before the next frame is decoded. */
2760 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2761 const AVCodecContext *src)
2763 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
/* Dimensions changed: the existing per-MB buffers are stale. */
2766 if (s->macroblocks_base &&
2767 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2769 s->mb_width = s_src->mb_width;
2770 s->mb_height = s_src->mb_height;
/* Take the probability set a following frame will actually start from
 * (prob[1] holds the saved values when updates are not kept). */
2773 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2774 s->segmentation = s_src->segmentation;
2775 s->lf_delta = s_src->lf_delta;
2776 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
/* Re-reference every frame the source context still holds. */
2778 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2779 if (s_src->frames[i].tf.f->data[0]) {
2780 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
/* Remap the reference pointers into this context's own frames[]. */
2786 s->framep[0] = REBASE(s_src->next_framep[0]);
2787 s->framep[1] = REBASE(s_src->next_framep[1]);
2788 s->framep[2] = REBASE(s_src->next_framep[2]);
2789 s->framep[3] = REBASE(s_src->next_framep[3]);
2793 #endif /* CONFIG_VP8_DECODER */
2795 #if CONFIG_VP7_DECODER
/* VP7 decoder registration (no threading capabilities advertised). */
2796 AVCodec ff_vp7_decoder = {
2798 .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2799 .type = AVMEDIA_TYPE_VIDEO,
2800 .id = AV_CODEC_ID_VP7,
2801 .priv_data_size = sizeof(VP8Context),
2802 .init = vp7_decode_init,
2803 .close = ff_vp8_decode_free,
2804 .decode = vp7_decode_frame,
2805 .capabilities = CODEC_CAP_DR1,
2806 .flush = vp8_decode_flush,
2808 #endif /* CONFIG_VP7_DECODER */
2810 #if CONFIG_VP8_DECODER
/* VP8 decoder registration: direct rendering plus both frame and slice
 * threading are supported. */
2811 AVCodec ff_vp8_decoder = {
2813 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2814 .type = AVMEDIA_TYPE_VIDEO,
2815 .id = AV_CODEC_ID_VP8,
2816 .priv_data_size = sizeof(VP8Context),
2817 .init = ff_vp8_decode_init,
2818 .close = ff_vp8_decode_free,
2819 .decode = ff_vp8_decode_frame,
2820 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2821 .flush = vp8_decode_flush,
2822 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2823 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2825 #endif /* CONFIG_VP8_DECODER */