/*
 * VP7/VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Fiona Glaser
 * Copyright (C) 2012 Daniel Kang
 * Copyright (C) 2014 Peter Ross
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
27 #include "libavutil/imgutils.h"
32 #include "rectangle.h"
41 static void free_buffers(VP8Context *s)
45 for (i = 0; i < MAX_THREADS; i++) {
47 pthread_cond_destroy(&s->thread_data[i].cond);
48 pthread_mutex_destroy(&s->thread_data[i].lock);
50 av_freep(&s->thread_data[i].filter_strength);
52 av_freep(&s->thread_data);
53 av_freep(&s->macroblocks_base);
54 av_freep(&s->intra4x4_pred_mode_top);
55 av_freep(&s->top_nnz);
56 av_freep(&s->top_border);
58 s->macroblocks = NULL;
61 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
64 if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
65 ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
67 if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
68 ff_thread_release_buffer(s->avctx, &f->tf);
69 return AVERROR(ENOMEM);
74 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
76 av_buffer_unref(&f->seg_map);
77 ff_thread_release_buffer(s->avctx, &f->tf);
80 #if CONFIG_VP8_DECODER
81 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
85 vp8_release_frame(s, dst);
87 if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
90 !(dst->seg_map = av_buffer_ref(src->seg_map))) {
91 vp8_release_frame(s, dst);
92 return AVERROR(ENOMEM);
97 #endif /* CONFIG_VP8_DECODER */
99 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
101 VP8Context *s = avctx->priv_data;
104 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
105 vp8_release_frame(s, &s->frames[i]);
106 memset(s->framep, 0, sizeof(s->framep));
112 static void vp8_decode_flush(AVCodecContext *avctx)
114 vp8_decode_flush_impl(avctx, 0);
117 static VP8Frame *vp8_find_free_buffer(VP8Context *s)
119 VP8Frame *frame = NULL;
122 // find a free buffer
123 for (i = 0; i < 5; i++)
124 if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
125 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
126 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
127 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
128 frame = &s->frames[i];
132 av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
135 if (frame->tf.f->data[0])
136 vp8_release_frame(s, frame);
141 static av_always_inline
142 int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
144 AVCodecContext *avctx = s->avctx;
147 if (width != s->avctx->width ||
148 height != s->avctx->height) {
149 vp8_decode_flush_impl(s->avctx, 1);
151 ret = ff_set_dimensions(s->avctx, width, height);
156 s->mb_width = (s->avctx->coded_width + 15) / 16;
157 s->mb_height = (s->avctx->coded_height + 15) / 16;
159 s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
160 FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1;
161 if (!s->mb_layout) { // Frame threading and one thread
162 s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
163 sizeof(*s->macroblocks));
164 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
165 } else // Sliced threading
166 s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
167 sizeof(*s->macroblocks));
168 s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
169 s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
170 s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
172 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
173 !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
175 return AVERROR(ENOMEM);
178 for (i = 0; i < MAX_THREADS; i++) {
179 s->thread_data[i].filter_strength =
180 av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
181 if (!s->thread_data[i].filter_strength) {
183 return AVERROR(ENOMEM);
186 pthread_mutex_init(&s->thread_data[i].lock, NULL);
187 pthread_cond_init(&s->thread_data[i].cond, NULL);
191 s->macroblocks = s->macroblocks_base + 1;
196 static int vp7_update_dimensions(VP8Context *s, int width, int height)
198 return update_dimensions(s, width, height, IS_VP7);
201 static int vp8_update_dimensions(VP8Context *s, int width, int height)
203 return update_dimensions(s, width, height, IS_VP8);
206 static void parse_segment_info(VP8Context *s)
208 VP56RangeCoder *c = &s->c;
211 s->segmentation.update_map = vp8_rac_get(c);
213 if (vp8_rac_get(c)) { // update segment feature data
214 s->segmentation.absolute_vals = vp8_rac_get(c);
216 for (i = 0; i < 4; i++)
217 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
219 for (i = 0; i < 4; i++)
220 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
222 if (s->segmentation.update_map)
223 for (i = 0; i < 3; i++)
224 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
227 static void update_lf_deltas(VP8Context *s)
229 VP56RangeCoder *c = &s->c;
232 for (i = 0; i < 4; i++) {
233 if (vp8_rac_get(c)) {
234 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
237 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
241 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
242 if (vp8_rac_get(c)) {
243 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
246 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
251 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
253 const uint8_t *sizes = buf;
256 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
258 buf += 3 * (s->num_coeff_partitions - 1);
259 buf_size -= 3 * (s->num_coeff_partitions - 1);
263 for (i = 0; i < s->num_coeff_partitions - 1; i++) {
264 int size = AV_RL24(sizes + 3 * i);
265 if (buf_size - size < 0)
268 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
272 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
277 static void vp7_get_quants(VP8Context *s)
279 VP56RangeCoder *c = &s->c;
281 int yac_qi = vp8_rac_get_uint(c, 7);
282 int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
283 int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
284 int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
285 int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
286 int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
288 s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
289 s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
290 s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
291 s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
292 s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
293 s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
296 static void get_quants(VP8Context *s)
298 VP56RangeCoder *c = &s->c;
301 int yac_qi = vp8_rac_get_uint(c, 7);
302 int ydc_delta = vp8_rac_get_sint(c, 4);
303 int y2dc_delta = vp8_rac_get_sint(c, 4);
304 int y2ac_delta = vp8_rac_get_sint(c, 4);
305 int uvdc_delta = vp8_rac_get_sint(c, 4);
306 int uvac_delta = vp8_rac_get_sint(c, 4);
308 for (i = 0; i < 4; i++) {
309 if (s->segmentation.enabled) {
310 base_qi = s->segmentation.base_quant[i];
311 if (!s->segmentation.absolute_vals)
316 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta, 7)];
317 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
318 s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
319 /* 101581>>16 is equivalent to 155/100 */
320 s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
321 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
322 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
324 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
325 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
330 * Determine which buffers golden and altref should be updated with after this frame.
331 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
333 * Intra frames update all 3 references
334 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
335 * If the update (golden|altref) flag is set, it's updated with the current frame
336 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
337 * If the flag is not set, the number read means:
339 * 1: VP56_FRAME_PREVIOUS
340 * 2: update golden with altref, or update altref with golden
342 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
344 VP56RangeCoder *c = &s->c;
347 return VP56_FRAME_CURRENT;
349 switch (vp8_rac_get_uint(c, 2)) {
351 return VP56_FRAME_PREVIOUS;
353 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
355 return VP56_FRAME_NONE;
358 static void vp78_reset_probability_tables(VP8Context *s)
361 for (i = 0; i < 4; i++)
362 for (j = 0; j < 16; j++)
363 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
364 sizeof(s->prob->token[i][j]));
367 static void vp78_update_probability_tables(VP8Context *s)
369 VP56RangeCoder *c = &s->c;
372 for (i = 0; i < 4; i++)
373 for (j = 0; j < 8; j++)
374 for (k = 0; k < 3; k++)
375 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
376 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
377 int prob = vp8_rac_get_uint(c, 8);
378 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
379 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
383 #define VP7_MVC_SIZE 17
384 #define VP8_MVC_SIZE 19
386 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
389 VP56RangeCoder *c = &s->c;
393 for (i = 0; i < 4; i++)
394 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
396 for (i = 0; i < 3; i++)
397 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
399 // 17.2 MV probability update
400 for (i = 0; i < 2; i++)
401 for (j = 0; j < mvc_size; j++)
402 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
403 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
406 static void update_refs(VP8Context *s)
408 VP56RangeCoder *c = &s->c;
410 int update_golden = vp8_rac_get(c);
411 int update_altref = vp8_rac_get(c);
413 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
414 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
417 static void copy_luma(AVFrame *dst, AVFrame *src, int width, int height)
421 for (j = 1; j < 3; j++) {
422 for (i = 0; i < height / 2; i++)
423 memcpy(dst->data[j] + i * dst->linesize[j],
424 src->data[j] + i * src->linesize[j], width / 2);
428 static void fade(uint8_t *dst, uint8_t *src,
429 int width, int height, int linesize,
434 for (j = 0; j < height; j++) {
435 for (i = 0; i < width; i++) {
436 uint8_t y = src[j * linesize + i];
437 dst[j * linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
442 static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
444 int alpha = (int8_t) vp8_rac_get_uint(c, 8);
445 int beta = (int8_t) vp8_rac_get_uint(c, 8);
448 if (!s->keyframe && (alpha || beta)) {
449 int width = s->mb_width * 16;
450 int height = s->mb_height * 16;
453 if (!s->framep[VP56_FRAME_PREVIOUS])
454 return AVERROR_INVALIDDATA;
457 src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
459 /* preserve the golden frame, write a new previous frame */
460 if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
461 s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
462 if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
465 dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
467 copy_luma(dst, src, width, height);
470 fade(dst->data[0], src->data[0],
471 width, height, dst->linesize[0], alpha, beta);
477 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
479 VP56RangeCoder *c = &s->c;
480 int part1_size, hscale, vscale, i, j, ret;
481 int width = s->avctx->width;
482 int height = s->avctx->height;
485 return AVERROR_INVALIDDATA;
488 s->profile = (buf[0] >> 1) & 7;
489 if (s->profile > 1) {
490 avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
491 return AVERROR_INVALIDDATA;
494 s->keyframe = !(buf[0] & 1);
496 part1_size = AV_RL24(buf) >> 4;
498 buf += 4 - s->profile;
499 buf_size -= 4 - s->profile;
501 if (buf_size < part1_size) {
502 return AVERROR_INVALIDDATA;
505 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
507 ff_vp56_init_range_decoder(c, buf, part1_size);
509 buf_size -= part1_size;
511 /* A. Dimension information (keyframes only) */
513 width = vp8_rac_get_uint(c, 12);
514 height = vp8_rac_get_uint(c, 12);
515 hscale = vp8_rac_get_uint(c, 2);
516 vscale = vp8_rac_get_uint(c, 2);
517 if (hscale || vscale)
518 avpriv_request_sample(s->avctx, "Upscaling");
520 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
521 vp78_reset_probability_tables(s);
522 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
523 sizeof(s->prob->pred16x16));
524 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
525 sizeof(s->prob->pred8x8c));
526 for (i = 0; i < 2; i++)
527 memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
528 sizeof(vp7_mv_default_prob[i]));
529 memset(&s->segmentation, 0, sizeof(s->segmentation));
530 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
531 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
534 if (s->keyframe || s->profile > 0)
535 memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));
537 /* B. Decoding information for all four macroblock-level features */
538 for (i = 0; i < 4; i++) {
539 s->feature_enabled[i] = vp8_rac_get(c);
540 if (s->feature_enabled[i]) {
541 s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
543 for (j = 0; j < 3; j++)
544 s->feature_index_prob[i][j] =
545 vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
547 if (vp7_feature_value_size[s->profile][i])
548 for (j = 0; j < 4; j++)
549 s->feature_value[i][j] =
550 vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
554 s->segmentation.enabled = 0;
555 s->segmentation.update_map = 0;
556 s->lf_delta.enabled = 0;
558 s->num_coeff_partitions = 1;
559 ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
561 if (!s->macroblocks_base || /* first frame */
562 width != s->avctx->width || height != s->avctx->height ||
563 (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
564 if ((ret = vp7_update_dimensions(s, width, height)) < 0)
568 /* C. Dequantization indices */
571 /* D. Golden frame update flag (a Flag) for interframes only */
573 s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
574 s->sign_bias[VP56_FRAME_GOLDEN] = 0;
578 s->update_probabilities = 1;
581 if (s->profile > 0) {
582 s->update_probabilities = vp8_rac_get(c);
583 if (!s->update_probabilities)
584 s->prob[1] = s->prob[0];
587 s->fade_present = vp8_rac_get(c);
590 /* E. Fading information for previous frame */
591 if (s->fade_present && vp8_rac_get(c)) {
592 if ((ret = vp7_fade_frame(s ,c)) < 0)
596 /* F. Loop filter type */
598 s->filter.simple = vp8_rac_get(c);
600 /* G. DCT coefficient ordering specification */
602 for (i = 1; i < 16; i++)
603 s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];
605 /* H. Loop filter levels */
607 s->filter.simple = vp8_rac_get(c);
608 s->filter.level = vp8_rac_get_uint(c, 6);
609 s->filter.sharpness = vp8_rac_get_uint(c, 3);
611 /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
612 vp78_update_probability_tables(s);
614 s->mbskip_enabled = 0;
616 /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
618 s->prob->intra = vp8_rac_get_uint(c, 8);
619 s->prob->last = vp8_rac_get_uint(c, 8);
620 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
626 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
628 VP56RangeCoder *c = &s->c;
629 int header_size, hscale, vscale, ret;
630 int width = s->avctx->width;
631 int height = s->avctx->height;
633 s->keyframe = !(buf[0] & 1);
634 s->profile = (buf[0]>>1) & 7;
635 s->invisible = !(buf[0] & 0x10);
636 header_size = AV_RL24(buf) >> 5;
641 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
644 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
645 sizeof(s->put_pixels_tab));
646 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
647 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
648 sizeof(s->put_pixels_tab));
650 if (header_size > buf_size - 7 * s->keyframe) {
651 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
652 return AVERROR_INVALIDDATA;
656 if (AV_RL24(buf) != 0x2a019d) {
657 av_log(s->avctx, AV_LOG_ERROR,
658 "Invalid start code 0x%x\n", AV_RL24(buf));
659 return AVERROR_INVALIDDATA;
661 width = AV_RL16(buf + 3) & 0x3fff;
662 height = AV_RL16(buf + 5) & 0x3fff;
663 hscale = buf[4] >> 6;
664 vscale = buf[6] >> 6;
668 if (hscale || vscale)
669 avpriv_request_sample(s->avctx, "Upscaling");
671 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
672 vp78_reset_probability_tables(s);
673 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
674 sizeof(s->prob->pred16x16));
675 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
676 sizeof(s->prob->pred8x8c));
677 memcpy(s->prob->mvc, vp8_mv_default_prob,
678 sizeof(s->prob->mvc));
679 memset(&s->segmentation, 0, sizeof(s->segmentation));
680 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
683 ff_vp56_init_range_decoder(c, buf, header_size);
685 buf_size -= header_size;
688 s->colorspace = vp8_rac_get(c);
690 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
691 s->fullrange = vp8_rac_get(c);
694 if ((s->segmentation.enabled = vp8_rac_get(c)))
695 parse_segment_info(s);
697 s->segmentation.update_map = 0; // FIXME: move this to some init function?
699 s->filter.simple = vp8_rac_get(c);
700 s->filter.level = vp8_rac_get_uint(c, 6);
701 s->filter.sharpness = vp8_rac_get_uint(c, 3);
703 if ((s->lf_delta.enabled = vp8_rac_get(c)))
707 if (setup_partitions(s, buf, buf_size)) {
708 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
709 return AVERROR_INVALIDDATA;
712 if (!s->macroblocks_base || /* first frame */
713 width != s->avctx->width || height != s->avctx->height)
714 if ((ret = vp8_update_dimensions(s, width, height)) < 0)
721 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
722 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
725 // if we aren't saving this frame's probabilities for future frames,
726 // make a copy of the current probabilities
727 if (!(s->update_probabilities = vp8_rac_get(c)))
728 s->prob[1] = s->prob[0];
730 s->update_last = s->keyframe || vp8_rac_get(c);
732 vp78_update_probability_tables(s);
734 if ((s->mbskip_enabled = vp8_rac_get(c)))
735 s->prob->mbskip = vp8_rac_get_uint(c, 8);
738 s->prob->intra = vp8_rac_get_uint(c, 8);
739 s->prob->last = vp8_rac_get_uint(c, 8);
740 s->prob->golden = vp8_rac_get_uint(c, 8);
741 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
747 static av_always_inline
748 void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
750 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
751 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
755 * Motion vector coding, 17.1.
757 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
761 if (vp56_rac_get_prob_branchy(c, p[0])) {
764 for (i = 0; i < 3; i++)
765 x += vp56_rac_get_prob(c, p[9 + i]) << i;
766 for (i = (vp7 ? 7 : 9); i > 3; i--)
767 x += vp56_rac_get_prob(c, p[9 + i]) << i;
768 if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
772 const uint8_t *ps = p + 2;
773 bit = vp56_rac_get_prob(c, *ps);
776 bit = vp56_rac_get_prob(c, *ps);
779 x += vp56_rac_get_prob(c, *ps);
782 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
785 static av_always_inline
786 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
789 return vp7_submv_prob;
792 return vp8_submv_prob[4 - !!left];
794 return vp8_submv_prob[2];
795 return vp8_submv_prob[1 - !!left];
799 * Split motion vector prediction, 16.4.
800 * @returns the number of motion vectors parsed (2, 4 or 16)
802 static av_always_inline
803 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
804 int layout, int is_vp7)
808 VP8Macroblock *top_mb;
809 VP8Macroblock *left_mb = &mb[-1];
810 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
811 const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
813 VP56mv *left_mv = left_mb->bmv;
814 VP56mv *cur_mv = mb->bmv;
816 if (!layout) // layout is inlined, s->mb_layout is not
819 top_mb = &mb[-s->mb_width - 1];
820 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
821 top_mv = top_mb->bmv;
823 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
824 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
825 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
827 part_idx = VP8_SPLITMVMODE_8x8;
829 part_idx = VP8_SPLITMVMODE_4x4;
832 num = vp8_mbsplit_count[part_idx];
833 mbsplits_cur = vp8_mbsplits[part_idx],
834 firstidx = vp8_mbfirstidx[part_idx];
835 mb->partitioning = part_idx;
837 for (n = 0; n < num; n++) {
839 uint32_t left, above;
840 const uint8_t *submv_prob;
843 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
845 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
847 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
849 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
851 submv_prob = get_submv_prob(left, above, is_vp7);
853 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
854 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
855 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
856 mb->bmv[n].y = mb->mv.y +
857 read_mv_component(c, s->prob->mvc[0], is_vp7);
858 mb->bmv[n].x = mb->mv.x +
859 read_mv_component(c, s->prob->mvc[1], is_vp7);
861 AV_ZERO32(&mb->bmv[n]);
864 AV_WN32A(&mb->bmv[n], above);
867 AV_WN32A(&mb->bmv[n], left);
875 * The vp7 reference decoder uses a padding macroblock column (added to right
876 * edge of the frame) to guard against illegal macroblock offsets. The
877 * algorithm has bugs that permit offsets to straddle the padding column.
878 * This function replicates those bugs.
880 * @param[out] edge_x macroblock x address
881 * @param[out] edge_y macroblock y address
883 * @return macroblock offset legal (boolean)
885 static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
886 int xoffset, int yoffset, int boundary,
887 int *edge_x, int *edge_y)
889 int vwidth = mb_width + 1;
890 int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
891 if (new < boundary || new % vwidth == vwidth - 1)
893 *edge_y = new / vwidth;
894 *edge_x = new % vwidth;
898 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
900 return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
903 static av_always_inline
904 void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
905 int mb_x, int mb_y, int layout)
907 VP8Macroblock *mb_edge[12];
908 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
909 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
912 uint8_t cnt[3] = { 0 };
913 VP56RangeCoder *c = &s->c;
916 AV_ZERO32(&near_mv[0]);
917 AV_ZERO32(&near_mv[1]);
918 AV_ZERO32(&near_mv[2]);
920 for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
921 const VP7MVPred * pred = &vp7_mv_pred[i];
924 if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
925 pred->yoffset, !s->profile, &edge_x, &edge_y)) {
926 VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
927 ? s->macroblocks_base + 1 + edge_x +
928 (s->mb_width + 1) * (edge_y + 1)
929 : s->macroblocks + edge_x +
930 (s->mb_height - edge_y - 1) * 2;
931 uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
933 if (AV_RN32A(&near_mv[CNT_NEAREST])) {
934 if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
936 } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
937 if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
941 AV_WN32A(&near_mv[CNT_NEAR], mv);
945 AV_WN32A(&near_mv[CNT_NEAREST], mv);
954 cnt[idx] += vp7_mv_pred[i].score;
957 mb->partitioning = VP8_SPLITMVMODE_NONE;
959 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
960 mb->mode = VP8_MVMODE_MV;
962 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
964 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
966 if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
967 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
969 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
971 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
972 mb->mode = VP8_MVMODE_SPLIT;
973 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
975 mb->mv.y += read_mv_component(c, s->prob->mvc[0], IS_VP7);
976 mb->mv.x += read_mv_component(c, s->prob->mvc[1], IS_VP7);
980 mb->mv = near_mv[CNT_NEAR];
984 mb->mv = near_mv[CNT_NEAREST];
988 mb->mode = VP8_MVMODE_ZERO;
994 static av_always_inline
995 void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
996 int mb_x, int mb_y, int layout)
998 VP8Macroblock *mb_edge[3] = { 0 /* top */,
1001 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1002 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1004 int cur_sign_bias = s->sign_bias[mb->ref_frame];
1005 int8_t *sign_bias = s->sign_bias;
1007 uint8_t cnt[4] = { 0 };
1008 VP56RangeCoder *c = &s->c;
1010 if (!layout) { // layout is inlined (s->mb_layout is not)
1011 mb_edge[0] = mb + 2;
1012 mb_edge[2] = mb + 1;
1014 mb_edge[0] = mb - s->mb_width - 1;
1015 mb_edge[2] = mb - s->mb_width - 2;
1018 AV_ZERO32(&near_mv[0]);
1019 AV_ZERO32(&near_mv[1]);
1020 AV_ZERO32(&near_mv[2]);
1022 /* Process MB on top, left and top-left */
1023 #define MV_EDGE_CHECK(n) \
1025 VP8Macroblock *edge = mb_edge[n]; \
1026 int edge_ref = edge->ref_frame; \
1027 if (edge_ref != VP56_FRAME_CURRENT) { \
1028 uint32_t mv = AV_RN32A(&edge->mv); \
1030 if (cur_sign_bias != sign_bias[edge_ref]) { \
1031 /* SWAR negate of the values in mv. */ \
1033 mv = ((mv & 0x7fff7fff) + \
1034 0x00010001) ^ (mv & 0x80008000); \
1036 if (!n || mv != AV_RN32A(&near_mv[idx])) \
1037 AV_WN32A(&near_mv[++idx], mv); \
1038 cnt[idx] += 1 + (n != 2); \
1040 cnt[CNT_ZERO] += 1 + (n != 2); \
1048 mb->partitioning = VP8_SPLITMVMODE_NONE;
1049 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1050 mb->mode = VP8_MVMODE_MV;
1052 /* If we have three distinct MVs, merge first and last if they're the same */
1053 if (cnt[CNT_SPLITMV] &&
1054 AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1055 cnt[CNT_NEAREST] += 1;
1057 /* Swap near and nearest if necessary */
1058 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1059 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
1060 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1063 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1064 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1065 /* Choose the best mv out of 0,0 and the nearest mv */
1066 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
1067 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
1068 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
1069 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1071 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1072 mb->mode = VP8_MVMODE_SPLIT;
1073 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1075 mb->mv.y += read_mv_component(c, s->prob->mvc[0], IS_VP8);
1076 mb->mv.x += read_mv_component(c, s->prob->mvc[1], IS_VP8);
1077 mb->bmv[0] = mb->mv;
1080 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
1081 mb->bmv[0] = mb->mv;
1084 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
1085 mb->bmv[0] = mb->mv;
1088 mb->mode = VP8_MVMODE_ZERO;
1090 mb->bmv[0] = mb->mv;
1094 static av_always_inline
1095 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1096 int mb_x, int keyframe, int layout)
1098 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1101 VP8Macroblock *mb_top = mb - s->mb_width - 1;
1102 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1107 uint8_t *const left = s->intra4x4_pred_mode_left;
1109 top = mb->intra4x4_pred_mode_top;
1111 top = s->intra4x4_pred_mode_top + 4 * mb_x;
1112 for (y = 0; y < 4; y++) {
1113 for (x = 0; x < 4; x++) {
1115 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
1116 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
1117 left[y] = top[x] = *intra4x4;
1123 for (i = 0; i < 16; i++)
1124 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
1125 vp8_pred4x4_prob_inter);
1129 static av_always_inline
1130 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1131 uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
1133 VP56RangeCoder *c = &s->c;
1134 static const char *vp7_feature_name[] = { "q-index",
1136 "partial-golden-update",
1141 for (i = 0; i < 4; i++) {
1142 if (s->feature_enabled[i]) {
1143 if (vp56_rac_get_prob(c, s->feature_present_prob[i])) {
1144 int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
1145 s->feature_index_prob[i]);
1146 av_log(s->avctx, AV_LOG_WARNING,
1147 "Feature %s present in macroblock (value 0x%x)\n",
1148 vp7_feature_name[i], s->feature_value[i][index]);
1152 } else if (s->segmentation.update_map)
1153 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
1154 else if (s->segmentation.enabled)
1155 *segment = ref ? *ref : *segment;
1156 mb->segment = *segment;
1158 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
1161 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
1162 vp8_pred16x16_prob_intra);
1164 if (mb->mode == MODE_I4x4) {
1165 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
1167 const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1168 : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1169 if (s->mb_layout == 1)
1170 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1172 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1173 AV_WN32A(s->intra4x4_pred_mode_left, modes);
1176 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1177 vp8_pred8x8c_prob_intra);
1178 mb->ref_frame = VP56_FRAME_CURRENT;
1179 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
1181 if (vp56_rac_get_prob_branchy(c, s->prob->last))
1183 (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
1184 : VP56_FRAME_GOLDEN;
1186 mb->ref_frame = VP56_FRAME_PREVIOUS;
1187 s->ref_count[mb->ref_frame - 1]++;
1189 // motion vectors, 16.3
1191 vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1193 vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
1196 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1198 if (mb->mode == MODE_I4x4)
1199 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1201 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1203 mb->ref_frame = VP56_FRAME_CURRENT;
1204 mb->partitioning = VP8_SPLITMVMODE_NONE;
1205 AV_ZERO32(&mb->bmv[0]);
1210 * @param r arithmetic bitstream reader context
1211 * @param block destination for block coefficients
1212 * @param probs probabilities to use when reading trees from the bitstream
1213 * @param i initial coeff index, 0 unless a separate DC block is coded
1214 * @param qmul array holding the dc/ac dequant factor at position 0/1
1216 * @return 0 if no coeffs were decoded
1217 * otherwise, the index of the last coeff decoded plus one
1219 static av_always_inline
/* Decode the DCT coefficient token string of one 4x4 block from the
 * boolean (range) coder, walking the token tree of RFC 6386 §13.2.
 * NOTE(review): several physical lines (loop header, EOB return path,
 * closing braces) are missing from this extract of the file. */
1220 int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1221 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1222 int i, uint8_t *token_prob, int16_t qmul[2],
1223 const uint8_t scan[16], int vp7)
/* Work on a local copy of the range coder so its state can stay in
 * registers through the hot token loop. */
1225 VP56RangeCoder c = *r;
1230 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
1234 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1236 break; // invalid input; blocks should end with EOB
/* context 0: previous token was zero */
1237 token_prob = probs[i][0];
1243 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
/* context 1: previous token was one */
1245 token_prob = probs[i + 1][1];
1247 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1248 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1250 coeff += vp56_rac_get_prob(&c, token_prob[5]);
1254 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1255 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1256 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1257 } else { // DCT_CAT2
1259 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1260 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1262 } else { // DCT_CAT3 and up
/* Two probability bits pick one of four large-value categories; each
 * category spans 8 << cat values starting at 3 + (8 << cat). */
1263 int a = vp56_rac_get_prob(&c, token_prob[8]);
1264 int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
1265 int cat = (a << 1) + b;
1266 coeff = 3 + (8 << cat);
1267 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
/* context 2: previous token was larger than one */
1270 token_prob = probs[i + 1][2];
/* Sign bit is coded last; qmul[0] dequantizes the DC coefficient
 * (i == 0), qmul[1] the AC coefficients. */
1272 block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
1279 static av_always_inline
/* VP7 inter-block DC prediction: pred[] carries a running DC value (and,
 * in the full source, a repeat counter) per reference frame.
 * NOTE(review): the branch structure between the two assignments below is
 * not visible in this extract — lines are missing. */
1280 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1282 int16_t dc = block[0];
/* Deliberately branch-free: bitwise '|' ORs three conditions — pred[0]
 * is zero, dc is zero, or pred[0] and dc differ in sign (the arithmetic
 * shift of the XOR extracts the sign difference). */
1290 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1291 block[0] = pred[0] = dc;
1296 block[0] = pred[0] = dc;
/* VP7 wrapper: instantiates the shared coefficient decoder with an
 * explicit (per-frame, VP7 allows custom) scan order and IS_VP7 so the
 * always-inline body is specialized at compile time. */
1302 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1304 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1305 int i, uint8_t *token_prob,
1307 const uint8_t scan[16])
1309 return decode_block_coeffs_internal(r, block, probs, i,
1310 token_prob, qmul, scan, IS_VP7);
/* VP8 wrapper; guarded by #ifndef so an arch-specific (asm) override can
 * replace it. VP8 always uses the fixed zigzag scan order. */
1313 #ifndef vp8_decode_block_coeffs_internal
1314 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1316 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1317 int i, uint8_t *token_prob,
1320 return decode_block_coeffs_internal(r, block, probs, i,
1321 token_prob, qmul, ff_zigzag_scan, IS_VP8);
1326 * @param c arithmetic bitstream reader context
1327 * @param block destination for block coefficients
1328 * @param probs probabilities to use when reading trees from the bitstream
1329 * @param i initial coeff index, 0 unless a separate DC block is coded
1330 * @param zero_nhood the initial prediction context for number of surrounding
1331 * all-zero blocks (only left/top, so 0-2)
1332 * @param qmul array holding the dc/ac dequant factor at position 0/1
1334 * @return 0 if no coeffs were decoded
1335 * otherwise, the index of the last coeff decoded plus one
1337 static av_always_inline
/* Front-end for block coefficient decoding (see doxygen block above):
 * tests the cheap, very common immediate-EOB case inline before calling
 * the full token decoder for the VP7 or VP8 variant. */
1338 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1339 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1340 int i, int zero_nhood, int16_t qmul[2],
1341 const uint8_t scan[16], int vp7)
/* zero_nhood (0-2, count of neighboring all-zero blocks) selects the
 * initial probability context. */
1343 uint8_t *token_prob = probs[i][zero_nhood];
1344 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1346 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1347 token_prob, qmul, scan)
1348 : vp8_decode_block_coeffs_internal(c, block, probs, i,
1352 static av_always_inline
/* Decode all residual coefficients of one macroblock: optional luma DC
 * block (Y2/WHT), 16 luma 4x4 blocks, then 2x4 chroma 4x4 blocks.
 * t_nnz/l_nnz are the top/left non-zero context arrays (8 luma+chroma
 * entries plus index 8 for the DC block). */
1353 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1354 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1357 int i, x, y, luma_start = 0, luma_ctx = 3;
1358 int nnz_pred, nnz, nnz_total = 0;
1359 int segment = mb->segment;
/* A separate luma DC plane exists unless the mb is intra 4x4 (or, for
 * VP8 only, split-MV inter). */
1362 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1363 nnz_pred = t_nnz[8] + l_nnz[8];
1365 // decode DC values and do hadamard
1366 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1367 nnz_pred, s->qmat[segment].luma_dc_qmul,
1368 ff_zigzag_scan, is_vp7);
1369 l_nnz[8] = t_nnz[8] = !!nnz;
/* VP7 additionally predicts the DC of inter blocks from the running
 * per-reference DC tracker. */
1371 if (is_vp7 && mb->mode > MODE_I4x4) {
1372 nnz |= inter_predict_dc(td->block_dc,
1373 s->inter_dc_pred[mb->ref_frame - 1]);
/* Inverse Walsh-Hadamard transform scatters the Y2 DCs into the 16 luma
 * blocks; the _dc variant is the single-coefficient fast path. */
1380 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1382 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
1389 for (y = 0; y < 4; y++)
1390 for (x = 0; x < 4; x++) {
1391 nnz_pred = l_nnz[y] + t_nnz[x];
1392 nnz = decode_block_coeffs(c, td->block[y][x],
1393 s->prob->token[luma_ctx],
1394 luma_start, nnz_pred,
1395 s->qmat[segment].luma_qmul,
1396 s->prob[0].scan, is_vp7);
1397 /* nnz+block_dc may be one more than the actual last index,
1398 * but we don't care */
1399 td->non_zero_count_cache[y][x] = nnz + block_dc;
1400 t_nnz[x] = l_nnz[y] = !!nnz;
1405 // TODO: what to do about dimensions? 2nd dim for luma is x,
1406 // but for chroma it's (y<<1)|x
1407 for (i = 4; i < 6; i++)
1408 for (y = 0; y < 2; y++)
1409 for (x = 0; x < 2; x++) {
1410 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1411 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1412 s->prob->token[2], 0, nnz_pred,
1413 s->qmat[segment].chroma_qmul,
1414 s->prob[0].scan, is_vp7);
1415 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1416 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1420 // if there were no coded coeffs despite the macroblock not being marked skip,
1421 // we MUST not do the inner loop filter and should not do IDCT
1422 // Since skip isn't used for bitstream prediction, just manually set it.
1427 static av_always_inline
/* Save the bottom pixel row of this macroblock (16 luma bytes, plus 8+8
 * chroma bytes unless the simple filter is active) into top_border, so
 * the row below can use it after in-place loop filtering clobbers it. */
1428 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1429 uint8_t *src_cb, uint8_t *src_cr,
1430 int linesize, int uvlinesize, int simple)
1432 AV_COPY128(top_border, src_y + 15 * linesize);
1434 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1435 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1439 static av_always_inline
/* Swap (xchg=1) or copy the saved top-border pixels with the row above
 * the current macroblock, so intra prediction sees the unfiltered
 * neighbors; called again with xchg=0 after prediction to restore. */
1440 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1441 uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
1442 int mb_y, int mb_width, int simple, int xchg)
1444 uint8_t *top_border_m1 = top_border - 32; // for TL prediction
1446 src_cb -= uvlinesize;
1447 src_cr -= uvlinesize;
/* NOTE(review): the macro body (lines 1450-1456) is missing from this
 * extract; XCHG presumably swaps or copies 8 bytes depending on xchg. */
1449 #define XCHG(a, b, xchg) \
1457 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1458 XCHG(top_border, src_y, xchg);
1459 XCHG(top_border + 8, src_y + 8, 1);
1460 if (mb_x < mb_width - 1)
1461 XCHG(top_border + 32, src_y + 16, 1);
1463 // only copy chroma for normal loop filter
1464 // or to initialize the top row to 127
1465 if (!simple || !mb_y) {
1466 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1467 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1468 XCHG(top_border + 16, src_cb, 1);
1469 XCHG(top_border + 24, src_cr, 1);
1473 static av_always_inline
/* Remap an 8x8/16x16 DC prediction mode when top/left neighbors are
 * outside the frame, H.264-style (TOP_DC / LEFT_DC / DC_128 variants). */
1474 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1477 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1479 return mb_y ? mode : LEFT_DC_PRED8x8;
1482 static av_always_inline
/* Remap the 8x8/16x16 TM mode at frame edges: fall back to vertical,
 * horizontal, or a flat DC fill (128 for VP7, 129 for VP8). */
1483 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1486 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1488 return mb_y ? mode : HOR_PRED8x8;
1491 static av_always_inline
/* Dispatch 8x8/16x16 intra mode edge handling by mode: DC gets the
 * H.264-style remap, VERT/HOR fall back to flat DC at the missing edge
 * (VP7 uses 128 where VP8 uses 127/129), TM has its own helper. */
1492 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1496 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1498 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1500 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1501 case PLANE_PRED8x8: /* TM */
1502 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1507 static av_always_inline
/* 4x4 equivalent of check_tm_pred8x8_mode: remap TM at frame edges to
 * vertical, horizontal, or a flat DC fill (128 VP7 / 129 VP8). */
1508 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1511 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1513 return mb_y ? mode : HOR_VP8_PRED;
1517 static av_always_inline
/* Per-4x4-block intra mode edge handling. Modes that read pixels outside
 * the frame are either remapped or, for the DC/diagonal family, flagged
 * via *copy_buf so the caller predicts into a padded scratch buffer. */
1518 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1519 int *copy_buf, int vp7)
1523 if (!mb_x && mb_y) {
1528 case DIAG_DOWN_LEFT_PRED:
1529 case VERT_LEFT_PRED:
1530 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1538 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1540 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1541 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1542 * as 16x16/8x8 DC */
1543 case DIAG_DOWN_RIGHT_PRED:
1544 case VERT_RIGHT_PRED:
1553 static av_always_inline
/* Perform intra prediction for one macroblock: 16x16 whole-mb modes or
 * per-4x4-block modes (with IDCT applied per block), then 8x8 chroma.
 * Borders are swapped in/out around the prediction so unfiltered
 * neighbor pixels are used. */
1554 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1555 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1557 int x, y, mode, nnz;
1560 /* for the first row, we need to run xchg_mb_border to init the top edge
1561 * to 127 otherwise, skip it if we aren't going to deblock */
/* NOTE(review): '|| !mb_y' is redundant here — it is only evaluated when
 * mb_y is nonzero; kept byte-identical. */
1562 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1563 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1564 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1565 s->filter.simple, 1);
1567 if (mb->mode < MODE_I4x4) {
1568 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1569 s->hpc.pred16x16[mode](dst[0], s->linesize);
1571 uint8_t *ptr = dst[0];
1572 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
/* VP7 pads missing edges with 128; VP8 uses 127 above and 129 left. */
1573 const uint8_t lo = is_vp7 ? 128 : 127;
1574 const uint8_t hi = is_vp7 ? 128 : 129;
1575 uint8_t tr_top[4] = { lo, lo, lo, lo };
1577 // all blocks on the right edge of the macroblock use the bottom edge of
1578 // the top macroblock for their topright edge
1579 uint8_t *tr_right = ptr - s->linesize + 16;
1581 // if we're on the right edge of the frame, said edge is extended
1582 // from the top macroblock
1583 if (mb_y && mb_x == s->mb_width - 1) {
1584 tr = tr_right[-1] * 0x01010101u;
1585 tr_right = (uint8_t *) &tr;
1589 AV_ZERO128(td->non_zero_count_cache);
1591 for (y = 0; y < 4; y++) {
1592 uint8_t *topright = ptr + 4 - s->linesize;
1593 for (x = 0; x < 4; x++) {
1594 int copy = 0, linesize = s->linesize;
1595 uint8_t *dst = ptr + 4 * x;
/* 5 rows x 8 bytes scratch: row 0 = top edge, rows 1-4 = block rows,
 * with one left-neighbor column at offset 3 of each row. */
1596 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];
1598 if ((y == 0 || x == 3) && mb_y == 0) {
1601 topright = tr_right;
/* NOTE(review): '©' below looks like a mangled '&copy' (address of
 * the local 'copy' flag) — confirm against the upstream source. */
1603 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1604 mb_y + y, ©, is_vp7);
/* Predict into the padded scratch buffer when edge pixels are absent. */
1606 dst = copy_dst + 12;
1610 AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1612 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1616 copy_dst[3] = ptr[4 * x - s->linesize - 1];
1625 copy_dst[11] = ptr[4 * x - 1];
1626 copy_dst[19] = ptr[4 * x + s->linesize - 1];
1627 copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1628 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1631 s->hpc.pred4x4[mode](dst, topright, linesize);
/* Copy the predicted 4x4 block back from the scratch buffer. */
1633 AV_COPY32(ptr + 4 * x, copy_dst + 12);
1634 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1635 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1636 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
/* Apply the residual immediately: DC-only fast path vs full IDCT. */
1639 nnz = td->non_zero_count_cache[y][x];
1642 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1643 td->block[y][x], s->linesize);
1645 s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1646 td->block[y][x], s->linesize);
1651 ptr += 4 * s->linesize;
1656 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1657 mb_x, mb_y, is_vp7);
1658 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1659 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
/* Restore the filtered border pixels (xchg=0 direction). */
1661 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1662 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1663 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1664 s->filter.simple, 0);
/* Per-subpel-phase (mv & 7) lookup: how many extra pixels the 6-tap or
 * bilinear MC filters read on each side, used to pick the MC function
 * and size the emulated-edge copy. */
1667 static const uint8_t subpel_idx[3][8] = {
1668 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1669 // also function pointer index
1670 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1671 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1677 * @param s VP8 decoding context
1678 * @param dst target buffer for block data at block position
1679 * @param ref reference picture buffer at origin (0, 0)
1680 * @param mv motion vector (relative to block position) to get pixel data from
1681 * @param x_off horizontal position of block from origin (0, 0)
1682 * @param y_off vertical position of block from origin (0, 0)
1683 * @param block_w width of block (16, 8 or 4)
1684 * @param block_h height of block (always same as block_w)
1685 * @param width width of src/dst plane data
1686 * @param height height of src/dst plane data
1687 * @param linesize size of a single line of plane data, including padding
1688 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1690 static av_always_inline
/* Luma motion compensation (see doxygen block above). Quarter-pel mv:
 * integer part offsets the source, fractional part (doubled to eighth
 * pel) selects the subpel filter. Out-of-frame reads go through
 * emulated_edge_mc into the per-thread edge buffer. */
1691 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1692 ThreadFrame *ref, const VP56mv *mv,
1693 int x_off, int y_off, int block_w, int block_h,
1694 int width, int height, ptrdiff_t linesize,
1695 vp8_mc_func mc_func[3][3])
1697 uint8_t *src = ref->f->data[0];
1700 int src_linesize = linesize;
1702 int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
1703 int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];
1705 x_off += mv->x >> 2;
1706 y_off += mv->y >> 2;
/* Frame-threading: wait until the reference frame has decoded all rows
 * the (filtered) source region can touch. */
1709 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1710 src += y_off * linesize + x_off;
1711 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1712 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1713 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1714 src - my_idx * linesize - mx_idx,
1715 EDGE_EMU_LINESIZE, linesize,
1716 block_w + subpel_idx[1][mx],
1717 block_h + subpel_idx[1][my],
1718 x_off - mx_idx, y_off - my_idx,
1720 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1721 src_linesize = EDGE_EMU_LINESIZE;
1723 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
/* Full-pel motion vector: plain copy, no filter taps, no edge concern
 * beyond the block itself. */
1725 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1726 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1727 linesize, block_h, 0, 0);
1732 * chroma MC function
1734 * @param s VP8 decoding context
1735 * @param dst1 target buffer for block data at block position (U plane)
1736 * @param dst2 target buffer for block data at block position (V plane)
1737 * @param ref reference picture buffer at origin (0, 0)
1738 * @param mv motion vector (relative to block position) to get pixel data from
1739 * @param x_off horizontal position of block from origin (0, 0)
1740 * @param y_off vertical position of block from origin (0, 0)
1741 * @param block_w width of block (16, 8 or 4)
1742 * @param block_h height of block (always same as block_w)
1743 * @param width width of src/dst plane data
1744 * @param height height of src/dst plane data
1745 * @param linesize size of a single line of plane data, including padding
1746 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1748 static av_always_inline
/* Chroma motion compensation for both U and V planes (see doxygen block
 * above). Chroma mvs are eighth-pel: fractional part is mv & 7, integer
 * part mv >> 3. Mirrors vp8_mc_luma, done twice for the two planes
 * sharing one edge-emulation buffer. */
1749 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1750 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1751 int x_off, int y_off, int block_w, int block_h,
1752 int width, int height, ptrdiff_t linesize,
1753 vp8_mc_func mc_func[3][3])
1755 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1758 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1759 int my = mv->y & 7, my_idx = subpel_idx[0][my];
1761 x_off += mv->x >> 3;
1762 y_off += mv->y >> 3;
1765 src1 += y_off * linesize + x_off;
1766 src2 += y_off * linesize + x_off;
/* Chroma rows per mb-row are 8, hence >> 3 progress granularity. */
1767 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1768 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1769 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1770 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1771 src1 - my_idx * linesize - mx_idx,
1772 EDGE_EMU_LINESIZE, linesize,
1773 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1774 x_off - mx_idx, y_off - my_idx, width, height);
1775 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1776 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
/* The single edge buffer is reused for V after U has been filtered. */
1778 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1779 src2 - my_idx * linesize - mx_idx,
1780 EDGE_EMU_LINESIZE, linesize,
1781 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1782 x_off - mx_idx, y_off - my_idx, width, height);
1783 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1784 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1786 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1787 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
/* Full-pel chroma mv: straight copies from both planes. */
1790 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1791 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1792 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1796 static av_always_inline
/* Motion-compensate one partition of a macroblock: luma at (bx_off,
 * by_off) with the given size, then the corresponding half-resolution
 * chroma area using a derived (and for profile 3, modified) chroma mv.
 * NOTE(review): the uvmv derivation lines are missing from this extract. */
1797 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1798 ThreadFrame *ref_frame, int x_off, int y_off,
1799 int bx_off, int by_off, int block_w, int block_h,
1800 int width, int height, VP56mv *mv)
1805 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1806 ref_frame, mv, x_off + bx_off, y_off + by_off,
1807 block_w, block_h, width, height, s->linesize,
1808 s->put_pixels_tab[block_w == 8]);
1811 if (s->profile == 3) {
1812 /* this block only applies VP8; it is safe to check
1813 * only the profile, as VP7 profile <= 1 */
/* Chroma offsets/sizes are half the luma ones (4:2:0). */
1825 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1826 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1827 &uvmv, x_off + bx_off, y_off + by_off,
1828 block_w, block_h, width, height, s->uvlinesize,
1829 s->put_pixels_tab[1 + (block_w == 4)]);
1832 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1833 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1834 static av_always_inline
/* Prefetch reference pixels for the estimated position 4 macroblocks
 * ahead (see comment above); only for references used often enough this
 * frame to make the prefetch likely useful. */
1835 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1838 /* Don't prefetch refs that haven't been used very often this frame. */
1839 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1840 int x_off = mb_x << 4, y_off = mb_y << 4;
1841 int mx = (mb->mv.x >> 2) + x_off + 8;
1842 int my = (mb->mv.y >> 2) + y_off;
1843 uint8_t **src = s->framep[ref]->tf.f->data;
/* (mb_x & 3) staggers the prefetch rows across consecutive mbs. */
1844 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1845 /* For threading, a ff_thread_await_progress here might be useful, but
1846 * it actually slows down the decoder. Since a bad prefetch doesn't
1847 * generate bad decoder output, we don't run it here. */
1848 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
1849 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
/* src[2] - src[1] is the U-to-V plane distance, prefetching both. */
1850 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1855 * Apply motion vectors to prediction buffer, chapter 18.
1857 static av_always_inline
/* Apply motion vectors to the prediction buffer, chapter 18: dispatch on
 * the mb partitioning (whole 16x16, 16x8/8x16/8x8 halves/quarters, or
 * sixteen 4x4 sub-blocks with averaged chroma mvs). */
1858 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1859 VP8Macroblock *mb, int mb_x, int mb_y)
1861 int x_off = mb_x << 4, y_off = mb_y << 4;
1862 int width = 16 * s->mb_width, height = 16 * s->mb_height;
1863 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1864 VP56mv *bmv = mb->bmv;
1866 switch (mb->partitioning) {
1867 case VP8_SPLITMVMODE_NONE:
1868 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1869 0, 0, 16, 16, width, height, &mb->mv);
1871 case VP8_SPLITMVMODE_4x4: {
1876 for (y = 0; y < 4; y++) {
1877 for (x = 0; x < 4; x++) {
1878 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1879 ref, &bmv[4 * y + x],
1880 4 * x + x_off, 4 * y + y_off, 4, 4,
1881 width, height, s->linesize,
1882 s->put_pixels_tab[2]);
/* Each 4x4 chroma block gets the rounded average of the four luma mvs
 * covering its 8x8 luma area. */
1891 for (y = 0; y < 2; y++) {
1892 for (x = 0; x < 2; x++) {
1893 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
1894 mb->bmv[2 * y * 4 + 2 * x + 1].x +
1895 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
1896 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
1897 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
1898 mb->bmv[2 * y * 4 + 2 * x + 1].y +
1899 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
1900 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
/* FF_SIGNBIT makes the >> 2 round toward zero for negative sums. */
1901 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
1902 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
1903 if (s->profile == 3) {
1907 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
1908 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
1909 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
1910 width, height, s->uvlinesize,
1911 s->put_pixels_tab[2]);
1916 case VP8_SPLITMVMODE_16x8:
1917 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1918 0, 0, 16, 8, width, height, &bmv[0]);
1919 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1920 0, 8, 16, 8, width, height, &bmv[1]);
1922 case VP8_SPLITMVMODE_8x16:
1923 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1924 0, 0, 8, 16, width, height, &bmv[0]);
1925 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1926 8, 0, 8, 16, width, height, &bmv[1]);
1928 case VP8_SPLITMVMODE_8x8:
1929 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1930 0, 0, 8, 8, width, height, &bmv[0]);
1931 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1932 8, 0, 8, 8, width, height, &bmv[1]);
1933 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1934 0, 8, 8, 8, width, height, &bmv[2]);
1935 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1936 8, 8, 8, 8, width, height, &bmv[3]);
1941 static av_always_inline
/* Add the inverse-transformed residual of a whole (non-I4x4) macroblock
 * to the prediction: per 4-block row of luma, then per chroma plane.
 * non_zero_count_cache bytes select per block: 0 = skip, 1 = DC-only
 * fast path, >1 = full IDCT; an all-DC row/plane uses the add4 batch
 * functions instead. */
1942 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
1946 if (mb->mode != MODE_I4x4) {
1947 uint8_t *y_dst = dst[0];
1948 for (y = 0; y < 4; y++) {
/* Read the 4 per-block nnz counts of this row as one 32-bit word. */
1949 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1951 if (nnz4 & ~0x01010101) {
1952 for (x = 0; x < 4; x++) {
/* (uint8_t)nnz4 is the current block's count; nnz4 is shifted down
 * one byte per iteration in the (elided) loop tail. */
1953 if ((uint8_t) nnz4 == 1)
1954 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
1957 else if ((uint8_t) nnz4 > 1)
1958 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
1966 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1969 y_dst += 4 * s->linesize;
1973 for (ch = 0; ch < 2; ch++) {
1974 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
1976 uint8_t *ch_dst = dst[1 + ch];
1977 if (nnz4 & ~0x01010101) {
1978 for (y = 0; y < 2; y++) {
1979 for (x = 0; x < 2; x++) {
1980 if ((uint8_t) nnz4 == 1)
1981 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
1982 td->block[4 + ch][(y << 1) + x],
1984 else if ((uint8_t) nnz4 > 1)
1985 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
1986 td->block[4 + ch][(y << 1) + x],
1990 goto chroma_idct_end;
1992 ch_dst += 4 * s->uvlinesize;
1995 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
2003 static av_always_inline
/* Compute the loop-filter strength for one macroblock (spec chapter 15):
 * base level from segmentation or the frame header, adjusted by
 * reference/mode deltas, then derive the interior (inner-edge) limit
 * from the sharpness setting. */
2004 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2005 VP8FilterStrength *f, int is_vp7)
2007 int interior_limit, filter_level;
2009 if (s->segmentation.enabled) {
2010 filter_level = s->segmentation.filter_level[mb->segment];
/* Segment levels are deltas on the frame level unless absolute. */
2011 if (!s->segmentation.absolute_vals)
2012 filter_level += s->filter.level;
2014 filter_level = s->filter.level;
2016 if (s->lf_delta.enabled) {
2017 filter_level += s->lf_delta.ref[mb->ref_frame];
2018 filter_level += s->lf_delta.mode[mb->mode];
2021 filter_level = av_clip_uintp2(filter_level, 6);
2023 interior_limit = filter_level;
2024 if (s->filter.sharpness) {
2025 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2026 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2028 interior_limit = FFMAX(interior_limit, 1);
2030 f->filter_level = filter_level;
2031 f->inner_limit = interior_limit;
/* Inner (sub-block) edges are skipped for skipped whole-mb inter blocks
 * on VP8; VP7 always filters them. */
2032 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2033 mb->mode == VP8_MVMODE_SPLIT;
2036 static av_always_inline
/* Normal (non-simple) loop filter for one macroblock: macroblock edges
 * with the strong filter, inner 4-pixel edges with the inner filter,
 * horizontal then vertical, luma and chroma. VP7 orders the horizontal
 * inner filtering differently (see the final macro invocation). */
2037 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2038 int mb_x, int mb_y, int is_vp7)
2040 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2041 int filter_level = f->filter_level;
2042 int inner_limit = f->inner_limit;
2043 int inner_filter = f->inner_filter;
2044 int linesize = s->linesize;
2045 int uvlinesize = s->uvlinesize;
/* High-edge-variance threshold by filter level; row [0] is for
 * keyframes, row [1] for inter frames. */
2046 static const uint8_t hev_thresh_lut[2][64] = {
2047 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2048 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2049 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2051 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2052 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2053 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* Edge limits: VP7 and VP8 derive them differently (the alternate
 * branch is partially elided in this extract). */
2061 bedge_lim_y = filter_level;
2062 bedge_lim_uv = filter_level * 2;
2063 mbedge_lim = filter_level + 2;
2066 bedge_lim_uv = filter_level * 2 + inner_limit;
2067 mbedge_lim = bedge_lim_y + 4;
2070 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
/* Left macroblock edge (horizontal filtering). */
2073 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2074 mbedge_lim, inner_limit, hev_thresh);
2075 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2076 mbedge_lim, inner_limit, hev_thresh);
2079 #define H_LOOP_FILTER_16Y_INNER(cond) \
2080 if (cond && inner_filter) { \
2081 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2082 bedge_lim_y, inner_limit, \
2084 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2085 bedge_lim_y, inner_limit, \
2087 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2088 bedge_lim_y, inner_limit, \
2090 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2091 uvlinesize, bedge_lim_uv, \
2092 inner_limit, hev_thresh); \
/* VP8 runs the inner horizontal pass here, before the vertical pass... */
2095 H_LOOP_FILTER_16Y_INNER(!is_vp7)
/* Top macroblock edge (vertical filtering). */
2098 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2099 mbedge_lim, inner_limit, hev_thresh);
2100 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2101 mbedge_lim, inner_limit, hev_thresh);
2105 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2106 linesize, bedge_lim_y,
2107 inner_limit, hev_thresh);
2108 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2109 linesize, bedge_lim_y,
2110 inner_limit, hev_thresh);
2111 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2112 linesize, bedge_lim_y,
2113 inner_limit, hev_thresh);
2114 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2115 dst[2] + 4 * uvlinesize,
2116 uvlinesize, bedge_lim_uv,
2117 inner_limit, hev_thresh);
/* ...while VP7 runs it last, after all vertical filtering. */
2120 H_LOOP_FILTER_16Y_INNER(is_vp7)
2123 static av_always_inline
/* Simple loop filter: luma only, one shared edge limit, macroblock edge
 * then the three inner edges, horizontal and vertical passes. */
2124 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2127 int mbedge_lim, bedge_lim;
2128 int filter_level = f->filter_level;
2129 int inner_limit = f->inner_limit;
2130 int inner_filter = f->inner_filter;
2131 int linesize = s->linesize;
2136 bedge_lim = 2 * filter_level + inner_limit;
2137 mbedge_lim = bedge_lim + 4;
/* Left macroblock edge, then inner vertical edges at x = 4, 8, 12. */
2140 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2142 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2143 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2144 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
/* Top macroblock edge, then inner horizontal edges at y = 4, 8, 12. */
2148 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2150 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2151 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2152 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
/* Motion vectors may point up to 16 pixels (in 1/4-pel units) outside
 * the frame on each side. */
2156 #define MARGIN (16 << 2)
2157 static av_always_inline
/* First pass for frame threading: decode only the mb modes / motion
 * vectors / segment ids for the whole frame, so a consumer thread can
 * start pixel decoding row by row. */
2158 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2159 VP8Frame *prev_frame, int is_vp7)
2161 VP8Context *s = avctx->priv_data;
2164 s->mv_min.y = -MARGIN;
2165 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2166 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
/* mb_layout 1: macroblocks stored row-major with a 1-mb guard column. */
2167 VP8Macroblock *mb = s->macroblocks_base +
2168 ((s->mb_width + 1) * (mb_y + 1) + 1);
2169 int mb_xy = mb_y * s->mb_width;
2171 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2173 s->mv_min.x = -MARGIN;
2174 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2175 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
/* Seed the top prediction context of the row above with DC_PRED. */
2177 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2178 DC_PRED * 0x01010101);
2179 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2180 prev_frame && prev_frame->seg_map ?
2181 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
/* VP7 instantiation of the shared mv/mode pre-decode pass. */
2190 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2191 VP8Frame *prev_frame)
2193 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
/* VP8 instantiation of the shared mv/mode pre-decode pass. */
2196 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2197 VP8Frame *prev_frame)
2199 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
/* Sliced threading: block until the other thread (otd) has progressed to
 * at least (mb_x_check, mb_y_check), packed as y<<16|x so a single int
 * compare orders positions. Signaled from update_pos(). */
2203 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2205 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2206 if (otd->thread_mb_pos < tmp) { \
2207 pthread_mutex_lock(&otd->lock); \
2208 td->wait_mb_pos = tmp; \
2210 if (otd->thread_mb_pos >= tmp) \
2212 pthread_cond_wait(&otd->cond, &otd->lock); \
2214 td->wait_mb_pos = INT_MAX; \
2215 pthread_mutex_unlock(&otd->lock); \
/* Sliced threading: publish this thread's packed position and wake the
 * neighbor thread(s) only when one of them is actually waiting at or
 * before the new position (avoids needless broadcasts). */
2219 #define update_pos(td, mb_y, mb_x) \
2221 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2222 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2224 int is_null = !next_td || !prev_td; \
2225 int pos_check = (is_null) ? 1 \
2226 : (next_td != td && \
2227 pos >= next_td->wait_mb_pos) || \
2229 pos >= prev_td->wait_mb_pos); \
2230 td->thread_mb_pos = pos; \
2231 if (sliced_threading && pos_check) { \
2232 pthread_mutex_lock(&td->lock); \
2233 pthread_cond_broadcast(&td->cond); \
2234 pthread_mutex_unlock(&td->lock); \
/* Single-threaded builds: both macros compile to nothing. */
2238 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
2239 #define update_pos(td, mb_y, mb_x)
/* Decode one macroblock row (modes/coeffs/prediction/IDCT), loop filter
 * excluded — filtering runs in vp8_filter_mb_row. Synchronizes with the
 * neighboring row-threads through check_thread_pos/update_pos. */
2242 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2243 int jobnr, int threadnr, int is_vp7)
2245 VP8Context *s = avctx->priv_data;
2246 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2247 int mb_y = td->thread_mb_pos >> 16;
2248 int mb_x, mb_xy = mb_y * s->mb_width;
2249 int num_jobs = s->num_jobs;
2250 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
/* Coefficient partitions are round-robined across rows. */
2251 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2254 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2255 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2256 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2261 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2262 if (mb_y == s->mb_height - 1)
2265 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2266 if (s->mb_layout == 1)
2267 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2269 // Make sure the previous frame has read its segmentation map,
2270 // if we re-use the same map.
2271 if (prev_frame && s->segmentation.enabled &&
2272 !s->segmentation.update_map)
2273 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
/* mb_layout 0: two-row scratch layout reused while decoding. */
2274 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2275 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2276 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
/* VP7 carries left_nnz across rows; VP8 resets it every row. */
2279 if (!is_vp7 || mb_y == 0)
2280 memset(td->left_nnz, 0, sizeof(td->left_nnz));
2282 s->mv_min.x = -MARGIN;
2283 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2285 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2286 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2287 if (prev_td != td) {
2288 if (threadnr != 0) {
2289 check_thread_pos(td, prev_td,
2290 mb_x + (is_vp7 ? 2 : 1),
2291 mb_y - (is_vp7 ? 2 : 1));
2293 check_thread_pos(td, prev_td,
2294 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2295 mb_y - (is_vp7 ? 2 : 1));
2299 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2301 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2302 dst[2] - dst[1], 2);
2305 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2306 prev_frame && prev_frame->seg_map ?
2307 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2309 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2312 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2314 if (mb->mode <= MODE_I4x4)
2315 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2317 inter_predict(s, td, dst, mb, mb_x, mb_y);
2319 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2322 idct_mb(s, td, dst, mb);
/* Skipped mb with no coeffs: clear the nnz contexts. */
2324 AV_ZERO64(td->left_nnz);
2325 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2327 /* Reset DC block predictors if they would exist
2328 * if the mb had coefficients */
2329 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2330 td->left_nnz[8] = 0;
2331 s->top_nnz[mb_x][8] = 0;
2335 if (s->deblock_filter)
2336 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
2338 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2339 if (s->filter.simple)
2340 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2341 NULL, NULL, s->linesize, 0, 1);
2343 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2344 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2347 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
/* Publish a past-the-end position once the row is finished so waiters
 * on the filter pass can proceed. */
2355 if (mb_x == s->mb_width + 1) {
2356 update_pos(td, mb_y, s->mb_width + 3);
2358 update_pos(td, mb_y, mb_x);
/* Apply the in-loop deblocking filter to one row of macroblocks.
 * Runs as a slice-threaded job; ordering against the jobs handling the
 * neighbouring rows is enforced with check_thread_pos()/update_pos(). */
2363 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2364                               int jobnr, int threadnr, int is_vp7)
2366     VP8Context *s = avctx->priv_data;
2367     VP8ThreadData *td = &s->thread_data[threadnr];
/* The row this job must filter was published in the upper 16 bits of
 * thread_mb_pos by vp78_decode_mb_row_sliced(). */
2368     int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
2369     AVFrame *curframe = s->curframe->tf.f;
2371     VP8ThreadData *prev_td, *next_td;
/* Plane pointers for the first macroblock of this row: one mb row is
 * 16 luma lines and 8 chroma lines. */
2373     curframe->data[0] + 16 * mb_y * s->linesize,
2374     curframe->data[1] + 8 * mb_y * s->uvlinesize,
2375     curframe->data[2] + 8 * mb_y * s->uvlinesize
/* NOTE(review): mb_layout == 1 appears to select the frame-wide
 * macroblock array layout (cf. the same test in vp78_decode_frame) --
 * confirm against the header that defines mb_layout. */
2378     if (s->mb_layout == 1)
2379         mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2381         mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2386         prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2387     if (mb_y == s->mb_height - 1)
2390         next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2392     for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
/* Per-mb filter strength was precomputed during the decode pass. */
2393         VP8FilterStrength *f = &td->filter_strength[mb_x];
/* Wait until the job on the row above has advanced past the pixels
 * this filter call may touch. */
2395             check_thread_pos(td, prev_td,
2396                              (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2398         if (next_td != &s->thread_data[0])
2399             check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
/* With a single job no decode-stage job saved the top border, so do
 * it here before filtering overwrites those pixels. */
2401         if (num_jobs == 1) {
2402             if (s->filter.simple)
2403                 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2404                                  NULL, NULL, s->linesize, 0, 1);
2406                 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2407                                  dst[1], dst[2], s->linesize, s->uvlinesize, 0);
/* The simple filter operates on luma only; the full filter also
 * processes both chroma planes. */
2410         if (s->filter.simple)
2411             filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2413             filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
/* Publish our position so neighbouring jobs blocked in
 * check_thread_pos() can proceed. */
2418         update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
/* Slice-thread worker: decode (and, when deblocking is enabled, filter)
 * every num_jobs-th macroblock row, starting at row jobnr. */
2422 static av_always_inline
2423 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2424                               int threadnr, int is_vp7)
2426     VP8Context *s = avctx->priv_data;
2427     VP8ThreadData *td = &s->thread_data[jobnr];
2428     VP8ThreadData *next_td = NULL, *prev_td = NULL;
2429     VP8Frame *curframe = s->curframe;
2430     int mb_y, num_jobs = s->num_jobs;
2432     td->thread_nr = threadnr;
/* Rows are interleaved across jobs: job j handles rows j, j+num_jobs, ... */
2433     for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2434         if (mb_y >= s->mb_height)
/* Publish the current row in the upper 16 bits so the filter stage and
 * the neighbouring jobs can track our progress. */
2436         td->thread_mb_pos = mb_y << 16;
2437         vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, is_vp7);
2438         if (s->deblock_filter)
2439             vp8_filter_mb_row(avctx, tdata, jobnr, threadnr, is_vp7);
/* Mark the whole row finished (maximum x position). */
2440         update_pos(td, mb_y, INT_MAX & 0xFFFF);
/* For frame threading, report how many rows of this frame are complete
 * so a decoder of a following frame may start referencing them. */
2445     if (avctx->active_thread_type == FF_THREAD_FRAME)
2446         ff_thread_report_progress(&curframe->tf, mb_y, 0);
2452 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2453 int jobnr, int threadnr)
2455 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2458 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2459 int jobnr, int threadnr)
2461 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
/* Decode one complete VP7/VP8 frame: parse the frame header, rotate the
 * reference-frame pointers, run the per-row decode/filter jobs and hand
 * the finished picture to the caller (unless the frame is invisible). */
2465 static av_always_inline
2466 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2467                       AVPacket *avpkt, int is_vp7)
2469     VP8Context *s = avctx->priv_data;
2470     int ret, i, referenced, num_jobs;
2471     enum AVDiscard skip_thresh;
2472     VP8Frame *av_uninit(curframe), *prev_frame;
/* Parse the codec-specific frame header from the packet. */
2475         ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2477         ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2482     prev_frame = s->framep[VP56_FRAME_CURRENT];
/* This frame is a reference if it replaces the last, golden or altref
 * frame, i.e. future frames may predict from it. */
2484     referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2485                  s->update_altref == VP56_FRAME_CURRENT;
/* Non-reference frames can be skipped most cheaply, then non-keyframes. */
2487     skip_thresh = !referenced ? AVDISCARD_NONREF
2488                 : !s->keyframe ? AVDISCARD_NONKEY
2491     if (avctx->skip_frame >= skip_thresh) {
2493         memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2496     s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2498     // release no longer referenced frames
2499     for (i = 0; i < 5; i++)
2500         if (s->frames[i].tf.f->data[0] &&
2501             &s->frames[i] != prev_frame &&
2502             &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2503             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2504             &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2505             vp8_release_frame(s, &s->frames[i]);
/* Grab an unused slot from the frame pool for the picture being decoded. */
2507     curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
/* NOTE(review): the conditions selecting JPEG vs MPEG range are not
 * visible in this excerpt -- presumably the bitstream fullrange flag. */
2510     avctx->colorspace = AVCOL_SPC_BT470BG;
2512         avctx->color_range = AVCOL_RANGE_JPEG;
2514         avctx->color_range = AVCOL_RANGE_MPEG;
2516     /* Given that arithmetic probabilities are updated every frame, it's quite
2517      * likely that the values we have on a random interframe are complete
2518      * junk if we didn't start decode on a keyframe. So just don't display
2519      * anything rather than junk. */
2520     if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2521                          !s->framep[VP56_FRAME_GOLDEN] ||
2522                          !s->framep[VP56_FRAME_GOLDEN2])) {
2523         av_log(avctx, AV_LOG_WARNING,
2524                "Discarding interframe without a prior keyframe!\n");
2525         ret = AVERROR_INVALIDDATA;
2529     curframe->tf.f->key_frame = s->keyframe;
2530     curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2531                                             : AV_PICTURE_TYPE_P;
2532     if ((ret = vp8_alloc_frame(s, curframe, referenced))) {
2533         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
2537     // check if golden and altref are swapped
2538     if (s->update_altref != VP56_FRAME_NONE)
2539         s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2541         s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2543     if (s->update_golden != VP56_FRAME_NONE)
2544         s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2546         s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
/* NOTE(review): the previous-frame slot is either replaced by curframe
 * or kept -- the selecting condition (presumably s->update_last) is not
 * visible in this excerpt. */
2549         s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2551         s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2553     s->next_framep[VP56_FRAME_CURRENT] = curframe;
2555         ff_thread_finish_setup(avctx);
/* Cache the allocated frame's strides for the decode loops. */
2557     s->linesize   = curframe->tf.f->linesize[0];
2558     s->uvlinesize = curframe->tf.f->linesize[1];
/* Reset the above-row prediction context for the new frame. */
2560     memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2561     /* Zero macroblock structures for top/top-left prediction
2562      * from outside the frame. */
2564     memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2565            (s->mb_width + 1) * sizeof(*s->macroblocks));
2566     if (!s->mb_layout && s->keyframe)
2567         memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2569     memset(s->ref_count, 0, sizeof(s->ref_count));
2571     if (s->mb_layout == 1) {
2572         // Make sure the previous frame has read its segmentation map,
2573         // if we re-use the same map.
2574         if (prev_frame && s->segmentation.enabled &&
2575             !s->segmentation.update_map)
2576             ff_thread_await_progress(&prev_frame->tf, 1, 0);
2578             vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2580             vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2583     if (avctx->active_thread_type == FF_THREAD_FRAME)
/* Never use more jobs than there are coefficient partitions. */
2586         num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2587     s->num_jobs   = num_jobs;
2588     s->curframe   = curframe;
2589     s->prev_frame = prev_frame;
2590     s->mv_min.y   = -MARGIN;
2591     s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
/* Reset the per-job synchronisation state before launching the jobs. */
2592     for (i = 0; i < MAX_THREADS; i++) {
2593         s->thread_data[i].thread_mb_pos = 0;
2594         s->thread_data[i].wait_mb_pos   = INT_MAX;
2597         avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2600         avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
/* Frame threading: the whole picture is now decoded. */
2603     ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2604     memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2607     // if future frames don't use the updated probabilities,
2608     // reset them to the values we saved
2609     if (!s->update_probabilities)
2610         s->prob[0] = s->prob[1];
/* Invisible frames are decoded (they may update references) but never
 * returned to the caller. */
2612     if (!s->invisible) {
2613         if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2620     memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2624 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2627 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
#if CONFIG_VP7_DECODER
/* VP7 decode entry point (AVCodec.decode). */
static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                            AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */
2638 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2640 VP8Context *s = avctx->priv_data;
2643 vp8_decode_flush_impl(avctx, 1);
2644 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2645 av_frame_free(&s->frames[i].tf.f);
2650 static av_cold int vp8_init_frames(VP8Context *s)
2653 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2654 s->frames[i].tf.f = av_frame_alloc();
2655 if (!s->frames[i].tf.f)
2656 return AVERROR(ENOMEM);
2661 static av_always_inline
2662 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2664 VP8Context *s = avctx->priv_data;
2668 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2669 avctx->internal->allocate_progress = 1;
2671 ff_videodsp_init(&s->vdsp, 8);
2673 ff_vp78dsp_init(&s->vp8dsp);
2674 if (CONFIG_VP7_DECODER && is_vp7) {
2675 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2676 ff_vp7dsp_init(&s->vp8dsp);
2677 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2678 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2679 ff_vp8dsp_init(&s->vp8dsp);
2682 /* does not change for VP8 */
2683 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
2685 if ((ret = vp8_init_frames(s)) < 0) {
2686 ff_vp8_decode_free(avctx);
#if CONFIG_VP7_DECODER
/* AVCodec.init for the VP7 decoder. */
static int vp7_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */
2700 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2702 return vp78_decode_init(avctx, IS_VP8);
2705 #if CONFIG_VP8_DECODER
2706 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2708 VP8Context *s = avctx->priv_data;
2713 if ((ret = vp8_init_frames(s)) < 0) {
2714 ff_vp8_decode_free(avctx);
2721 #define REBASE(pic) pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
/* Frame-threading: copy the decoding state of the source thread's
 * context into this one so it can decode the following frame. */
2723 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2724                                             const AVCodecContext *src)
2726     VP8Context *s = dst->priv_data, *s_src = src->priv_data;
/* Adopt the new macroblock geometry when the stream was resized.
 * NOTE(review): the body releasing the size-dependent buffers is not
 * fully visible in this excerpt. */
2729     if (s->macroblocks_base &&
2730         (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2732         s->mb_width  = s_src->mb_width;
2733         s->mb_height = s_src->mb_height;
/* Carry over the entropy state: take the post-update probability set
 * when the source frame made its header updates persistent. */
2736     s->prob[0]      = s_src->prob[!s_src->update_probabilities];
2737     s->segmentation = s_src->segmentation;
2738     s->lf_delta     = s_src->lf_delta;
2739     memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
/* Take a reference on every source frame that holds picture data. */
2741     for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2742         if (s_src->frames[i].tf.f->data[0]) {
2743             int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
/* Remap the source's reference pointers into this context's frames[]. */
2749     s->framep[0] = REBASE(s_src->next_framep[0]);
2750     s->framep[1] = REBASE(s_src->next_framep[1]);
2751     s->framep[2] = REBASE(s_src->next_framep[2]);
2752     s->framep[3] = REBASE(s_src->next_framep[3]);
2756 #endif /* CONFIG_VP8_DECODER */
2758 #if CONFIG_VP7_DECODER
/* Decoder registration entry for On2 VP7. */
2759 AVCodec ff_vp7_decoder = {
2761     .long_name             = NULL_IF_CONFIG_SMALL("On2 VP7"),
2762     .type                  = AVMEDIA_TYPE_VIDEO,
2763     .id                    = AV_CODEC_ID_VP7,
2764     .priv_data_size        = sizeof(VP8Context),
2765     .init                  = vp7_decode_init,
2766     .close                 = ff_vp8_decode_free,
2767     .decode                = vp7_decode_frame,
/* No FRAME/SLICE threading capabilities are advertised for VP7. */
2768     .capabilities          = AV_CODEC_CAP_DR1,
2769     .flush                 = vp8_decode_flush,
2771 #endif /* CONFIG_VP7_DECODER */
2773 #if CONFIG_VP8_DECODER
/* Decoder registration entry for On2 VP8: supports direct rendering
 * plus both frame- and slice-threaded decoding. */
2774 AVCodec ff_vp8_decoder = {
2776     .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
2777     .type                  = AVMEDIA_TYPE_VIDEO,
2778     .id                    = AV_CODEC_ID_VP8,
2779     .priv_data_size        = sizeof(VP8Context),
2780     .init                  = ff_vp8_decode_init,
2781     .close                 = ff_vp8_decode_free,
2782     .decode                = ff_vp8_decode_frame,
2783     .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
2784                              AV_CODEC_CAP_SLICE_THREADS,
2785     .flush                 = vp8_decode_flush,
2786     .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2787     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2789 #endif /* CONFIG_VP8_DECODER */