2 * VP7/VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Fiona Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
10 * This file is part of Libav.
12 * Libav is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * Libav is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with Libav; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/imgutils.h"
31 #include "rectangle.h"
/* Release every buffer owned by the decoder context: per-thread sync
 * primitives and filter-strength arrays, the macroblock array, and the
 * top-row prediction/nnz/border caches. */
40 static void free_buffers(VP8Context *s)
44 for (i = 0; i < MAX_THREADS; i++) {
46 pthread_cond_destroy(&s->thread_data[i].cond);
47 pthread_mutex_destroy(&s->thread_data[i].lock);
49 av_freep(&s->thread_data[i].filter_strength);
51 av_freep(&s->thread_data);
52 av_freep(&s->macroblocks_base);
53 av_freep(&s->intra4x4_pred_mode_top);
54 av_freep(&s->top_nnz);
55 av_freep(&s->top_border);
/* s->macroblocks aliases into macroblocks_base (freed above), so clear it. */
57 s->macroblocks = NULL;
/* Allocate the picture buffer and the per-MB segment map for frame f.
 * @param ref non-zero if the frame may be used as a reference (requests
 *            AV_GET_BUFFER_FLAG_REF from the thread-aware allocator)
 * @return 0 on success, a negative AVERROR on failure; on seg_map
 *         allocation failure the picture buffer is released again. */
60 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
63 if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
64 ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
/* one byte of segment id per macroblock */
66 if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
67 ff_thread_release_buffer(s->avctx, &f->tf);
68 return AVERROR(ENOMEM);
/* Release a frame's segment map and its (thread-aware) picture buffer. */
73 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
75 av_buffer_unref(&f->seg_map);
76 ff_thread_release_buffer(s->avctx, &f->tf);
79 #if CONFIG_VP8_DECODER
/* Make dst a new reference to src: drop whatever dst held, then ref both
 * the picture buffer and the segment map. On failure dst is left released.
 * @return 0 on success, negative AVERROR on failure. */
80 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
84 vp8_release_frame(s, dst);
86 if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
89 !(dst->seg_map = av_buffer_ref(src->seg_map))) {
90 vp8_release_frame(s, dst);
91 return AVERROR(ENOMEM);
96 #endif /* CONFIG_VP8_DECODER */
/* Flush all decoder frames and clear the reference-frame pointer table.
 * @param free_mem presumably also frees context buffers when non-zero —
 *                 the handling of that flag is not visible here. */
98 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
100 VP8Context *s = avctx->priv_data;
103 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
104 vp8_release_frame(s, &s->frames[i]);
/* forget all of CURRENT/PREVIOUS/GOLDEN/GOLDEN2 */
105 memset(s->framep, 0, sizeof(s->framep));
/* Public flush callback: release frames without freeing context memory. */
111 static void vp8_decode_flush(AVCodecContext *avctx)
113 vp8_decode_flush_impl(avctx, 0);
/* Return a frame slot that is not currently acting as any of the four
 * reference frames; releases the slot's old picture data if still held. */
116 static VP8Frame *vp8_find_free_buffer(VP8Context *s)
118 VP8Frame *frame = NULL;
121 // find a free buffer
122 for (i = 0; i < 5; i++)
123 if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
124 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
125 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
126 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
127 frame = &s->frames[i];
/* 5 slots vs. 4 references guarantees a free one; hitting this is a bug */
131 av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
134 if (frame->tf.f->data[0])
135 vp8_release_frame(s, frame);
/* (Re)allocate all size-dependent buffers for a new frame size.
 * Flushes and resets avctx dimensions when width/height changed, then
 * allocates the macroblock array (layout depends on threading mode),
 * top-row caches and per-thread data.
 * @return 0 on success, negative AVERROR on allocation failure. */
140 static av_always_inline
141 int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
143 AVCodecContext *avctx = s->avctx;
146 if (width != s->avctx->width ||
147 height != s->avctx->height) {
148 vp8_decode_flush_impl(s->avctx, 1);
150 ret = ff_set_dimensions(s->avctx, width, height);
155 s->mb_width = (s->avctx->coded_width + 15) / 16;
156 s->mb_height = (s->avctx->coded_height + 15) / 16;
/* VP7 and sliced multi-threaded VP8 use the 2-D macroblock layout */
158 s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
159 FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1;
160 if (!s->mb_layout) { // Frame threading and one thread
161 s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
162 sizeof(*s->macroblocks));
163 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
164 } else // Sliced threading
165 s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
166 sizeof(*s->macroblocks));
167 s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
168 s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
169 s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
/* intra4x4_pred_mode_top is only needed (and allocated) when !mb_layout */
171 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
172 !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
174 return AVERROR(ENOMEM);
177 for (i = 0; i < MAX_THREADS; i++) {
178 s->thread_data[i].filter_strength =
179 av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
180 if (!s->thread_data[i].filter_strength) {
182 return AVERROR(ENOMEM);
185 pthread_mutex_init(&s->thread_data[i].lock, NULL);
186 pthread_cond_init(&s->thread_data[i].cond, NULL);
/* skip the guard column/element at the start of macroblocks_base */
190 s->macroblocks = s->macroblocks_base + 1;
/* VP7 wrapper: lets the is_vp7 flag be inlined as a compile-time constant. */
195 static int vp7_update_dimensions(VP8Context *s, int width, int height)
197 return update_dimensions(s, width, height, IS_VP7);
/* VP8 wrapper: lets the is_vp7 flag be inlined as a compile-time constant. */
200 static int vp8_update_dimensions(VP8Context *s, int width, int height)
202 return update_dimensions(s, width, height, IS_VP8);
/* Parse segmentation header (VP8 spec section 9.3): optional per-segment
 * quantizer and loop-filter deltas plus segment-id tree probabilities. */
205 static void parse_segment_info(VP8Context *s)
207 VP56RangeCoder *c = &s->c;
210 s->segmentation.update_map = vp8_rac_get(c);
212 if (vp8_rac_get(c)) { // update segment feature data
/* absolute_vals: values replace the base quant/filter instead of offsetting */
213 s->segmentation.absolute_vals = vp8_rac_get(c);
215 for (i = 0; i < 4; i++)
216 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
218 for (i = 0; i < 4; i++)
219 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
/* 255 means "use the default probability" for the segment-id tree */
221 if (s->segmentation.update_map)
222 for (i = 0; i < 3; i++)
223 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/* Read loop-filter delta updates: per-reference-frame deltas, then
 * per-prediction-mode deltas, each as a 6-bit magnitude + sign flag. */
226 static void update_lf_deltas(VP8Context *s)
228 VP56RangeCoder *c = &s->c;
231 for (i = 0; i < 4; i++) {
232 if (vp8_rac_get(c)) {
233 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
/* sign bit follows the magnitude */
236 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
240 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
241 if (vp8_rac_get(c)) {
242 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
245 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
/* Initialize the coefficient-partition range decoders (VP8 spec 9.5).
 * The partition count (1/2/4/8) comes from the header range coder; the
 * sizes of all but the last partition are stored as little-endian 24-bit
 * values at the start of buf. The last partition gets the remaining bytes.
 * @return presumably negative on a size overrun — the error path between
 *         the size check and the init is not visible here. */
250 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
252 const uint8_t *sizes = buf;
255 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
257 buf += 3 * (s->num_coeff_partitions - 1);
258 buf_size -= 3 * (s->num_coeff_partitions - 1);
262 for (i = 0; i < s->num_coeff_partitions - 1; i++) {
263 int size = AV_RL24(sizes + 3 * i);
/* reject a partition that claims more bytes than remain */
264 if (buf_size - size < 0)
267 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
/* the final partition consumes whatever is left */
271 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
/* VP7 quantizer setup: a 7-bit luma-AC index, with each of the other five
 * indices optionally overriding it; indices are mapped through the VP7
 * lookup tables into dequant multipliers (qmat[0] only — VP7 has no
 * per-segment quantizers here). */
276 static void vp7_get_quants(VP8Context *s)
278 VP56RangeCoder *c = &s->c;
280 int yac_qi = vp8_rac_get_uint(c, 7);
281 int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
282 int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
283 int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
284 int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
285 int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
287 s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
288 s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
289 s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
290 s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
/* chroma DC is capped at 132, matching the reference decoder */
291 s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
292 s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
/* VP8 quantizer setup (spec 9.6): a base 7-bit YAC index plus five signed
 * 4-bit deltas, expanded into dequant factors for each of the four
 * segments (all four identical when segmentation is off). */
295 static void get_quants(VP8Context *s)
297 VP56RangeCoder *c = &s->c;
300 int yac_qi = vp8_rac_get_uint(c, 7);
301 int ydc_delta = vp8_rac_get_sint(c, 4);
302 int y2dc_delta = vp8_rac_get_sint(c, 4);
303 int y2ac_delta = vp8_rac_get_sint(c, 4);
304 int uvdc_delta = vp8_rac_get_sint(c, 4);
305 int uvac_delta = vp8_rac_get_sint(c, 4);
307 for (i = 0; i < 4; i++) {
308 if (s->segmentation.enabled) {
309 base_qi = s->segmentation.base_quant[i];
/* relative mode: segment value is an offset added to yac_qi (not visible
 * here — the addition line was elided) */
310 if (!s->segmentation.absolute_vals)
/* indices are clamped to the 0..127 table range */
315 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta, 7)];
316 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
317 s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
318 /* 101581>>16 is equivalent to 155/100 */
319 s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
320 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
321 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
/* spec-mandated floors/caps */
323 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
324 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
329 * Determine which buffers golden and altref should be updated with after this frame.
330 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
332 * Intra frames update all 3 references
333 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
334 * If the update (golden|altref) flag is set, it's updated with the current frame
335 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
336 * If the flag is not set, the number read means:
338 * 1: VP56_FRAME_PREVIOUS
339 * 2: update golden with altref, or update altref with golden
/* Decode which frame buffer the golden/altref slot should be updated from;
 * see the comment block above for the mapping of the 2-bit value. */
341 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
343 VP56RangeCoder *c = &s->c;
346 return VP56_FRAME_CURRENT;
348 switch (vp8_rac_get_uint(c, 2)) {
350 return VP56_FRAME_PREVIOUS;
/* cross-copy: golden gets altref, altref gets golden */
352 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
354 return VP56_FRAME_NONE;
/* Reset all DCT token probabilities to the spec defaults, expanding the
 * per-band default table to all 16 coefficient positions. */
357 static void vp78_reset_probability_tables(VP8Context *s)
360 for (i = 0; i < 4; i++)
361 for (j = 0; j < 16; j++)
362 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
363 sizeof(s->prob->token[i][j]));
/* Token probability updates (VP8 spec 13.4): for each (plane, band,
 * context, token) position, conditionally read a new 8-bit probability and
 * propagate it to every coefficient index belonging to that band. */
366 static void vp78_update_probability_tables(VP8Context *s)
368 VP56RangeCoder *c = &s->c;
371 for (i = 0; i < 4; i++)
372 for (j = 0; j < 8; j++)
373 for (k = 0; k < 3; k++)
374 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
375 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
376 int prob = vp8_rac_get_uint(c, 8);
377 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
378 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
382 #define VP7_MVC_SIZE 17
383 #define VP8_MVC_SIZE 19
/* Read intra 16x16 / chroma prediction-mode probabilities and the MV
 * component probabilities (mvc_size is 17 for VP7, 19 for VP8). */
385 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
388 VP56RangeCoder *c = &s->c;
392 for (i = 0; i < 4; i++)
393 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
395 for (i = 0; i < 3; i++)
396 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
398 // 17.2 MV probability update
399 for (i = 0; i < 2; i++)
400 for (j = 0; j < mvc_size; j++)
401 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
402 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
/* Decode the golden/altref update decisions for this inter frame;
 * see ref_to_update() for the semantics of each flag. */
405 static void update_refs(VP8Context *s)
407 VP56RangeCoder *c = &s->c;
409 int update_golden = vp8_rac_get(c);
410 int update_altref = vp8_rac_get(c);
412 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
413 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
/* NOTE(review): despite the name, this copies the two half-resolution
 * planes data[1]/data[2] (i.e. chroma) from src to dst — used by
 * vp7_fade_frame(), which regenerates luma separately via fade(). */
416 static void copy_luma(AVFrame *dst, AVFrame *src, int width, int height)
420 for (j = 1; j < 3; j++) {
421 for (i = 0; i < height / 2; i++)
422 memcpy(dst->data[j] + i * dst->linesize[j],
423 src->data[j] + i * src->linesize[j], width / 2);
/* Apply the VP7 fade: dst = clip_uint8(y + y*beta/256 + alpha) for each
 * luma sample y of src. src and dst share the same linesize here. */
427 static void fade(uint8_t *dst, uint8_t *src,
428 int width, int height, int linesize,
433 for (j = 0; j < height; j++) {
434 for (i = 0; i < width; i++) {
435 uint8_t y = src[j * linesize + i];
436 dst[j * linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
/* VP7 fading (header field E): read signed 8-bit alpha/beta and, on
 * non-keyframes with a non-zero fade, apply it to the previous frame's
 * luma. If the previous frame doubles as the golden frame it must be
 * preserved, so the faded result is written into a freshly allocated
 * previous-frame buffer (chroma copied over unchanged).
 * @return 0 or a negative AVERROR. */
441 static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
443 int alpha = (int8_t) vp8_rac_get_uint(c, 8);
444 int beta = (int8_t) vp8_rac_get_uint(c, 8);
447 if (!s->keyframe && (alpha || beta)) {
448 int width = s->mb_width * 16;
449 int height = s->mb_height * 16;
/* cannot fade without a previous frame to fade from */
452 if (!s->framep[VP56_FRAME_PREVIOUS])
453 return AVERROR_INVALIDDATA;
456 src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
458 /* preserve the golden frame, write a new previous frame */
459 if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
460 s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
461 if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
464 dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
/* copy_luma() actually copies the chroma planes; luma is written by fade() */
466 copy_luma(dst, src, width, height);
469 fade(dst->data[0], src->data[0],
470 width, height, dst->linesize[0], alpha, beta);
/* Parse a full VP7 frame header (sections A-J) and set up the context for
 * macroblock decoding: dimensions, feature flags, quantizers, reference
 * updates, probabilities, loop-filter and scan order.
 * @return 0 on success, negative AVERROR on invalid data. */
476 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
478 VP56RangeCoder *c = &s->c;
479 int part1_size, hscale, vscale, i, j, ret;
480 int width = s->avctx->width;
481 int height = s->avctx->height;
/* frame tag: bit 0 = inverse keyframe flag, bits 1-3 = profile */
483 s->profile = (buf[0] >> 1) & 7;
484 if (s->profile > 1) {
485 avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
486 return AVERROR_INVALIDDATA;
489 s->keyframe = !(buf[0] & 1);
491 part1_size = AV_RL24(buf) >> 4;
/* profile 0 has a 4-byte tag, profile 1 a 3-byte tag */
493 buf += 4 - s->profile;
494 buf_size -= 4 - s->profile;
/* VP7 always uses the 6-tap epel filters */
496 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
498 ff_vp56_init_range_decoder(c, buf, part1_size);
500 buf_size -= part1_size;
502 /* A. Dimension information (keyframes only) */
504 width = vp8_rac_get_uint(c, 12);
505 height = vp8_rac_get_uint(c, 12);
506 hscale = vp8_rac_get_uint(c, 2);
507 vscale = vp8_rac_get_uint(c, 2);
508 if (hscale || vscale)
509 avpriv_request_sample(s->avctx, "Upscaling");
/* keyframes reset every reference and all probability state */
511 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
512 vp78_reset_probability_tables(s);
513 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
514 sizeof(s->prob->pred16x16));
515 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
516 sizeof(s->prob->pred8x8c));
517 for (i = 0; i < 2; i++)
518 memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
519 sizeof(vp7_mv_default_prob[i]));
520 memset(&s->segmentation, 0, sizeof(s->segmentation));
521 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
522 memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
525 if (s->keyframe || s->profile > 0)
526 memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));
528 /* B. Decoding information for all four macroblock-level features */
529 for (i = 0; i < 4; i++) {
530 s->feature_enabled[i] = vp8_rac_get(c);
531 if (s->feature_enabled[i]) {
532 s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
534 for (j = 0; j < 3; j++)
535 s->feature_index_prob[i][j] =
536 vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/* feature value width is profile-dependent; 0 width means flag-only */
538 if (vp7_feature_value_size[s->profile][i])
539 for (j = 0; j < 4; j++)
540 s->feature_value[i][j] =
541 vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
/* VP7 has no segmentation / loop-filter deltas */
545 s->segmentation.enabled = 0;
546 s->segmentation.update_map = 0;
547 s->lf_delta.enabled = 0;
/* VP7 has a single coefficient partition: the rest of the buffer */
549 s->num_coeff_partitions = 1;
550 ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
552 if (!s->macroblocks_base || /* first frame */
553 width != s->avctx->width || height != s->avctx->height ||
554 (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
555 if ((ret = vp7_update_dimensions(s, width, height)) < 0)
559 /* C. Dequantization indices */
562 /* D. Golden frame update flag (a Flag) for interframes only */
564 s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
565 s->sign_bias[VP56_FRAME_GOLDEN] = 0;
569 s->update_probabilities = 1;
572 if (s->profile > 0) {
573 s->update_probabilities = vp8_rac_get(c);
/* keep a copy so this frame's updates don't persist */
574 if (!s->update_probabilities)
575 s->prob[1] = s->prob[0];
578 s->fade_present = vp8_rac_get(c);
581 /* E. Fading information for previous frame */
582 if (s->fade_present && vp8_rac_get(c)) {
583 if ((ret = vp7_fade_frame(s ,c)) < 0)
587 /* F. Loop filter type */
589 s->filter.simple = vp8_rac_get(c);
591 /* G. DCT coefficient ordering specification */
593 for (i = 1; i < 16; i++)
594 s->prob[0].scan[i] = zigzag_scan[vp8_rac_get_uint(c, 4)];
596 /* H. Loop filter levels */
598 s->filter.simple = vp8_rac_get(c);
599 s->filter.level = vp8_rac_get_uint(c, 6);
600 s->filter.sharpness = vp8_rac_get_uint(c, 3);
602 /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
603 vp78_update_probability_tables(s);
/* VP7 has no macroblock-skip coding */
605 s->mbskip_enabled = 0;
607 /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
609 s->prob->intra = vp8_rac_get_uint(c, 8);
610 s->prob->last = vp8_rac_get_uint(c, 8);
611 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
/* Parse a VP8 frame header (RFC 6386 section 9): frame tag, keyframe
 * start code and dimensions, segmentation, loop filter, partitions,
 * quantizers, reference handling and probability updates.
 * @return 0 on success, negative AVERROR on invalid data. */
617 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
619 VP56RangeCoder *c = &s->c;
620 int header_size, hscale, vscale, ret;
621 int width = s->avctx->width;
622 int height = s->avctx->height;
/* 3-byte frame tag: keyframe flag, profile, show_frame, partition size */
624 s->keyframe = !(buf[0] & 1);
625 s->profile = (buf[0]>>1) & 7;
626 s->invisible = !(buf[0] & 0x10);
627 header_size = AV_RL24(buf) >> 5;
632 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
635 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
636 sizeof(s->put_pixels_tab));
637 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
638 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
639 sizeof(s->put_pixels_tab));
/* keyframes carry a 7-byte start-code + dimension block after the tag */
641 if (header_size > buf_size - 7 * s->keyframe) {
642 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
643 return AVERROR_INVALIDDATA;
647 if (AV_RL24(buf) != 0x2a019d) {
648 av_log(s->avctx, AV_LOG_ERROR,
649 "Invalid start code 0x%x\n", AV_RL24(buf));
650 return AVERROR_INVALIDDATA;
/* 14-bit width/height with 2-bit upscale codes */
652 width = AV_RL16(buf + 3) & 0x3fff;
653 height = AV_RL16(buf + 5) & 0x3fff;
654 hscale = buf[4] >> 6;
655 vscale = buf[6] >> 6;
659 if (hscale || vscale)
660 avpriv_request_sample(s->avctx, "Upscaling");
/* keyframes reset all references and probability state to defaults */
662 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
663 vp78_reset_probability_tables(s);
664 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
665 sizeof(s->prob->pred16x16));
666 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
667 sizeof(s->prob->pred8x8c));
668 memcpy(s->prob->mvc, vp8_mv_default_prob,
669 sizeof(s->prob->mvc));
670 memset(&s->segmentation, 0, sizeof(s->segmentation));
671 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
674 ff_vp56_init_range_decoder(c, buf, header_size);
676 buf_size -= header_size;
679 s->colorspace = vp8_rac_get(c);
681 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
682 s->fullrange = vp8_rac_get(c);
685 if ((s->segmentation.enabled = vp8_rac_get(c)))
686 parse_segment_info(s);
688 s->segmentation.update_map = 0; // FIXME: move this to some init function?
690 s->filter.simple = vp8_rac_get(c);
691 s->filter.level = vp8_rac_get_uint(c, 6);
692 s->filter.sharpness = vp8_rac_get_uint(c, 3);
694 if ((s->lf_delta.enabled = vp8_rac_get(c)))
698 if (setup_partitions(s, buf, buf_size)) {
699 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
700 return AVERROR_INVALIDDATA;
703 if (!s->macroblocks_base || /* first frame */
704 width != s->avctx->width || height != s->avctx->height)
705 if ((ret = vp8_update_dimensions(s, width, height)) < 0)
/* sign bias: whether golden/altref MVs must be sign-flipped when reused */
712 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
713 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
716 // if we aren't saving this frame's probabilities for future frames,
717 // make a copy of the current probabilities
718 if (!(s->update_probabilities = vp8_rac_get(c)))
719 s->prob[1] = s->prob[0];
721 s->update_last = s->keyframe || vp8_rac_get(c);
723 vp78_update_probability_tables(s);
725 if ((s->mbskip_enabled = vp8_rac_get(c)))
726 s->prob->mbskip = vp8_rac_get_uint(c, 8);
/* inter-frame only: intra/reference selection probabilities */
729 s->prob->intra = vp8_rac_get_uint(c, 8);
730 s->prob->last = vp8_rac_get_uint(c, 8);
731 s->prob->golden = vp8_rac_get_uint(c, 8);
732 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
/* Clamp a motion vector to the per-macroblock legal range
 * (s->mv_min/mv_max, set up by the caller for the current MB position). */
738 static av_always_inline
739 void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
741 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
742 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
746 * Motion vector coding, 17.1.
/* Decode one signed MV component (spec 17.1). Large magnitudes are coded
 * bitwise against probabilities p[9..]; small ones via a probability tree.
 * VP7 uses 8-bit magnitudes, VP8 10-bit. Returns the signed component. */
748 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
752 if (vp56_rac_get_prob_branchy(c, p[0])) {
/* long vector: low 3 bits LSB-first, then the high bits MSB-first */
755 for (i = 0; i < 3; i++)
756 x += vp56_rac_get_prob(c, p[9 + i]) << i;
757 for (i = (vp7 ? 7 : 9); i > 3; i--)
758 x += vp56_rac_get_prob(c, p[9 + i]) << i;
/* bit 3 is only coded when it can't be inferred from the upper bits */
759 if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
763 const uint8_t *ps = p + 2;
764 bit = vp56_rac_get_prob(c, *ps);
767 bit = vp56_rac_get_prob(c, *ps);
770 x += vp56_rac_get_prob(c, *ps);
/* sign bit is only present for non-zero magnitudes */
773 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
/* Select the sub-MV probability set from whether the left/top neighbour
 * sub-MVs are zero/equal; VP7 uses a single fixed set. */
776 static av_always_inline
777 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
780 return vp7_submv_prob;
783 return vp8_submv_prob[4 - !!left];
785 return vp8_submv_prob[2];
786 return vp8_submv_prob[1 - !!left];
790 * Split motion vector prediction, 16.4.
791 * @returns the number of motion vectors parsed (2, 4 or 16)
/* Decode split-MV partitioning and the per-partition motion vectors
 * (spec 16.4). Neighbouring sub-MVs (left MB, top MB or earlier blocks of
 * this MB) provide the context for each sub-MV's probability set.
 * @return number of motion vectors parsed (2, 4 or 16) */
793 static av_always_inline
794 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
795 int layout, int is_vp7)
799 VP8Macroblock *top_mb;
800 VP8Macroblock *left_mb = &mb[-1];
801 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
802 const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
804 VP56mv *left_mv = left_mb->bmv;
805 VP56mv *cur_mv = mb->bmv;
807 if (!layout) // layout is inlined, s->mb_layout is not
/* 2-D layout: the MB above is one row (mb_width + 1 entries) back */
810 top_mb = &mb[-s->mb_width - 1];
811 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
812 top_mv = top_mb->bmv;
/* partition type tree: 16x8 / 8x16 / 8x8 / 4x4 */
814 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
815 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
816 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
818 part_idx = VP8_SPLITMVMODE_8x8;
820 part_idx = VP8_SPLITMVMODE_4x4;
823 num = vp8_mbsplit_count[part_idx];
824 mbsplits_cur = vp8_mbsplits[part_idx],
825 firstidx = vp8_mbfirstidx[part_idx];
826 mb->partitioning = part_idx;
828 for (n = 0; n < num; n++) {
830 uint32_t left, above;
831 const uint8_t *submv_prob;
/* neighbour sub-MVs come from the adjacent MB on the block edge,
 * otherwise from previously decoded blocks of this MB */
834 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
836 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
838 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
840 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
842 submv_prob = get_submv_prob(left, above, is_vp7);
/* sub-MV mode tree: NEW4x4 / ZERO4x4 / TOP4x4 / LEFT4x4 */
844 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
845 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
846 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
847 mb->bmv[n].y = mb->mv.y +
848 read_mv_component(c, s->prob->mvc[0], is_vp7);
849 mb->bmv[n].x = mb->mv.x +
850 read_mv_component(c, s->prob->mvc[1], is_vp7);
852 AV_ZERO32(&mb->bmv[n]);
855 AV_WN32A(&mb->bmv[n], above);
858 AV_WN32A(&mb->bmv[n], left);
866 * The vp7 reference decoder uses a padding macroblock column (added to right
867 * edge of the frame) to guard against illegal macroblock offsets. The
868 * algorithm has bugs that permit offsets to straddle the padding column.
869 * This function replicates those bugs.
871 * @param[out] edge_x macroblock x address
872 * @param[out] edge_y macroblock y address
874 * @return macroblock offset legal (boolean)
/* See the doc comment above: maps an (mb_x, mb_y) plus offset onto the
 * virtual grid that includes VP7's right-edge padding column, reproducing
 * the reference decoder's (buggy) wraparound behaviour. */
876 static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
877 int xoffset, int yoffset, int boundary,
878 int *edge_x, int *edge_y)
/* virtual width includes the one-MB padding column */
880 int vwidth = mb_width + 1;
881 int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
/* illegal if before the boundary or landing on the padding column */
882 if (new < boundary || new % vwidth == vwidth - 1)
884 *edge_y = new / vwidth;
885 *edge_x = new % vwidth;
/* Return the sub-block MV for split-MV macroblocks, else the MB's bmv[0]. */
889 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
891 return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
/* VP7 inter MV decoding: score candidate predictors from up to
 * VP7_MV_PRED_COUNT neighbour positions (nearest / near / zero counts),
 * then decode the MV mode tree (ZERO / NEAREST / NEAR / NEW / SPLIT)
 * against the score-indexed context tables. */
894 static av_always_inline
895 void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
896 int mb_x, int mb_y, int layout)
898 VP8Macroblock *mb_edge[12];
899 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
900 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
903 uint8_t cnt[3] = { 0 };
904 VP56RangeCoder *c = &s->c;
907 AV_ZERO32(&near_mv[0]);
908 AV_ZERO32(&near_mv[1]);
909 AV_ZERO32(&near_mv[2]);
911 for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
912 const VP7MVPred * pred = &vp7_mv_pred[i];
915 if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
916 pred->yoffset, !s->profile, &edge_x, &edge_y)) {
/* resolve the neighbour MB in whichever macroblock layout is active */
917 VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
918 ? s->macroblocks_base + 1 + edge_x +
919 (s->mb_width + 1) * (edge_y + 1)
920 : s->macroblocks + edge_x +
921 (s->mb_height - edge_y - 1) * 2;
922 uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
/* bucket the candidate into nearest / near slots by (in)equality */
924 if (AV_RN32A(&near_mv[CNT_NEAREST])) {
925 if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
927 } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
928 if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
932 AV_WN32A(&near_mv[CNT_NEAR], mv);
936 AV_WN32A(&near_mv[CNT_NEAREST], mv);
/* each predictor position carries a fixed score weight */
945 cnt[idx] += vp7_mv_pred[i].score;
948 mb->partitioning = VP8_SPLITMVMODE_NONE;
950 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
951 mb->mode = VP8_MVMODE_MV;
953 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
955 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
/* NEW mv: base is the better of zero and nearest/near by score */
957 if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
958 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
960 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
962 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
963 mb->mode = VP8_MVMODE_SPLIT;
/* mb->mv becomes the last decoded sub-MV */
964 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
966 mb->mv.y += read_mv_component(c, s->prob->mvc[0], IS_VP7);
967 mb->mv.x += read_mv_component(c, s->prob->mvc[1], IS_VP7);
971 mb->mv = near_mv[CNT_NEAR];
975 mb->mv = near_mv[CNT_NEAREST];
979 mb->mode = VP8_MVMODE_ZERO;
/* VP8 inter MV decoding (spec 16.2/16.3): gather the top, left and
 * top-left neighbour MVs (sign-flipped when the reference's sign bias
 * differs), rank them into nearest/near with weighted counts, then decode
 * the MV mode tree against the count-indexed context tables. */
985 static av_always_inline
986 void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
987 int mb_x, int mb_y, int layout)
989 VP8Macroblock *mb_edge[3] = { 0 /* top */,
992 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
993 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
995 int cur_sign_bias = s->sign_bias[mb->ref_frame];
996 int8_t *sign_bias = s->sign_bias;
998 uint8_t cnt[4] = { 0 };
999 VP56RangeCoder *c = &s->c;
/* neighbour addressing differs between the linear and 2-D MB layouts */
1001 if (!layout) { // layout is inlined (s->mb_layout is not)
1002 mb_edge[0] = mb + 2;
1003 mb_edge[2] = mb + 1;
1005 mb_edge[0] = mb - s->mb_width - 1;
1006 mb_edge[2] = mb - s->mb_width - 2;
1009 AV_ZERO32(&near_mv[0]);
1010 AV_ZERO32(&near_mv[1]);
1011 AV_ZERO32(&near_mv[2]);
1013 /* Process MB on top, left and top-left */
1014 #define MV_EDGE_CHECK(n) \
1016 VP8Macroblock *edge = mb_edge[n]; \
1017 int edge_ref = edge->ref_frame; \
1018 if (edge_ref != VP56_FRAME_CURRENT) { \
1019 uint32_t mv = AV_RN32A(&edge->mv); \
1021 if (cur_sign_bias != sign_bias[edge_ref]) { \
1022 /* SWAR negate of the values in mv. */ \
1024 mv = ((mv & 0x7fff7fff) + \
1025 0x00010001) ^ (mv & 0x80008000); \
1027 if (!n || mv != AV_RN32A(&near_mv[idx])) \
1028 AV_WN32A(&near_mv[++idx], mv); \
1029 cnt[idx] += 1 + (n != 2); \
1031 cnt[CNT_ZERO] += 1 + (n != 2); \
1039 mb->partitioning = VP8_SPLITMVMODE_NONE;
1040 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1041 mb->mode = VP8_MVMODE_MV;
1043 /* If we have three distinct MVs, merge first and last if they're the same */
1044 if (cnt[CNT_SPLITMV] &&
1045 AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1046 cnt[CNT_NEAREST] += 1;
1048 /* Swap near and nearest if necessary */
1049 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1050 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
1051 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1054 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1055 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1056 /* Choose the best mv out of 0,0 and the nearest mv */
1057 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
/* reuse cnt[CNT_SPLITMV] as the split-mode context (weighted neighbour
 * split count), per the spec */
1058 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
1059 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
1060 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1062 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1063 mb->mode = VP8_MVMODE_SPLIT;
1064 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1066 mb->mv.y += read_mv_component(c, s->prob->mvc[0], IS_VP8);
1067 mb->mv.x += read_mv_component(c, s->prob->mvc[1], IS_VP8);
1068 mb->bmv[0] = mb->mv;
1071 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
1072 mb->bmv[0] = mb->mv;
1075 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
1076 mb->bmv[0] = mb->mv;
1079 mb->mode = VP8_MVMODE_ZERO;
1081 mb->bmv[0] = mb->mv;
/* Decode the 16 intra 4x4 prediction modes of an MB. On keyframes each
 * mode is context-coded from the modes above and to the left; on inter
 * frames a single fixed probability set is used. */
1086 static av_always_inline
1087 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1088 int mb_x, int keyframe, int layout)
1089 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
/* 2-D layout keeps the top row of modes inside the MB structs */
1092 VP8Macroblock *mb_top = mb - s->mb_width - 1;
1093 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1098 uint8_t *const left = s->intra4x4_pred_mode_left;
1100 top = mb->intra4x4_pred_mode_top;
1102 top = s->intra4x4_pred_mode_top + 4 * mb_x;
1103 for (y = 0; y < 4; y++) {
1104 for (x = 0; x < 4; x++) {
1106 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
1107 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
/* decoded mode becomes the context for the next row/column */
1108 left[y] = top[x] = *intra4x4;
1114 for (i = 0; i < 16; i++)
1115 intra4x4[i] = vp8_rac_get_tree(c, vp8_rac_get_tree ? vp8_pred4x4_tree : vp8_pred4x4_tree,
1116 vp8_pred4x4_prob_inter);
/* Decode a macroblock's mode header: VP7 features or VP8 segment id,
 * skip flag, intra/inter decision, prediction modes and (for inter MBs)
 * the reference frame and motion vectors. */
1121 static av_always_inline
1122 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1123 uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
1124 VP56RangeCoder *c = &s->c;
1125 const char *vp7_feature_name[] = { "q-index",
1127 "partial-golden-update",
/* VP7 macroblock features are parsed but only reported, not applied */
1132 for (i = 0; i < 4; i++) {
1133 if (s->feature_enabled[i]) {
1134 if (vp56_rac_get_prob(c, s->feature_present_prob[i])) {
1135 int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
1136 s->feature_index_prob[i]);
1137 av_log(s->avctx, AV_LOG_WARNING,
1138 "Feature %s present in macroblock (value 0x%x)\n",
1139 vp7_feature_name[i], s->feature_value[i][index]);
1143 } else if (s->segmentation.update_map)
1144 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
/* no map update: inherit from the previous frame's seg map when given */
1145 else if (s->segmentation.enabled)
1146 *segment = ref ? *ref : *segment;
1147 mb->segment = *segment;
1149 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
1152 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
1153 vp8_pred16x16_prob_intra);
1155 if (mb->mode == MODE_I4x4) {
1156 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
/* whole-MB intra mode: replicate it as the 4x4 context for neighbours */
1158 const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1159 : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1160 if (s->mb_layout == 1)
1161 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1163 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1164 AV_WN32A(s->intra4x4_pred_mode_left, modes);
1167 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1168 vp8_pred8x8c_prob_intra);
1169 mb->ref_frame = VP56_FRAME_CURRENT;
1170 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
/* inter MB: pick previous / golden / altref (VP7 has no altref) */
1172 if (vp56_rac_get_prob_branchy(c, s->prob->last))
1174 (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
1175 : VP56_FRAME_GOLDEN;
1177 mb->ref_frame = VP56_FRAME_PREVIOUS;
1178 s->ref_count[mb->ref_frame - 1]++;
1180 // motion vectors, 16.3
1182 vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1184 vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
1187 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1189 if (mb->mode == MODE_I4x4)
1190 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1192 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1194 mb->ref_frame = VP56_FRAME_CURRENT;
1195 mb->partitioning = VP8_SPLITMVMODE_NONE;
1196 AV_ZERO32(&mb->bmv[0]);
1201 * @param r arithmetic bitstream reader context
1202 * @param block destination for block coefficients
1203 * @param probs probabilities to use when reading trees from the bitstream
1204 * @param i initial coeff index, 0 unless a separate DC block is coded
1205 * @param qmul array holding the dc/ac dequant factor at position 0/1
1207 * @return 0 if no coeffs were decoded
1208 * otherwise, the index of the last coeff decoded plus one
// Decode the DCT coefficient tokens of one 4x4 block from the boolean
// (range) coder, following the VP8 token tree (RFC 6386 §13).  Works on a
// local copy of the coder state for speed; the caller's state is synced
// back on exit (sync code elided here).  NOTE(review): several lines of
// this function are not visible in this chunk.
1210 static av_always_inline
1211 int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1212                                  uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1213                                  int i, uint8_t *token_prob, int16_t qmul[2],
1214                                  const uint8_t scan[16], int vp7)
// local copy of the range coder keeps its state in registers
1216     VP56RangeCoder c = *r;
1221         if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
1225         if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1227                 break; // invalid input; blocks should end with EOB
// a zero coefficient: next token uses the "previous was zero" context (0)
1228             token_prob = probs[i][0];
1234         if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
// coefficient was +/-1: next token uses context 1
1236             token_prob = probs[i + 1][1];
1238             if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
// build a value in 2..4 from two more probability bits
1239                 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1241                 coeff += vp56_rac_get_prob(&c, token_prob[5]);
1245                 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1246                     if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1247                         coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1248                     } else { // DCT_CAT2
1250                         coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1251                         coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1253                 } else { // DCT_CAT3 and up
// cat index 0..3 selects category 3..6; base value is 3 + 8<<cat
1254                     int a = vp56_rac_get_prob(&c, token_prob[8]);
1255                     int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
1256                     int cat = (a << 1) + b;
1257                     coeff = 3 + (8 << cat);
1258                     coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
// large coefficient: next token uses context 2
1261             token_prob = probs[i + 1][2];
// sign bit, then dequantize: qmul[0] for DC (i==0), qmul[1] for AC
1263         block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
// VP7 inter-macroblock DC prediction: the DC coefficient of inter blocks
// is predicted from the previous DC (pred[0]); pred[1] (not visible here)
// presumably tracks how long the prediction has been stable — TODO confirm
// against the elided lines.
1270 static av_always_inline
1271 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1273     int16_t dc = block[0];
// true when pred[0]==0, dc==0, or pred[0] and dc differ in sign
// (the sign test uses the arithmetic shift of the XOR of the two values)
1281     if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1282         block[0] = pred[0] = dc;
1287         block[0] = pred[0] = dc;
// Thin VP7 wrapper: instantiates decode_block_coeffs_internal() with the
// caller-supplied scan order and vp7 == IS_VP7.
1293 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1295                                             uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1296                                             int i, uint8_t *token_prob,
1298                                             const uint8_t scan[16])
1300     return decode_block_coeffs_internal(r, block, probs, i,
1301                                         token_prob, qmul, scan, IS_VP7);
// Thin VP8 wrapper: fixed zigzag scan, vp7 == IS_VP8.  Guarded by #ifndef
// so an architecture-specific optimized version can replace it.
1304 #ifndef vp8_decode_block_coeffs_internal
1305 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1307                                             uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1308                                             int i, uint8_t *token_prob,
1311     return decode_block_coeffs_internal(r, block, probs, i,
1312                                         token_prob, qmul, zigzag_scan, IS_VP8);
1317 * @param c arithmetic bitstream reader context
1318 * @param block destination for block coefficients
1319 * @param probs probabilities to use when reading trees from the bitstream
1320 * @param i initial coeff index, 0 unless a separate DC block is coded
1321 * @param zero_nhood the initial prediction context for number of surrounding
1322 * all-zero blocks (only left/top, so 0-2)
1323 * @param qmul array holding the dc/ac dequant factor at position 0/1
1325 * @return 0 if no coeffs were decoded
1326 * otherwise, the index of the last coeff decoded plus one
// Entry point for block coefficient decoding: handles the cheap, common
// DCT_EOB-first case inline, then dispatches to the VP7 or VP8 internal
// decoder.  zero_nhood (0-2) selects the initial probability context from
// the count of neighbouring all-zero blocks.
1328 static av_always_inline
1329 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1330                         uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1331                         int i, int zero_nhood, int16_t qmul[2],
1332                         const uint8_t scan[16], int vp7)
1334     uint8_t *token_prob = probs[i][zero_nhood];
// fast path: block has no coefficients at all
1335     if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
1337     return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1338                                                   token_prob, qmul, scan)
1339                : vp8_decode_block_coeffs_internal(c, block, probs, i,
// Decode all coefficients of one macroblock: optional luma DC (WHT) block,
// 16 luma 4x4 blocks, then 2x4 chroma blocks.  t_nnz/l_nnz are the
// top/left non-zero flags used as the zero-neighbourhood context
// (index 8 is the DC/Y2 block).
1343 static av_always_inline
1344 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1345                       VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1348     int i, x, y, luma_start = 0, luma_ctx = 3;
1349     int nnz_pred, nnz, nnz_total = 0;
1350     int segment = mb->segment;
// a separate DC (Y2) block exists except for I4x4 and (VP8) split-MV modes
1353     if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1354         nnz_pred = t_nnz[8] + l_nnz[8];
1356         // decode DC values and do hadamard
1357         nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1358                                   nnz_pred, s->qmat[segment].luma_dc_qmul,
1359                                   zigzag_scan, is_vp7);
1360         l_nnz[8] = t_nnz[8] = !!nnz;
// VP7 predicts the inter DC from the previous macroblock of the same ref
1362         if (is_vp7 && mb->mode > MODE_I4x4) {
1363             nnz |= inter_predict_dc(td->block_dc,
1364                                     s->inter_dc_pred[mb->ref_frame - 1]);
// inverse Walsh-Hadamard transform scatters the DC values into td->block;
// the _dc variant is the DC-only fast path
1371                 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1373                 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
// 16 luma AC blocks; luma_start/luma_ctx were adjusted above when a
// separate DC block was coded
1380     for (y = 0; y < 4; y++)
1381         for (x = 0; x < 4; x++) {
1382             nnz_pred = l_nnz[y] + t_nnz[x];
1383             nnz = decode_block_coeffs(c, td->block[y][x],
1384                                       s->prob->token[luma_ctx],
1385                                       luma_start, nnz_pred,
1386                                       s->qmat[segment].luma_qmul,
1387                                       s->prob[0].scan, is_vp7);
1388             /* nnz+block_dc may be one more than the actual last index,
1389              * but we don't care */
1390             td->non_zero_count_cache[y][x] = nnz + block_dc;
1391             t_nnz[x] = l_nnz[y] = !!nnz;
1396     // TODO: what to do about dimensions? 2nd dim for luma is x,
1397     // but for chroma it's (y<<1)|x
1398     for (i = 4; i < 6; i++)
1399         for (y = 0; y < 2; y++)
1400             for (x = 0; x < 2; x++) {
1401                 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1402                 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1403                                           s->prob->token[2], 0, nnz_pred,
1404                                           s->qmat[segment].chroma_qmul,
1405                                           s->prob[0].scan, is_vp7);
1406                 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1407                 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1411     // if there were no coded coeffs despite the macroblock not being marked skip,
1412     // we MUST not do the inner loop filter and should not do IDCT
1413     // Since skip isn't used for bitstream prediction, just manually set it.
// Save the bottom row of a macroblock (16 luma + 8+8 chroma pixels) into
// the top_border scratch buffer, so the next macroblock row can use it for
// intra prediction after in-place deblocking has modified the frame.
// In "simple" filter mode chroma is untouched, so only luma is saved.
1418 static av_always_inline
1419 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1420                       uint8_t *src_cb, uint8_t *src_cr,
1421                       int linesize, int uvlinesize, int simple)
1423     AV_COPY128(top_border, src_y + 15 * linesize);
1425         AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1426         AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
// Exchange (or copy, depending on the per-call xchg flag baked into each
// XCHG invocation) pixels between the frame and the top_border buffer
// around one macroblock, giving intra prediction a consistent top/topleft
// edge.  Layout of top_border: 0-15 luma, 16-23 cb, 24-31 cr; the -32
// offset addresses the previous macroblock's entry for top-left prediction.
1430 static av_always_inline
1431 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1432                     uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
1433                     int mb_y, int mb_width, int simple, int xchg)
1435     uint8_t *top_border_m1 = top_border - 32;     // for TL prediction
// step back one row so we operate on the row above this macroblock
1437     src_cb -= uvlinesize;
1438     src_cr -= uvlinesize;
// XCHG body elided in this chunk: swaps when xchg is set, copies otherwise
1440 #define XCHG(a, b, xchg)                                                      \
1448     XCHG(top_border_m1 + 8, src_y - 8, xchg);
1449     XCHG(top_border,        src_y,     xchg);
1450     XCHG(top_border + 8,    src_y + 8, 1);
// top-right pixels come from the next macroblock's border entry
1451     if (mb_x < mb_width - 1)
1452         XCHG(top_border + 32, src_y + 16, 1);
1454     // only copy chroma for normal loop filter
1455     // or to initialize the top row to 127
1456     if (!simple || !mb_y) {
1457         XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1458         XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1459         XCHG(top_border + 16, src_cb, 1);
1460         XCHG(top_border + 24, src_cr, 1);
// Replace DC_PRED8x8 with the edge-aware variant when top/left neighbours
// are missing (frame border): no left -> TOP_DC or DC_128, no top -> LEFT_DC.
1464 static av_always_inline
1465 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1468         return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1470         return mb_y ? mode : LEFT_DC_PRED8x8;
// TM (PLANE) 8x8 prediction needs both top and left edges; degrade to
// VERT / HOR / DC-constant prediction at the frame borders.  VP7 uses a
// 128 fill value where VP8 uses 129.
1473 static av_always_inline
1474 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1477         return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1479         return mb_y ? mode : HOR_PRED8x8;
// Map an 8x8/16x16 intra prediction mode to a border-safe equivalent when
// the macroblock sits on the top or left frame edge (no emulated edge
// pixels exist for intra prediction).
1482 static av_always_inline
1483 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1487         return check_dc_pred8x8_mode(mode, mb_x, mb_y);
// VERT needs a top edge, HOR needs a left edge; otherwise constant fill
1489         return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1491         return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1492     case PLANE_PRED8x8: /* TM */
1493         return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
// 4x4 variant of the TM-mode border check: degrade TM to VERT / HOR / DC
// constant at the frame edges (VP7 fills with 128, VP8 with 129).
1498 static av_always_inline
1499 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1502         return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1504         return mb_y ? mode : HOR_VP8_PRED;
// Border check for 4x4 intra modes.  Some modes cannot be fixed by simply
// switching mode; those set *copy_buf so the caller predicts into a small
// stack buffer with hand-built edges instead (see intra_predict()).
1508 static av_always_inline
1509 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1510                                      int *copy_buf, int vp7)
1514         if (!mb_x && mb_y) {
1519     case DIAG_DOWN_LEFT_PRED:
1520     case VERT_LEFT_PRED:
1521         return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1529         return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1531         return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1532     case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1533                    * as 16x16/8x8 DC */
1534     case DIAG_DOWN_RIGHT_PRED:
1535     case VERT_RIGHT_PRED:
/**
 * Perform intra prediction for one macroblock (luma 16x16 or 16 4x4
 * sub-blocks, plus 8x8 chroma), adding the IDCT residual for I4x4 blocks.
 *
 * Fix: line "mb_y + y, &copy, ..." had been corrupted to the HTML entity
 * "&copy;" by a bad encoding round-trip; restored the address-of the local
 * 'copy' flag so the file compiles again.  No other code token changed.
 */
1544 static av_always_inline
1545 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1546                    VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1548     int x, y, mode, nnz;
1551     /* for the first row, we need to run xchg_mb_border to init the top edge
1552      * to 127 otherwise, skip it if we aren't going to deblock */
1553     if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1554         xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1555                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1556                        s->filter.simple, 1);
1558     if (mb->mode < MODE_I4x4) {
// whole-macroblock (16x16) luma prediction
1559         mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1560         s->hpc.pred16x16[mode](dst[0], s->linesize);
1562         uint8_t *ptr = dst[0];
1563         uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
// fill values for missing edges: VP7 always 128, VP8 127/129
1564         const uint8_t lo = is_vp7 ? 128 : 127;
1565         const uint8_t hi = is_vp7 ? 128 : 129;
1566         uint8_t tr_top[4] = { lo, lo, lo, lo };
1568         // all blocks on the right edge of the macroblock use the bottom
1569         // edge of the top macroblock for their topright edge
1570         uint8_t *tr_right = ptr - s->linesize + 16;
1572         // if we're on the right edge of the frame, said edge is extended
1573         // from the top macroblock
1574         if (mb_y && mb_x == s->mb_width - 1) {
1575             tr       = tr_right[-1] * 0x01010101u;
1576             tr_right = (uint8_t *) &tr;
1580             AV_ZERO128(td->non_zero_count_cache);
1582         for (y = 0; y < 4; y++) {
1583             uint8_t *topright = ptr + 4 - s->linesize;
1584             for (x = 0; x < 4; x++) {
1585                 int copy = 0, linesize = s->linesize;
1586                 uint8_t *dst = ptr + 4 * x;
// 5 rows of 8 bytes: a scratch block with hand-built edges for the
// border cases that check_intra_pred4x4_mode_emuedge() flags via 'copy'
1587                 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];
1589                 if ((y == 0 || x == 3) && mb_y == 0) {
1592                     topright = tr_right;
1594                 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1595                                                         mb_y + y, &copy, is_vp7);
// predict into the scratch buffer; (12) skips the 4-byte top edge row
// plus the left-edge column bytes
1597                     dst = copy_dst + 12;
1601                         AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1603                         AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1607                         copy_dst[3] = ptr[4 * x - s->linesize - 1];
// left-edge column of the scratch block, one byte per row
1616                     copy_dst[11] = ptr[4 * x                   - 1];
1617                     copy_dst[19] = ptr[4 * x + s->linesize     - 1];
1618                     copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1619                     copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1622                 s->hpc.pred4x4[mode](dst, topright, linesize);
// copy the predicted 4x4 block back from the scratch buffer
1624                     AV_COPY32(ptr + 4 * x,                   copy_dst + 12);
1625                     AV_COPY32(ptr + 4 * x + s->linesize,     copy_dst + 20);
1626                     AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1627                     AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
// add the residual: DC-only fast path when count == 1
1630                 nnz = td->non_zero_count_cache[y][x];
1633                         s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1634                                                   td->block[y][x], s->linesize);
1636                         s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1637                                                td->block[y][x], s->linesize);
1642             ptr += 4 * s->linesize;
// chroma: both planes use the same (border-checked) 8x8 mode
1647     mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1648                                             mb_x, mb_y, is_vp7);
1649     s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1650     s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1652     if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1653         xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1654                        s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1655                        s->filter.simple, 0);
// Per-subpel-position lookup used by the MC routines below, indexed by the
// 3-bit fractional motion component: how many extra pixels the filter
// reads left/right of the block, and which mc_func row/column to use.
1658 static const uint8_t subpel_idx[3][8] = {
1659     { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1660                                 // also function pointer index
1661     { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1662     { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1668 * @param s VP8 decoding context
1669 * @param dst target buffer for block data at block position
1670 * @param ref reference picture buffer at origin (0, 0)
1671 * @param mv motion vector (relative to block position) to get pixel data from
1672 * @param x_off horizontal position of block from origin (0, 0)
1673 * @param y_off vertical position of block from origin (0, 0)
1674 * @param block_w width of block (16, 8 or 4)
1675 * @param block_h height of block (always same as block_w)
1676 * @param width width of src/dst plane data
1677 * @param height height of src/dst plane data
1678 * @param linesize size of a single line of plane data, including padding
1679 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
// Luma motion compensation for one block.  Quarter-pel MV: integer part
// offsets the source position, the fractional part (mx/my, 3 bits) selects
// the subpel filter.  Falls back to emulated_edge_mc when the filter
// footprint crosses the picture boundary; waits on the reference frame's
// decoding progress for frame-threading.
1681 static av_always_inline
1682 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1683                  ThreadFrame *ref, const VP56mv *mv,
1684                  int x_off, int y_off, int block_w, int block_h,
1685                  int width, int height, ptrdiff_t linesize,
1686                  vp8_mc_func mc_func[3][3])
1688     uint8_t *src = ref->f->data[0];
1691         int src_linesize = linesize;
// mv is in quarter-pel; <<1 converts to the 1/8-pel grid the filters use
1693         int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
1694         int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];
1696         x_off += mv->x >> 2;
1697         y_off += mv->y >> 2;
// wait until the reference rows the filter will read are decoded
1700         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1701         src += y_off * linesize + x_off;
1702         if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1703             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1704             s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1705                                      src - my_idx * linesize - mx_idx,
1706                                      EDGE_EMU_LINESIZE, linesize,
1707                                      block_w + subpel_idx[1][mx],
1708                                      block_h + subpel_idx[1][my],
1709                                      x_off - mx_idx, y_off - my_idx,
1711             src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1712             src_linesize = EDGE_EMU_LINESIZE;
1714         mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
// full-pel MV: plain copy, no subpel filtering or edge extension needed
1716         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1717         mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1718                       linesize, block_h, 0, 0);
1723 * chroma MC function
1725 * @param s VP8 decoding context
1726 * @param dst1 target buffer for block data at block position (U plane)
1727 * @param dst2 target buffer for block data at block position (V plane)
1728 * @param ref reference picture buffer at origin (0, 0)
1729 * @param mv motion vector (relative to block position) to get pixel data from
1730 * @param x_off horizontal position of block from origin (0, 0)
1731 * @param y_off vertical position of block from origin (0, 0)
1732 * @param block_w width of block (16, 8 or 4)
1733 * @param block_h height of block (always same as block_w)
1734 * @param width width of src/dst plane data
1735 * @param height height of src/dst plane data
1736 * @param linesize size of a single line of plane data, including padding
1737 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
// Chroma motion compensation: same structure as vp8_mc_luma() but operates
// on both U and V planes with an eighth-pel chroma MV (mv->x/y & 7 is the
// fractional part, >>3 the integer part).
1739 static av_always_inline
1740 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1741                    uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1742                    int x_off, int y_off, int block_w, int block_h,
1743                    int width, int height, ptrdiff_t linesize,
1744                    vp8_mc_func mc_func[3][3])
1746     uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1749         int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1750         int my = mv->y & 7, my_idx = subpel_idx[0][my];
1752         x_off += mv->x >> 3;
1753         y_off += mv->y >> 3;
1756         src1 += y_off * linesize + x_off;
1757         src2 += y_off * linesize + x_off;
1758         ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
// out-of-picture footprint: extend edges into the scratch buffer,
// once per chroma plane (the buffer is reused for U then V)
1759         if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1760             y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1761             s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1762                                      src1 - my_idx * linesize - mx_idx,
1763                                      EDGE_EMU_LINESIZE, linesize,
1764                                      block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1765                                      x_off - mx_idx, y_off - my_idx, width, height);
1766             src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1767             mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1769             s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1770                                      src2 - my_idx * linesize - mx_idx,
1771                                      EDGE_EMU_LINESIZE, linesize,
1772                                      block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1773                                      x_off - mx_idx, y_off - my_idx, width, height);
1774             src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1775             mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1777             mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1778             mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
// full-pel MV fast path for both planes
1781         ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1782         mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1783         mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
// Motion-compensate one partition of a macroblock: luma at (bx_off,by_off)
// with the given MV, then chroma with the derived chroma MV (derivation
// lines elided here; profile 3 disables full-pel chroma rounding).
1787 static av_always_inline
1788 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1789                  ThreadFrame *ref_frame, int x_off, int y_off,
1790                  int bx_off, int by_off, int block_w, int block_h,
1791                  int width, int height, VP56mv *mv)
1796     vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1797                 ref_frame, mv, x_off + bx_off, y_off + by_off,
1798                 block_w, block_h, width, height, s->linesize,
1799                 s->put_pixels_tab[block_w == 8]);
1802     if (s->profile == 3) {
1803         /* this block only applies to VP8; it is safe to check
1804          * only the profile, as VP7 profile <= 1 */
// chroma block is half-size; pick the 8x8 or 4x4 function row
1816     vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1817                   dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1818                   &uvmv, x_off + bx_off, y_off + by_off,
1819                   block_w, block_h, width, height, s->uvlinesize,
1820                   s->put_pixels_tab[1 + (block_w == 4)]);
1823 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1824  * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1825 static av_always_inline
1826 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1829     /* Don't prefetch refs that haven't been used very often this frame. */
1830     if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1831         int x_off = mb_x << 4, y_off = mb_y << 4;
// estimated source position of the macroblock 4 ahead, using this MB's MV
1832         int mx = (mb->mv.x >> 2) + x_off + 8;
1833         int my = (mb->mv.y >> 2) + y_off;
1834         uint8_t **src = s->framep[ref]->tf.f->data;
1835         int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1836         /* For threading, a ff_thread_await_progress here might be useful, but
1837          * it actually slows down the decoder. Since a bad prefetch doesn't
1838          * generate bad decoder output, we don't run it here. */
1839         s->vdsp.prefetch(src[0] + off, s->linesize, 4);
// chroma planes are contiguous; src[2]-src[1] is the U->V stride
1840         off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1841         s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1846  * Apply motion vectors to prediction buffer, chapter 18.
1848 static av_always_inline
1849 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1850                    VP8Macroblock *mb, int mb_x, int mb_y)
1852     int x_off = mb_x << 4, y_off = mb_y << 4;
1853     int width = 16 * s->mb_width, height = 16 * s->mb_height;
1854     ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1855     VP56mv *bmv = mb->bmv;
1857     switch (mb->partitioning) {
1858     case VP8_SPLITMVMODE_NONE:
// single MV for the whole 16x16 macroblock
1859         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1860                     0, 0, 16, 16, width, height, &mb->mv);
1862     case VP8_SPLITMVMODE_4x4: {
// 16 luma MVs, one per 4x4 block
1867         for (y = 0; y < 4; y++) {
1868             for (x = 0; x < 4; x++) {
1869                 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1870                             ref, &bmv[4 * y + x],
1871                             4 * x + x_off, 4 * y + y_off, 4, 4,
1872                             width, height, s->linesize,
1873                             s->put_pixels_tab[2]);
// each chroma MV is the rounded average of the 4 covering luma MVs
1882         for (y = 0; y < 2; y++) {
1883             for (x = 0; x < 2; x++) {
1884                 uvmv.x = mb->bmv[2 * y       * 4 + 2 * x    ].x +
1885                          mb->bmv[2 * y       * 4 + 2 * x + 1].x +
1886                          mb->bmv[(2 * y + 1) * 4 + 2 * x    ].x +
1887                          mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
1888                 uvmv.y = mb->bmv[2 * y       * 4 + 2 * x    ].y +
1889                          mb->bmv[2 * y       * 4 + 2 * x + 1].y +
1890                          mb->bmv[(2 * y + 1) * 4 + 2 * x    ].y +
1891                          mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
// round-toward-zero average of 4 (FF_SIGNBIT adds 1 for negatives)
1892                 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
1893                 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
1894                 if (s->profile == 3) {
1898                 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
1899                               dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
1900                               &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
1901                               width, height, s->uvlinesize,
1902                               s->put_pixels_tab[2]);
1907     case VP8_SPLITMVMODE_16x8:
1908         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1909                     0, 0, 16, 8, width, height, &bmv[0]);
1910         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1911                     0, 8, 16, 8, width, height, &bmv[1]);
1913     case VP8_SPLITMVMODE_8x16:
1914         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1915                     0, 0, 8, 16, width, height, &bmv[0]);
1916         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1917                     8, 0, 8, 16, width, height, &bmv[1]);
1919     case VP8_SPLITMVMODE_8x8:
1920         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1921                     0, 0, 8, 8, width, height, &bmv[0]);
1922         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1923                     8, 0, 8, 8, width, height, &bmv[1]);
1924         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1925                     0, 8, 8, 8, width, height, &bmv[2]);
1926         vp8_mc_part(s, td, dst, ref, x_off, y_off,
1927                     8, 8, 8, 8, width, height, &bmv[3]);
// Add the inverse-transformed residual of a whole inter macroblock to the
// prediction.  non_zero_count_cache is read 4 bytes at a time; a byte
// value of 1 means "DC only" (fast path), >1 means full IDCT, and a whole
// word of 0x01010101 or less allows the batched dc_add4 helpers.
1932 static av_always_inline
1933 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
// I4x4 residuals were already added during intra prediction
1937     if (mb->mode != MODE_I4x4) {
1938         uint8_t *y_dst = dst[0];
1939         for (y = 0; y < 4; y++) {
1940             uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1942             if (nnz4 & ~0x01010101) {
// at least one block in the row needs a full IDCT: handle per block,
// inspecting one byte of nnz4 at a time
1943                 for (x = 0; x < 4; x++) {
1944                     if ((uint8_t) nnz4 == 1)
1945                         s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
1948                     else if ((uint8_t) nnz4 > 1)
1949                         s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
// all four blocks are DC-only: batched helper
1957                 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1960             y_dst += 4 * s->linesize;
1964         for (ch = 0; ch < 2; ch++) {
1965             uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
1967                 uint8_t *ch_dst = dst[1 + ch];
1968                 if (nnz4 & ~0x01010101) {
1969                     for (y = 0; y < 2; y++) {
1970                         for (x = 0; x < 2; x++) {
1971                             if ((uint8_t) nnz4 == 1)
1972                                 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
1973                                                           td->block[4 + ch][(y << 1) + x],
1975                             else if ((uint8_t) nnz4 > 1)
1976                                 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
1977                                                        td->block[4 + ch][(y << 1) + x],
1981                                 goto chroma_idct_end;
1983                         ch_dst += 4 * s->uvlinesize;
1986                     s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
// Compute the loop-filter strength for one macroblock (RFC 6386 §15.2):
// base level from segmentation or the frame header, adjusted by the
// ref-frame/mode deltas, clamped to 0..63; then derive the interior limit
// from the sharpness setting.
1994 static av_always_inline
1995 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
1996                          VP8FilterStrength *f, int is_vp7)
1998     int interior_limit, filter_level;
2000     if (s->segmentation.enabled) {
2001         filter_level = s->segmentation.filter_level[mb->segment];
// delta mode: segment value is relative to the frame filter level
2002         if (!s->segmentation.absolute_vals)
2003             filter_level += s->filter.level;
2005         filter_level = s->filter.level;
2007     if (s->lf_delta.enabled) {
2008         filter_level += s->lf_delta.ref[mb->ref_frame];
2009         filter_level += s->lf_delta.mode[mb->mode];
2012     filter_level = av_clip_uintp2(filter_level, 6);
2014     interior_limit = filter_level;
2015     if (s->filter.sharpness) {
2016         interior_limit >>= (s->filter.sharpness + 3) >> 2;
2017         interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2019     interior_limit = FFMAX(interior_limit, 1);
2021     f->filter_level = filter_level;
2022     f->inner_limit  = interior_limit;
// inner (sub-block) edges are filtered unless the MB is a skipped
// whole-block inter prediction; VP7 always filters them
2023     f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2024                       mb->mode == VP8_MVMODE_SPLIT;
// Apply the normal (full) loop filter to one macroblock: horizontal edges
// first (left MB edge + inner columns), then vertical (top MB edge + inner
// rows), on both luma and chroma.  Thresholds derive from the precomputed
// per-MB filter strength; hev_thresh comes from a keyframe-dependent LUT.
2027 static av_always_inline
2028 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2029                int mb_x, int mb_y, int is_vp7)
2031     int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2032     int filter_level = f->filter_level;
2033     int inner_limit  = f->inner_limit;
2034     int inner_filter = f->inner_filter;
2035     int linesize     = s->linesize;
2036     int uvlinesize   = s->uvlinesize;
// maps filter_level (0..63) to the high-edge-variance threshold;
// row [1] (keyframes) is slightly more conservative
2037     static const uint8_t hev_thresh_lut[2][64] = {
2038         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2039           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2040           3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2042         { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2043           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2044           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
// edge limits: VP7 branch (elided) vs VP8 branch differ slightly
2052         bedge_lim_y  = filter_level;
2053         bedge_lim_uv = filter_level * 2;
2054         mbedge_lim   = filter_level + 2;
2057         bedge_lim_uv = filter_level * 2 + inner_limit;
2058         mbedge_lim   = bedge_lim_y + 4;
2061     hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
// left macroblock edge (strong filter)
2064         s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2065                                        mbedge_lim, inner_limit, hev_thresh);
2066         s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2067                                        mbedge_lim, inner_limit, hev_thresh);
2070 #define H_LOOP_FILTER_16Y_INNER(cond)                                         \
2071     if (cond && inner_filter) {                                               \
2072         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize,            \
2073                                              bedge_lim_y, inner_limit,        \
2075         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize,            \
2076                                              bedge_lim_y, inner_limit,        \
2078         s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize,           \
2079                                              bedge_lim_y, inner_limit,        \
2081         s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,          \
2082                                              uvlinesize, bedge_lim_uv,        \
2083                                              inner_limit, hev_thresh);        \
2086     H_LOOP_FILTER_16Y_INNER(!is_vp7)
// top macroblock edge
2089         s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2090                                        mbedge_lim, inner_limit, hev_thresh);
2091         s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2092                                        mbedge_lim, inner_limit, hev_thresh);
// inner horizontal edges at rows 4, 8, 12
2096         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2097                                              linesize, bedge_lim_y,
2098                                              inner_limit, hev_thresh);
2099         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2100                                              linesize, bedge_lim_y,
2101                                              inner_limit, hev_thresh);
2102         s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2103                                              linesize, bedge_lim_y,
2104                                              inner_limit, hev_thresh);
2105         s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2106                                              dst[2] + 4 * uvlinesize,
2107                                              uvlinesize, bedge_lim_uv,
2108                                              inner_limit, hev_thresh);
// VP7 filters the inner vertical columns after the horizontal pass
2111     H_LOOP_FILTER_16Y_INNER(is_vp7)
// Simple loop filter: luma only, no high-edge-variance logic.  Filters the
// left/top macroblock edge with mbedge_lim and, when inner filtering is
// enabled, the interior edges at offsets 4, 8 and 12.
2114 static av_always_inline
2115 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2118     int mbedge_lim, bedge_lim;
2119     int filter_level = f->filter_level;
2120     int inner_limit  = f->inner_limit;
2121     int inner_filter = f->inner_filter;
2122     int linesize     = s->linesize;
2127     bedge_lim  = 2 * filter_level + inner_limit;
2128     mbedge_lim = bedge_lim + 4;
2131         s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2133         s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2134         s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2135         s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2139         s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2141         s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2142         s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2143         s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2147 #define MARGIN (16 << 2)
// First pass used with frame-threading: walk every macroblock and decode
// only its mode/MV/segment info (no coefficients), so the next frame can
// start decoding earlier.  mv_min/mv_max clamp MVs to the frame plus
// MARGIN pixels (in 1/8-pel units, hence the <<6).
2148 static av_always_inline
2149 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2150                              VP8Frame *prev_frame, int is_vp7)
2152     VP8Context *s = avctx->priv_data;
2155     s->mv_min.y = -MARGIN;
2156     s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2157     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
// +1 column guard band on the left of each row in the MB array
2158         VP8Macroblock *mb = s->macroblocks_base +
2159                             ((s->mb_width + 1) * (mb_y + 1) + 1);
2160         int mb_xy = mb_y * s->mb_width;
// reset the left intra 4x4 prediction context at the row start
2162         AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2164         s->mv_min.x = -MARGIN;
2165         s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2166         for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2168                 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2169                          DC_PRED * 0x01010101);
// prev_frame's segment map seeds *segment when update_map is off
2170             decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2171                            prev_frame && prev_frame->seg_map ?
2172                            prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
// VP7 instantiation of the shared mode/MV pre-decode pass.
2181 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2182                                    VP8Frame *prev_frame)
2184     vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
// VP8 instantiation of the shared mode/MV pre-decode pass.
2187 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2188                                    VP8Frame *prev_frame)
2190     vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
// Sliced-threading synchronization helpers.  Positions are packed as
// (mb_y << 16) | mb_x.  check_thread_pos() blocks until the other thread
// (otd) has progressed past (mb_x_check, mb_y_check); update_pos()
// publishes this thread's position and wakes any waiter whose recorded
// wait position is now satisfied.  The #else branch (bottom) defines both
// as no-ops for builds without pthread slice threading.
2194 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
2196         int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);                 \
2197         if (otd->thread_mb_pos < tmp) {                                       \
2198             pthread_mutex_lock(&otd->lock);                                   \
2199             td->wait_mb_pos = tmp;                                            \
2201                 if (otd->thread_mb_pos >= tmp)                                \
2203                 pthread_cond_wait(&otd->cond, &otd->lock);                    \
2205             td->wait_mb_pos = INT_MAX;                                        \
2206             pthread_mutex_unlock(&otd->lock);                                 \
2210 #define update_pos(td, mb_y, mb_x)                                            \
2212         int pos             = (mb_y << 16) | (mb_x & 0xFFFF);                 \
2213         int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2215         int is_null          = !next_td || !prev_td;                          \
2216         int pos_check        = (is_null) ? 1                                  \
2217                                          : (next_td != td &&                  \
2218                                             pos >= next_td->wait_mb_pos) ||   \
2220                                             pos >= prev_td->wait_mb_pos);     \
2221         td->thread_mb_pos = pos;                                              \
2222         if (sliced_threading && pos_check) {                                  \
2223             pthread_mutex_lock(&td->lock);                                    \
2224             pthread_cond_broadcast(&td->cond);                                \
2225             pthread_mutex_unlock(&td->lock);                                  \
2229 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
2230 #define update_pos(td, mb_y, mb_x)
// Decode one macroblock row (mode/MV if not pre-decoded, coefficients,
// intra/inter prediction, IDCT) without loop filtering; filtering runs in
// a separate pass (vp8_filter_mb_row).  With sliced threading, waits on
// the previous job's progress and publishes its own via update_pos().
2233 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2234                                         int jobnr, int threadnr, int is_vp7)
2236     VP8Context *s = avctx->priv_data;
2237     VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2238     int mb_y = td->thread_mb_pos >> 16;
2239     int mb_x, mb_xy = mb_y * s->mb_width;
2240     int num_jobs = s->num_jobs;
2241     VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
// coefficient partitions are striped across rows
2242     VP56RangeCoder *c  = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2245         curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2246         curframe->tf.f->data[1] +  8 * mb_y * s->uvlinesize,
2247         curframe->tf.f->data[2] +  8 * mb_y * s->uvlinesize
2252         prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2253     if (mb_y == s->mb_height - 1)
2256         next_td = &s->thread_data[(jobnr + 1) % num_jobs];
// layout 1: modes were pre-decoded into the full MB array
2257     if (s->mb_layout == 1)
2258         mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2260         // Make sure the previous frame has read its segmentation map,
2261         // if we re-use the same map.
2262         if (prev_frame && s->segmentation.enabled &&
2263             !s->segmentation.update_map)
2264             ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2265         mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2266         memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2267         AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2270     if (!is_vp7 || mb_y == 0)
2271         memset(td->left_nnz, 0, sizeof(td->left_nnz));
2273     s->mv_min.x = -MARGIN;
2274     s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2276     for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2277         // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2278         if (prev_td != td) {
2279             if (threadnr != 0) {
2280                 check_thread_pos(td, prev_td,
2281                                  mb_x + (is_vp7 ? 2 : 1),
2282                                  mb_y - (is_vp7 ? 2 : 1));
2284                 check_thread_pos(td, prev_td,
2285                                  mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2286                                  mb_y - (is_vp7 ? 2 : 1));
// prefetch the destination rows this macroblock will write
2290         s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2292         s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2293                          dst[2] - dst[1], 2);
2296             decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2297                            prev_frame && prev_frame->seg_map ?
2298                            prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2300         prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2303             decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2305         if (mb->mode <= MODE_I4x4)
2306             intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2308             inter_predict(s, td, dst, mb, mb_x, mb_y);
2310         prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2313             idct_mb(s, td, dst, mb);
// skipped MB with no coefficients: clear the nnz context instead
2315             AV_ZERO64(td->left_nnz);
2316             AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned
2318             /* Reset DC block predictors if they would exist
2319              * if the mb had coefficients */
2320             if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2321                 td->left_nnz[8]      = 0;
2322                 s->top_nnz[mb_x][8] = 0;
2326         if (s->deblock_filter)
2327             filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
// multi-job decoding: the last job saves the border for the filter pass
2329         if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2330             if (s->filter.simple)
2331                 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2332                                  NULL, NULL, s->linesize, 0, 1);
2334                 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2335                                  dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2338         prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
// publish progress past the end of the row so waiters can proceed
2346         if (mb_x == s->mb_width + 1) {
2347             update_pos(td, mb_y, s->mb_width + 3);
2349             update_pos(td, mb_y, mb_x);
/* Loop-filter pass over one macroblock row; the row index comes from
 * td->thread_mb_pos (packed by vp78_decode_mb_row_sliced).  Runs after the
 * no-filter decode pass for the same row and synchronises with the threads
 * handling the rows above/below via check_thread_pos()/update_pos().
 * NOTE(review): this listing carries fused original line numbers and is
 * missing several source lines (braces, else branches, the dst[] declaration
 * opener) -- it is not compilable as-is; code left byte-identical. */
2354 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2355 int jobnr, int threadnr, int is_vp7)
2357     VP8Context *s = avctx->priv_data;
2358     VP8ThreadData *td = &s->thread_data[threadnr];
// Row index lives in the high 16 bits of thread_mb_pos.
2359     int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
2360     AVFrame *curframe = s->curframe->tf.f;
2362     VP8ThreadData *prev_td, *next_td;
// Per-plane base pointers for this row: 16 luma / 8 chroma lines per mb row.
2364 curframe->data[0] + 16 * mb_y * s->linesize,
2365 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2366 curframe->data[2] + 8 * mb_y * s->uvlinesize
// mb_layout==1: macroblocks stored in raster order in macroblocks_base
// (one guard column, +1 skips it); otherwise rows are indexed backwards
// from s->macroblocks.
2369     if (s->mb_layout == 1)
2370         mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2372         mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
// Rows are handed out round-robin over num_jobs workers, so the rows
// above/below belong to the adjacent thread-data slots.
2377     prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2378     if (mb_y == s->mb_height - 1)
2381     next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2383     for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
// Filter strength was precomputed during the decode pass
// (filter_level_for_mb, visible in the decode loop above).
2384         VP8FilterStrength *f = &td->filter_strength[mb_x];
// Positions >= mb_width+3 are filter-pass positions (see update_pos below),
// so this waits for the row above to have *filtered* past our neighbour.
2386         check_thread_pos(td, prev_td,
2387 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2389         if (next_td != &s->thread_data[0])
2390             check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
// Single-job case: the decode loop did not back up the unfiltered top
// border (it only does so when threadnr == num_jobs-1 with num_jobs != 1),
// so save it here before filtering overwrites those pixels.
2392         if (num_jobs == 1) {
2393             if (s->filter.simple)
2394                 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2395 NULL, NULL, s->linesize, 0, 1);
2397             backup_mb_border(s->top_border[mb_x + 1], dst[0],
2398 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
// Simple filter touches luma only; the normal filter does all three planes.
2401         if (s->filter.simple)
2402             filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2404             filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
// Publish filter progress, offset by mb_width+3 so filter positions sort
// after decode positions of the same row.
2409         update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
/* Slice-threading worker: each job decodes (and optionally filters) rows
 * jobnr, jobnr+num_jobs, jobnr+2*num_jobs, ... of the current frame, and
 * reports per-row progress to consumers when frame threading is active.
 * NOTE(review): listing is gapped (loop braces / return missing); code left
 * byte-identical. */
2413 static av_always_inline
2414 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2415 int threadnr, int is_vp7)
2417     VP8Context *s = avctx->priv_data;
2418     VP8ThreadData *td = &s->thread_data[jobnr];
2419     VP8ThreadData *next_td = NULL, *prev_td = NULL;
2420     VP8Frame *curframe = s->curframe;
2421     int mb_y, num_jobs = s->num_jobs;
2423     td->thread_nr = threadnr;
// Round-robin row distribution across the worker jobs.
2424     for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2425         if (mb_y >= s->mb_height)
// Row index goes in the high 16 bits, consumed by the row workers above.
2427         td->thread_mb_pos = mb_y << 16;
2428         vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, is_vp7);
2429         if (s->deblock_filter)
2430             vp8_filter_mb_row(avctx, tdata, jobnr, threadnr, is_vp7);
// Mark the whole row as finished (max in-row position) for waiters.
2431         update_pos(td, mb_y, INT_MAX & 0xFFFF);
// Let frame-threaded consumers start reading this row.
2436     if (avctx->active_thread_type == FF_THREAD_FRAME)
2437         ff_thread_report_progress(&curframe->tf, mb_y, 0);
/* VP7 execute2() callback: forwards to the shared row worker with the
 * is_vp7 flag fixed, letting the always-inline body specialize. */
2443 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2444 int jobnr, int threadnr)
2446     return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
/* VP8 execute2() callback: same as the VP7 variant with is_vp7 = 0. */
2449 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2450 int jobnr, int threadnr)
2452     return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
/* Shared VP7/VP8 frame decoder: parses the frame header, manages the
 * reference-frame set (last/golden/altref rotation), allocates the output
 * buffer, kicks off the sliced row workers, and returns the frame to the
 * caller unless it is marked invisible.
 * NOTE(review): listing is gapped (many lines, including braces and error
 * paths, are missing) -- code left byte-identical. */
2456 static av_always_inline
2457 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2458 AVPacket *avpkt, int is_vp7)
2460     VP8Context *s = avctx->priv_data;
2461     int ret, i, referenced, num_jobs;
2462     enum AVDiscard skip_thresh;
2463     VP8Frame *av_uninit(curframe), *prev_frame;
// --- header parsing (codec-specific) ---
2466     ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2468     ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2473     prev_frame = s->framep[VP56_FRAME_CURRENT];
// A frame is "referenced" if any later frame may predict from it.
2475     referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2476 s->update_altref == VP56_FRAME_CURRENT;
// Discard threshold: non-ref frames are the cheapest to skip, then
// non-keyframes; (the ALL case is on a line missing from this listing).
2478     skip_thresh = !referenced ? AVDISCARD_NONREF
2479 : !s->keyframe ? AVDISCARD_NONKEY
2482     if (avctx->skip_frame >= skip_thresh) {
2484         memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2487     s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2489     // release no longer referenced frames
2490     for (i = 0; i < 5; i++)
2491         if (s->frames[i].tf.f->data[0] &&
2492 &s->frames[i] != prev_frame &&
2493 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2494 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2495 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2496             vp8_release_frame(s, &s->frames[i]);
2498     curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
// VP7/VP8 are always BT.601; range depends on the fullrange bit
// (the surrounding condition is on lines missing from this listing).
2501     avctx->colorspace = AVCOL_SPC_BT470BG;
2503     avctx->color_range = AVCOL_RANGE_JPEG;
2505     avctx->color_range = AVCOL_RANGE_MPEG;
2507     /* Given that arithmetic probabilities are updated every frame, it's quite
2508 * likely that the values we have on a random interframe are complete
2509 * junk if we didn't start decode on a keyframe. So just don't display
2510 * anything rather than junk. */
2511     if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2512 !s->framep[VP56_FRAME_GOLDEN] ||
2513 !s->framep[VP56_FRAME_GOLDEN2])) {
2514         av_log(avctx, AV_LOG_WARNING,
2515 "Discarding interframe without a prior keyframe!\n");
2516         ret = AVERROR_INVALIDDATA;
2520     curframe->tf.f->key_frame = s->keyframe;
2521     curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2522 : AV_PICTURE_TYPE_P;
2523     if ((ret = vp8_alloc_frame(s, curframe, referenced))) {
2524         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
// --- build next_framep[]: rotate last/golden/altref references ---
2528     // check if golden and altref are swapped
2529     if (s->update_altref != VP56_FRAME_NONE)
2530         s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2532         s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2534     if (s->update_golden != VP56_FRAME_NONE)
2535         s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2537         s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2540     s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2542     s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2544     s->next_framep[VP56_FRAME_CURRENT] = curframe;
// Frame-thread setup is complete; later threads may now start.
2546     ff_thread_finish_setup(avctx);
// --- per-frame decode state initialization ---
2548     s->linesize = curframe->tf.f->linesize[0];
2549     s->uvlinesize = curframe->tf.f->linesize[1];
2551     memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2552     /* Zero macroblock structures for top/top-left prediction
2553 * from outside the frame. */
2555     memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2556 (s->mb_width + 1) * sizeof(*s->macroblocks));
2557     if (!s->mb_layout && s->keyframe)
2558         memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2560     memset(s->ref_count, 0, sizeof(s->ref_count));
2562     if (s->mb_layout == 1) {
2563         // Make sure the previous frame has read its segmentation map,
2564         // if we re-use the same map.
2565         if (prev_frame && s->segmentation.enabled &&
2566 !s->segmentation.update_map)
2567             ff_thread_await_progress(&prev_frame->tf, 1, 0);
2569         vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2571         vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2574     if (avctx->active_thread_type == FF_THREAD_FRAME)
// Sliced decoding cannot use more jobs than coefficient partitions.
2577         num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2578     s->num_jobs = num_jobs;
2579     s->curframe = curframe;
2580     s->prev_frame = prev_frame;
2581     s->mv_min.y = -MARGIN;
2582     s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2583     for (i = 0; i < MAX_THREADS; i++) {
2584         s->thread_data[i].thread_mb_pos = 0;
// INT_MAX wait position == "not waiting on anyone yet".
2585         s->thread_data[i].wait_mb_pos = INT_MAX;
// --- run the row workers (codec-specific callback) ---
2588     avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2591     avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2594     ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
// Commit the new reference set built above.
2595     memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2598     // if future frames don't use the updated probabilities,
2599     // reset them to the values we saved
2600     if (!s->update_probabilities)
2601         s->prob[0] = s->prob[1];
// Invisible frames update references but are never output.
2603     if (!s->invisible) {
2604         if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2611     memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
/* Public VP8 decode entry point; thin wrapper so the always-inline
 * vp78_decode_frame() body is specialized for VP8. */
2615 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2618     return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2621 #if CONFIG_VP7_DECODER
/* VP7 decode entry point; wrapper specializing vp78_decode_frame(). */
2622 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2625     return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2627 #endif /* CONFIG_VP7_DECODER */
/* Decoder teardown: flush/free all decoder buffers, then free the
 * AVFrames backing the frame pool. */
2629 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2631     VP8Context *s = avctx->priv_data;
// free_all=1: release every frame, not just the unreferenced ones.
2634     vp8_decode_flush_impl(avctx, 1);
2635     for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2636         av_frame_free(&s->frames[i].tf.f);
/* Allocate the AVFrame shells for the frame pool.  Returns 0 on success
 * or AVERROR(ENOMEM); partially allocated frames are cleaned up by
 * ff_vp8_decode_free() in the callers' error paths. */
2641 static av_cold int vp8_init_frames(VP8Context *s)
2644     for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2645         s->frames[i].tf.f = av_frame_alloc();
2646         if (!s->frames[i].tf.f)
2647             return AVERROR(ENOMEM);
/* Shared VP7/VP8 init: sets the pixel format, initializes the DSP and
 * intra-prediction contexts (codec-specific variants), seeds the scan
 * table, and allocates the frame pool. */
2652 static av_always_inline
2653 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2655     VP8Context *s = avctx->priv_data;
2659     avctx->pix_fmt = AV_PIX_FMT_YUV420P;
// Per-frame progress tracking is needed for frame threading.
2660     avctx->internal->allocate_progress = 1;
2662     ff_videodsp_init(&s->vdsp, 8);
// Common VP7/VP8 DSP first, then codec-specific overrides below.
2664     ff_vp78dsp_init(&s->vp8dsp);
2665     if (CONFIG_VP7_DECODER && is_vp7) {
2666         ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2667         ff_vp7dsp_init(&s->vp8dsp);
2668     } else if (CONFIG_VP8_DECODER && !is_vp7) {
2669         ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2670         ff_vp8dsp_init(&s->vp8dsp);
2673     /* does not change for VP8 */
2674     memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
// On failure, free whatever frames were already allocated.
2676     if ((ret = vp8_init_frames(s)) < 0) {
2677         ff_vp8_decode_free(avctx);
2684 #if CONFIG_VP7_DECODER
/* VP7 init entry point; wrapper specializing vp78_decode_init(). */
2685 static int vp7_decode_init(AVCodecContext *avctx)
2687     return vp78_decode_init(avctx, IS_VP7);
2689 #endif /* CONFIG_VP7_DECODER */
/* Public VP8 init entry point; wrapper specializing vp78_decode_init(). */
2691 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2693     return vp78_decode_init(avctx, IS_VP8);
2696 #if CONFIG_VP8_DECODER
/* Frame-threading: initialize a worker-thread copy of the context.  Only
 * the per-thread frame pool needs allocating here; the rest is copied in
 * vp8_decode_update_thread_context(). */
2697 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2699     VP8Context *s = avctx->priv_data;
2704     if ((ret = vp8_init_frames(s)) < 0) {
2705         ff_vp8_decode_free(avctx);
/* Translate a frame pointer from the source context's frames[] array into
 * the equivalent slot of this context's array (NULL stays NULL).
 * NOTE(review): macro body and argument are unparenthesized; safe only
 * because every call site passes a simple expression. */
2712 #define REBASE(pic) pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
/* Frame-threading: sync this worker context from the context that just
 * finished setup -- probabilities, segmentation/loop-filter state, sign
 * biases, frame references -- reallocating if dimensions changed. */
2714 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2715 const AVCodecContext *src)
2717     VP8Context *s = dst->priv_data, *s_src = src->priv_data;
// Dimensions changed: drop per-size buffers and adopt the new geometry.
2720     if (s->macroblocks_base &&
2721 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2723         s->mb_width = s_src->mb_width;
2724         s->mb_height = s_src->mb_height;
// Take the probability set future frames will actually predict from.
2727     s->prob[0] = s_src->prob[!s_src->update_probabilities];
2728     s->segmentation = s_src->segmentation;
2729     s->lf_delta = s_src->lf_delta;
2730     memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
// Re-reference every frame the source still holds data for.
2732     for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2733         if (s_src->frames[i].tf.f->data[0]) {
2734             int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
// Adopt the source's *next* frame set, rebased into our own pool.
2740     s->framep[0] = REBASE(s_src->next_framep[0]);
2741     s->framep[1] = REBASE(s_src->next_framep[1]);
2742     s->framep[2] = REBASE(s_src->next_framep[2]);
2743     s->framep[3] = REBASE(s_src->next_framep[3]);
2747 #endif /* CONFIG_VP8_DECODER */
2749 #if CONFIG_VP7_DECODER
/* VP7 decoder registration.  No threading capabilities: VP7 decoding here
 * is single-threaded (only DR1 is advertised). */
2750 AVCodec ff_vp7_decoder = {
2752     .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2753     .type = AVMEDIA_TYPE_VIDEO,
2754     .id = AV_CODEC_ID_VP7,
2755     .priv_data_size = sizeof(VP8Context),
2756     .init = vp7_decode_init,
2757     .close = ff_vp8_decode_free,
2758     .decode = vp7_decode_frame,
2759     .capabilities = CODEC_CAP_DR1,
2760     .flush = vp8_decode_flush,
2762 #endif /* CONFIG_VP7_DECODER */
2764 #if CONFIG_VP8_DECODER
/* VP8 decoder registration: supports direct rendering plus both frame
 * and slice threading (thread-copy/update callbacks below). */
2765 AVCodec ff_vp8_decoder = {
2767     .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2768     .type = AVMEDIA_TYPE_VIDEO,
2769     .id = AV_CODEC_ID_VP8,
2770     .priv_data_size = sizeof(VP8Context),
2771     .init = ff_vp8_decode_init,
2772     .close = ff_vp8_decode_free,
2773     .decode = ff_vp8_decode_frame,
2774     .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2775     .flush = vp8_decode_flush,
2776     .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2777     .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2779 #endif /* CONFIG_VP8_DECODER */