2 * VP7/VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Fiona Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
10 * This file is part of Libav.
12 * Libav is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * Libav is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with Libav; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/imgutils.h"
31 #include "rectangle.h"
/* Free every buffer owned by the decoder context: each thread's
 * condition variable, mutex and filter_strength array, then the
 * thread_data array itself, the macroblock storage and the cached
 * top-row state (intra pred modes, nnz, border pixels).
 * NOTE(review): some source lines are elided in this excerpt. */
40 static void free_buffers(VP8Context *s)
44 for (i = 0; i < MAX_THREADS; i++) {
46 pthread_cond_destroy(&s->thread_data[i].cond);
47 pthread_mutex_destroy(&s->thread_data[i].lock);
49 av_freep(&s->thread_data[i].filter_strength);
51 av_freep(&s->thread_data);
52 av_freep(&s->macroblocks_base);
53 av_freep(&s->intra4x4_pred_mode_top);
54 av_freep(&s->top_nnz);
55 av_freep(&s->top_border);
/* macroblocks is an alias into macroblocks_base (see update_dimensions);
 * clear it so no dangling pointer survives the av_freep above. */
57 s->macroblocks = NULL;
/* Allocate pixels for frame f through the thread-aware buffer getter
 * (flagged as a reference when ref is set) plus a zero-initialized
 * segmentation map of mb_width * mb_height bytes.  If the seg_map
 * allocation fails, the just-acquired frame buffer is released and
 * AVERROR(ENOMEM) is returned. */
60 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
63 if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
64 ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
66 if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
67 ff_thread_release_buffer(s->avctx, &f->tf);
68 return AVERROR(ENOMEM);
/* Release a frame: unref its segmentation map and hand the pixel
 * buffer back via the thread-aware release. */
73 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
75 av_buffer_unref(&f->seg_map);
76 ff_thread_release_buffer(s->avctx, &f->tf);
#if CONFIG_VP8_DECODER
/* Turn dst into an additional reference to src: first drop whatever
 * dst currently holds, then ref src's frame buffer and seg_map.  On
 * failure dst is released again and ENOMEM returned.  Only built for
 * the VP8 decoder (frame-threading update path). */
80 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
84 vp8_release_frame(s, dst);
86 if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
89 !(dst->seg_map = av_buffer_ref(src->seg_map))) {
90 vp8_release_frame(s, dst);
91 return AVERROR(ENOMEM);
96 #endif /* CONFIG_VP8_DECODER */
/* Flush the decoder: release every slot in s->frames and zero the
 * framep[] reference pointers.  The free_mem path (freeing persistent
 * context buffers) is handled by lines elided from this excerpt —
 * TODO confirm against the full source. */
98 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
100 VP8Context *s = avctx->priv_data;
103 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
104 vp8_release_frame(s, &s->frames[i]);
105 memset(s->framep, 0, sizeof(s->framep));
/* Codec flush callback: flush frames without freeing context buffers. */
111 static void vp8_decode_flush(AVCodecContext *avctx)
113 vp8_decode_flush_impl(avctx, 0);
/* Find a frame slot that is not currently serving as the CURRENT,
 * PREVIOUS, GOLDEN or GOLDEN2 (altref) reference.  Logs FATAL when all
 * five slots are busy (the error-return line is elided here); if the
 * chosen slot still holds pixels it is released before reuse. */
116 static VP8Frame *vp8_find_free_buffer(VP8Context *s)
118 VP8Frame *frame = NULL;
121 // find a free buffer
122 for (i = 0; i < 5; i++)
123 if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
124 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
125 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
126 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
127 frame = &s->frames[i];
131 av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
134 if (frame->tf.f->data[0])
135 vp8_release_frame(s, frame);
/* (Re)allocate all dimension-dependent state.  On a size change the
 * decoder is flushed and avctx dimensions updated; mb_width/mb_height
 * are derived from the coded size rounded up to whole 16x16 macroblocks.
 * mb_layout selects between the frame-threading layout (extra top-row
 * macroblock array + separate intra4x4 top cache) and the sliced-
 * threading layout (2D macroblock array with a one-MB border).
 * NOTE(review): per-iteration allocation failures for
 * thread_data[i].filter_strength are not individually checked in the
 * visible lines — confirm against the full source. */
140 static av_always_inline
141 int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
143 AVCodecContext *avctx = s->avctx;
146 if (width != s->avctx->width ||
147 height != s->avctx->height) {
148 vp8_decode_flush_impl(s->avctx, 1);
150 ret = ff_set_dimensions(s->avctx, width, height);
155 s->mb_width = (s->avctx->coded_width + 15) / 16;
156 s->mb_height = (s->avctx->coded_height + 15) / 16;
/* Precedence: is_vp7 || (slice-threading && more than one usable
 * coefficient partition) — VP7 always uses the "sliced" layout. */
158 s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
159 FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1;
160 if (!s->mb_layout) { // Frame threading and one thread
161 s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
162 sizeof(*s->macroblocks));
163 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
164 } else // Sliced threading
165 s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
166 sizeof(*s->macroblocks));
167 s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
168 s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
169 s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
171 for (i = 0; i < MAX_THREADS; i++) {
172 s->thread_data[i].filter_strength =
173 av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
175 pthread_mutex_init(&s->thread_data[i].lock, NULL);
176 pthread_cond_init(&s->thread_data[i].cond, NULL);
/* Collective OOM check; intra4x4_pred_mode_top is only required in the
 * frame-threading (!mb_layout) case. */
180 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
181 (!s->intra4x4_pred_mode_top && !s->mb_layout))
182 return AVERROR(ENOMEM);
184 s->macroblocks = s->macroblocks_base + 1;
/* VP7 wrapper: update_dimensions with the VP7 layout rules. */
189 static int vp7_update_dimensions(VP8Context *s, int width, int height)
191 return update_dimensions(s, width, height, IS_VP7);
/* VP8 wrapper: update_dimensions with the VP8 layout rules. */
194 static int vp8_update_dimensions(VP8Context *s, int width, int height)
196 return update_dimensions(s, width, height, IS_VP8);
/* Parse the segmentation header: the update_map flag, optional
 * per-segment feature data (4 quantizer deltas as 7-bit signed values,
 * 4 filter-level deltas as 6-bit signed values, absolute or relative
 * per absolute_vals), and — when the map is updated — 3 segment-id
 * tree probabilities, 255 meaning "use default". */
199 static void parse_segment_info(VP8Context *s)
201 VP56RangeCoder *c = &s->c;
204 s->segmentation.update_map = vp8_rac_get(c);
206 if (vp8_rac_get(c)) { // update segment feature data
207 s->segmentation.absolute_vals = vp8_rac_get(c);
209 for (i = 0; i < 4; i++)
210 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
212 for (i = 0; i < 4; i++)
213 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
215 if (s->segmentation.update_map)
216 for (i = 0; i < 3; i++)
217 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/* Read loop-filter delta updates: for each of the 4 reference-frame
 * deltas and each mode delta (MODE_I4x4 .. VP8_MVMODE_SPLIT) an
 * optional 6-bit magnitude followed by a sign that negates it.
 * NOTE(review): the sign-bit read between the magnitude and the
 * negation is on lines elided from this excerpt. */
220 static void update_lf_deltas(VP8Context *s)
222 VP56RangeCoder *c = &s->c;
225 for (i = 0; i < 4; i++) {
226 if (vp8_rac_get(c)) {
227 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
230 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
234 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
235 if (vp8_rac_get(c)) {
236 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
239 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
/* Set up the DCT coefficient partitions.  The 2-bit field gives
 * 1/2/4/8 partitions; the first (count-1) partitions are preceded by
 * 3-byte little-endian sizes.  A range decoder is initialized over each
 * sized partition, with the last partition taking whatever buffer
 * remains.  Returns nonzero (via elided lines) when a declared size
 * exceeds the remaining buffer. */
244 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
246 const uint8_t *sizes = buf;
249 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
251 buf += 3 * (s->num_coeff_partitions - 1);
252 buf_size -= 3 * (s->num_coeff_partitions - 1);
256 for (i = 0; i < s->num_coeff_partitions - 1; i++) {
257 int size = AV_RL24(sizes + 3 * i);
258 if (buf_size - size < 0)
261 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
265 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
/* VP7 quantizer parsing: a 7-bit luma AC index, then five optional
 * 7-bit overrides (luma DC, Y2 DC/AC, chroma DC/AC) each defaulting to
 * yac_qi.  Indices are mapped through the VP7 lookup tables into
 * qmat[0]; chroma DC is capped at 132. */
270 static void vp7_get_quants(VP8Context *s)
272 VP56RangeCoder *c = &s->c;
274 int yac_qi = vp8_rac_get_uint(c, 7);
275 int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
276 int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
277 int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
278 int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
279 int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
281 s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
282 s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
283 s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
284 s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
285 s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
286 s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
/* VP8 quantizer parsing: base 7-bit luma AC index plus five 4-bit
 * signed deltas.  One qmat entry per segment (0..3); with segmentation
 * enabled the per-segment base_quant is used (added to the frame base
 * when values are relative — the add is on elided lines).  All indices
 * are clipped to 7 bits before the table lookups.  Y2 DC is doubled,
 * Y2 AC scaled by 155/100 (101581 >> 16) with a floor of 8, and chroma
 * DC capped at 132, matching the VP8 spec's dequantization rules. */
289 static void get_quants(VP8Context *s)
291 VP56RangeCoder *c = &s->c;
294 int yac_qi = vp8_rac_get_uint(c, 7);
295 int ydc_delta = vp8_rac_get_sint(c, 4);
296 int y2dc_delta = vp8_rac_get_sint(c, 4);
297 int y2ac_delta = vp8_rac_get_sint(c, 4);
298 int uvdc_delta = vp8_rac_get_sint(c, 4);
299 int uvac_delta = vp8_rac_get_sint(c, 4);
301 for (i = 0; i < 4; i++) {
302 if (s->segmentation.enabled) {
303 base_qi = s->segmentation.base_quant[i];
304 if (!s->segmentation.absolute_vals)
309 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta, 7)];
310 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
311 s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
312 /* 101581>>16 is equivalent to 155/100 */
313 s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
314 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
315 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
317 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
318 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
323 * Determine which buffers golden and altref should be updated with after this frame.
324 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
326 * Intra frames update all 3 references
327 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
328 * If the update (golden|altref) flag is set, it's updated with the current frame
329 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
330 * If the flag is not set, the number read means:
332 * 1: VP56_FRAME_PREVIOUS
333 * 2: update golden with altref, or update altref with golden
/* Decide which frame slot the golden/altref update refers to (see the
 * block comment above).  With the update flag set the current frame is
 * used; otherwise a 2-bit code selects: 1 -> PREVIOUS, 2 -> the "other"
 * reference (golden<->altref swap), default -> NONE. */
335 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
337 VP56RangeCoder *c = &s->c;
340 return VP56_FRAME_CURRENT;
342 switch (vp8_rac_get_uint(c, 2)) {
344 return VP56_FRAME_PREVIOUS;
346 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
348 return VP56_FRAME_NONE;
/* Reset all DCT token probabilities to the spec defaults, expanding the
 * per-coeff-band defaults across all 16 coefficient positions. */
351 static void vp78_reset_probability_tables(VP8Context *s)
354 for (i = 0; i < 4; i++)
355 for (j = 0; j < 16; j++)
356 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
357 sizeof(s->prob->token[i][j]));
/* Token probability updates (VP8 spec 13.4): for every
 * plane-type/band/context/token slot, an update flag conditioned on
 * vp8_token_update_probs selects whether an 8-bit replacement
 * probability follows; the new value is fanned out to every coefficient
 * position belonging to that band via vp8_coeff_band_indexes. */
360 static void vp78_update_probability_tables(VP8Context *s)
362 VP56RangeCoder *c = &s->c;
365 for (i = 0; i < 4; i++)
366 for (j = 0; j < 8; j++)
367 for (k = 0; k < 3; k++)
368 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
369 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
370 int prob = vp8_rac_get_uint(c, 8);
371 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
372 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
/* Number of MV component probabilities: VP7 uses 17, VP8 19. */
376 #define VP7_MVC_SIZE 17
377 #define VP8_MVC_SIZE 19
/* Inter-frame probability refresh: 4 fresh 16x16 pred-mode probs and 3
 * chroma pred-mode probs, then conditional per-entry MV probability
 * updates (mvc_size entries per component) gated by
 * vp8_mv_update_prob. */
379 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
382 VP56RangeCoder *c = &s->c;
386 for (i = 0; i < 4; i++)
387 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
389 for (i = 0; i < 3; i++)
390 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
392 // 17.2 MV probability update
393 for (i = 0; i < 2; i++)
394 for (j = 0; j < mvc_size; j++)
395 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
396 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
/* Read the golden/altref update flags and resolve, via ref_to_update(),
 * which frame each reference will be refreshed from after this frame. */
399 static void update_refs(VP8Context *s)
401 VP56RangeCoder *c = &s->c;
403 int update_golden = vp8_rac_get(c);
404 int update_altref = vp8_rac_get(c);
406 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
407 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
/* NOTE(review): despite the name, the visible body copies planes 1 and
 * 2 (the chroma planes) at half width/height; the luma copy, if any, is
 * not in this excerpt.  Kept as-is because the caller in
 * vp7_fade_frame() fades luma separately via fade(). */
410 static void copy_luma(AVFrame *dst, AVFrame *src, int width, int height)
414 for (j = 1; j < 3; j++) {
415 for (i = 0; i < height / 2; i++)
416 memcpy(dst->data[j] + i * dst->linesize[j],
417 src->data[j] + i * src->linesize[j], width / 2);
/* Apply the VP7 fade: for each pixel y, write
 * clip_uint8(y + (y * beta >> 8) + alpha) — a per-pixel gain (beta) and
 * offset (alpha).  src and dst share the same linesize.  The alpha/beta
 * parameter declarations are on lines elided from this excerpt. */
421 static void fade(uint8_t *dst, uint8_t *src,
422 int width, int height, int linesize,
427 for (j = 0; j < height; j++) {
428 for (i = 0; i < width; i++) {
429 uint8_t y = src[j * linesize + i];
430 dst[j * linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
/* VP7 fading of the previous frame: read signed 8-bit alpha/beta; on
 * inter frames with a nonzero fade, require a previous frame, and when
 * golden aliases previous, allocate a fresh previous-frame buffer
 * (copying chroma via copy_luma — see its note) so golden is preserved,
 * then fade the luma plane in place into dst. */
435 static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
437 int alpha = (int8_t) vp8_rac_get_uint(c, 8);
438 int beta = (int8_t) vp8_rac_get_uint(c, 8);
441 if (!s->keyframe && (alpha || beta)) {
442 int width = s->mb_width * 16;
443 int height = s->mb_height * 16;
446 if (!s->framep[VP56_FRAME_PREVIOUS])
447 return AVERROR_INVALIDDATA;
450 src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
452 /* preserve the golden frame, write a new previous frame */
453 if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
454 s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
455 if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
458 dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
460 copy_luma(dst, src, width, height);
463 fade(dst->data[0], src->data[0],
464 width, height, dst->linesize[0], alpha, beta);
/* Parse a complete VP7 frame header.  Layout follows the lettered
 * sections below: profile/keyframe from byte 0, a 20-bit first-partition
 * size, keyframe dimensions, macroblock "features", quantizers, golden
 * update flag, probability-retention flag, fade, loop-filter setup,
 * optional scan-order remap, token probability updates and (for inter
 * frames) intra/last probabilities plus MV probability updates.
 * Several control-flow lines are elided from this excerpt. */
470 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
472 VP56RangeCoder *c = &s->c;
473 int part1_size, hscale, vscale, i, j, ret;
474 int width = s->avctx->width;
475 int height = s->avctx->height;
/* Byte 0: bit 0 = inverted keyframe flag, bits 1-3 = profile (only 0/1
 * supported). */
477 s->profile = (buf[0] >> 1) & 7;
478 if (s->profile > 1) {
479 avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
480 return AVERROR_INVALIDDATA;
483 s->keyframe = !(buf[0] & 1);
485 part1_size = AV_RL24(buf) >> 4;
/* Header is 4 bytes for profile 0, shrinking by one byte per profile. */
487 buf += 4 - s->profile;
488 buf_size -= 4 - s->profile;
/* VP7 always uses the 6-tap edge-emphasis filters. */
490 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
492 ff_vp56_init_range_decoder(c, buf, part1_size);
494 buf_size -= part1_size;
496 /* A. Dimension information (keyframes only) */
498 width = vp8_rac_get_uint(c, 12);
499 height = vp8_rac_get_uint(c, 12);
500 hscale = vp8_rac_get_uint(c, 2);
501 vscale = vp8_rac_get_uint(c, 2);
502 if (hscale || vscale)
503 avpriv_request_sample(s->avctx, "Upscaling");
/* Keyframes refresh all references and reset every probability table
 * to the VP7 defaults. */
505 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
506 vp78_reset_probability_tables(s);
507 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
508 sizeof(s->prob->pred16x16));
509 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
510 sizeof(s->prob->pred8x8c));
511 for (i = 0; i < 2; i++)
512 memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
513 sizeof(vp7_mv_default_prob[i]));
514 memset(&s->segmentation, 0, sizeof(s->segmentation));
515 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
516 memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
519 if (s->keyframe || s->profile > 0)
520 memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));
522 /* B. Decoding information for all four macroblock-level features */
523 for (i = 0; i < 4; i++) {
524 s->feature_enabled[i] = vp8_rac_get(c);
525 if (s->feature_enabled[i]) {
526 s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
528 for (j = 0; j < 3; j++)
529 s->feature_index_prob[i][j] =
530 vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/* Feature value width depends on the profile/feature pair; a zero
 * width means the feature carries no explicit values. */
532 if (vp7_feature_value_size[s->profile][i])
533 for (j = 0; j < 4; j++)
534 s->feature_value[i][j] =
535 vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
/* VP7 has no segmentation or loop-filter deltas. */
539 s->segmentation.enabled = 0;
540 s->segmentation.update_map = 0;
541 s->lf_delta.enabled = 0;
/* VP7 carries a single coefficient partition after the header. */
543 s->num_coeff_partitions = 1;
544 ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
546 if (!s->macroblocks_base || /* first frame */
547 width != s->avctx->width || height != s->avctx->height ||
548 (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
549 if ((ret = vp7_update_dimensions(s, width, height)) < 0)
553 /* C. Dequantization indices */
556 /* D. Golden frame update flag (a Flag) for interframes only */
558 s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
559 s->sign_bias[VP56_FRAME_GOLDEN] = 0;
563 s->update_probabilities = 1;
566 if (s->profile > 0) {
567 s->update_probabilities = vp8_rac_get(c);
568 if (!s->update_probabilities)
569 s->prob[1] = s->prob[0];
572 s->fade_present = vp8_rac_get(c);
575 /* E. Fading information for previous frame */
576 if (s->fade_present && vp8_rac_get(c)) {
577 if ((ret = vp7_fade_frame(s ,c)) < 0)
581 /* F. Loop filter type */
583 s->filter.simple = vp8_rac_get(c);
585 /* G. DCT coefficient ordering specification */
/* Optional remap of scan positions 1..15 through the zigzag table. */
587 for (i = 1; i < 16; i++)
588 s->prob[0].scan[i] = zigzag_scan[vp8_rac_get_uint(c, 4)];
590 /* H. Loop filter levels */
592 s->filter.simple = vp8_rac_get(c);
593 s->filter.level = vp8_rac_get_uint(c, 6);
594 s->filter.sharpness = vp8_rac_get_uint(c, 3);
596 /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
597 vp78_update_probability_tables(s);
/* VP7 has no macroblock-skip coding. */
599 s->mbskip_enabled = 0;
601 /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
603 s->prob->intra = vp8_rac_get_uint(c, 8);
604 s->prob->last = vp8_rac_get_uint(c, 8);
605 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
/* Parse a complete VP8 frame header (RFC 6386 §9): uncompressed
 * 3-byte tag (keyframe/profile/show-frame/first-partition size), on
 * keyframes the 0x9d012a start code and 14-bit dimensions, then the
 * compressed header: colorspace/range, segmentation, loop filter,
 * partitions, quantizers, reference sign biases, probability-retention
 * and refresh flags, token probabilities, mbskip, and inter-frame
 * intra/last/golden probabilities plus MV updates.  Several
 * control-flow lines are elided from this excerpt. */
611 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
613 VP56RangeCoder *c = &s->c;
614 int header_size, hscale, vscale, ret;
615 int width = s->avctx->width;
616 int height = s->avctx->height;
/* Frame tag: bit 0 = inverted keyframe, bits 1-3 = profile,
 * bit 4 = show_frame (invisible when clear), bits 5+ = partition size. */
618 s->keyframe = !(buf[0] & 1);
619 s->profile = (buf[0]>>1) & 7;
620 s->invisible = !(buf[0] & 0x10);
621 header_size = AV_RL24(buf) >> 5;
626 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
/* Profile 0 uses the 6-tap subpel filters, higher profiles bilinear. */
629 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
630 sizeof(s->put_pixels_tab));
631 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
632 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
633 sizeof(s->put_pixels_tab));
635 if (header_size > buf_size - 7 * s->keyframe) {
636 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
637 return AVERROR_INVALIDDATA;
/* Keyframe start code 0x9d 0x01 0x2a, read little-endian. */
641 if (AV_RL24(buf) != 0x2a019d) {
642 av_log(s->avctx, AV_LOG_ERROR,
643 "Invalid start code 0x%x\n", AV_RL24(buf));
644 return AVERROR_INVALIDDATA;
646 width = AV_RL16(buf + 3) & 0x3fff;
647 height = AV_RL16(buf + 5) & 0x3fff;
648 hscale = buf[4] >> 6;
649 vscale = buf[6] >> 6;
653 if (hscale || vscale)
654 avpriv_request_sample(s->avctx, "Upscaling");
/* Keyframes refresh all references and reset probabilities to the VP8
 * defaults. */
656 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
657 vp78_reset_probability_tables(s);
658 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
659 sizeof(s->prob->pred16x16));
660 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
661 sizeof(s->prob->pred8x8c));
662 memcpy(s->prob->mvc, vp8_mv_default_prob,
663 sizeof(s->prob->mvc));
664 memset(&s->segmentation, 0, sizeof(s->segmentation));
665 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
668 ff_vp56_init_range_decoder(c, buf, header_size);
670 buf_size -= header_size;
673 s->colorspace = vp8_rac_get(c);
675 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
676 s->fullrange = vp8_rac_get(c);
679 if ((s->segmentation.enabled = vp8_rac_get(c)))
680 parse_segment_info(s);
682 s->segmentation.update_map = 0; // FIXME: move this to some init function?
684 s->filter.simple = vp8_rac_get(c);
685 s->filter.level = vp8_rac_get_uint(c, 6);
686 s->filter.sharpness = vp8_rac_get_uint(c, 3);
688 if ((s->lf_delta.enabled = vp8_rac_get(c)))
692 if (setup_partitions(s, buf, buf_size)) {
693 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
694 return AVERROR_INVALIDDATA;
697 if (!s->macroblocks_base || /* first frame */
698 width != s->avctx->width || height != s->avctx->height)
699 if ((ret = vp8_update_dimensions(s, width, height)) < 0)
706 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
707 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
710 // if we aren't saving this frame's probabilities for future frames,
711 // make a copy of the current probabilities
712 if (!(s->update_probabilities = vp8_rac_get(c)))
713 s->prob[1] = s->prob[0];
715 s->update_last = s->keyframe || vp8_rac_get(c);
717 vp78_update_probability_tables(s);
719 if ((s->mbskip_enabled = vp8_rac_get(c)))
720 s->prob->mbskip = vp8_rac_get_uint(c, 8);
723 s->prob->intra = vp8_rac_get_uint(c, 8);
724 s->prob->last = vp8_rac_get_uint(c, 8);
725 s->prob->golden = vp8_rac_get_uint(c, 8);
726 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
/* Clamp a motion vector componentwise to the per-macroblock legal
 * range kept in s->mv_min / s->mv_max. */
732 static av_always_inline
733 void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
735 dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
736 dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
740 * Motion vector coding, 17.1.
/* Decode one MV component (spec 17.1).  The "long" path reads the
 * magnitude bit-by-bit: low 3 bits, then high bits downward (VP7 uses
 * fewer bits than VP8), with bit 3 coded last only when needed; the
 * "short" path (elided branch) walks a small probability tree via ps.
 * Returns the signed component (p[1] is the sign probability). */
742 static int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
746 if (vp56_rac_get_prob_branchy(c, p[0])) {
749 for (i = 0; i < 3; i++)
750 x += vp56_rac_get_prob(c, p[9 + i]) << i;
751 for (i = (vp7 ? 7 : 9); i > 3; i--)
752 x += vp56_rac_get_prob(c, p[9 + i]) << i;
/* Bit 3 is implicit unless the upper bits are all zero. */
753 if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
757 const uint8_t *ps = p + 2;
758 bit = vp56_rac_get_prob(c, *ps);
761 bit = vp56_rac_get_prob(c, *ps);
764 x += vp56_rac_get_prob(c, *ps);
767 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
/* Select the sub-MV probability set from the left/above neighbour MVs.
 * VP7 uses a single table; VP8 picks among its tables based on which
 * neighbours are zero/equal (the comparisons are on elided lines). */
770 static av_always_inline
771 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
774 return vp7_submv_prob;
777 return vp8_submv_prob[4 - !!left];
779 return vp8_submv_prob[2];
780 return vp8_submv_prob[1 - !!left];
784 * Split motion vector prediction, 16.4.
785 * @returns the number of motion vectors parsed (2, 4 or 16)
/* Split-MV decoding (spec 16.4): pick the partitioning (16x8 / 8x16 /
 * 8x8 / 4x4) from the mbsplit probability tree, then for each partition
 * derive left/above context MVs (from the left/top macroblocks' bmv
 * arrays or earlier partitions of this MB), choose the sub-mode via
 * get_submv_prob, and either read a new MV, zero it, or copy the
 * left/above MV.  Returns the number of MVs parsed (2, 4 or 16).
 * Some context-setup lines are elided from this excerpt. */
787 static av_always_inline
788 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
789 int layout, int is_vp7)
793 VP8Macroblock *top_mb;
794 VP8Macroblock *left_mb = &mb[-1];
795 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
796 const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
798 VP56mv *left_mv = left_mb->bmv;
799 VP56mv *cur_mv = mb->bmv;
801 if (!layout) // layout is inlined, s->mb_layout is not
804 top_mb = &mb[-s->mb_width - 1];
805 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
806 top_mv = top_mb->bmv;
808 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
809 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
810 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
812 part_idx = VP8_SPLITMVMODE_8x8;
814 part_idx = VP8_SPLITMVMODE_4x4;
817 num = vp8_mbsplit_count[part_idx];
818 mbsplits_cur = vp8_mbsplits[part_idx],
819 firstidx = vp8_mbfirstidx[part_idx];
820 mb->partitioning = part_idx;
822 for (n = 0; n < num; n++) {
824 uint32_t left, above;
825 const uint8_t *submv_prob;
/* Left context: neighbour MB's 4th column for k on the MB edge,
 * otherwise the previous partition in this MB; same idea above. */
828 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
830 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
832 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
834 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
836 submv_prob = get_submv_prob(left, above, is_vp7);
838 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
839 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
840 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
/* NEW4X4: explicit MV delta relative to the MB-level MV. */
841 mb->bmv[n].y = mb->mv.y +
842 read_mv_component(c, s->prob->mvc[0], is_vp7);
843 mb->bmv[n].x = mb->mv.x +
844 read_mv_component(c, s->prob->mvc[1], is_vp7);
846 AV_ZERO32(&mb->bmv[n]);
849 AV_WN32A(&mb->bmv[n], above);
852 AV_WN32A(&mb->bmv[n], left);
860 * The vp7 reference decoder uses a padding macroblock column (added to right
861 * edge of the frame) to guard against illegal macroblock offsets. The
862 * algorithm has bugs that permit offsets to straddle the padding column.
863 * This function replicates those bugs.
865 * @param[out] edge_x macroblock x address
866 * @param[out] edge_y macroblock y address
868 * @return macroblock offset legal (boolean)
/* Map an (mb_x, mb_y) plus offset onto the VP7 virtual MB grid, which
 * is one padding column wider than the frame (see the block comment
 * above).  Offsets that land before `boundary` or in the padding column
 * are rejected; otherwise edge_x/edge_y receive the target address.
 * The return statements are on lines elided from this excerpt. */
870 static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
871 int xoffset, int yoffset, int boundary,
872 int *edge_x, int *edge_y)
874 int vwidth = mb_width + 1;
875 int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
876 if (new < boundary || new % vwidth == vwidth - 1)
878 *edge_y = new / vwidth;
879 *edge_x = new % vwidth;
/* Return the block MV for `subblock`: the mapped bmv entry for split-MV
 * macroblocks, otherwise bmv[0]. */
883 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
885 return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
/* VP7 inter-MB motion vector decoding.  Walk the VP7 MV prediction
 * list, resolving each candidate MB through vp7_calculate_mb_offset()
 * (which replicates the reference decoder's padding-column quirks),
 * accumulate score-weighted counts for zero/nearest/near candidates,
 * then pick ZERO / NEAREST / NEAR / NEW / SPLIT using
 * vp7_mode_contexts.  Some counting branches are on elided lines. */
888 static av_always_inline
889 void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
890 int mb_x, int mb_y, int layout)
892 VP8Macroblock *mb_edge[12];
893 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
894 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
897 uint8_t cnt[3] = { 0 };
898 VP56RangeCoder *c = &s->c;
901 AV_ZERO32(&near_mv[0]);
902 AV_ZERO32(&near_mv[1]);
903 AV_ZERO32(&near_mv[2]);
905 for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
906 const VP7MVPred * pred = &vp7_mv_pred[i];
909 if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
910 pred->yoffset, !s->profile, &edge_x, &edge_y)) {
/* Address the candidate MB differently for the two MB layouts. */
911 VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
912 ? s->macroblocks_base + 1 + edge_x +
913 (s->mb_width + 1) * (edge_y + 1)
914 : s->macroblocks + edge_x +
915 (s->mb_height - edge_y - 1) * 2;
916 uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
918 if (AV_RN32A(&near_mv[CNT_NEAREST])) {
919 if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
921 } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
922 if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
926 AV_WN32A(&near_mv[CNT_NEAR], mv);
930 AV_WN32A(&near_mv[CNT_NEAREST], mv);
939 cnt[idx] += vp7_mv_pred[i].score;
942 mb->partitioning = VP8_SPLITMVMODE_NONE;
944 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
945 mb->mode = VP8_MVMODE_MV;
947 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
949 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
/* Base MV for NEW/SPLIT: the better of zero vs nearest/near. */
951 if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
952 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
954 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
956 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
957 mb->mode = VP8_MVMODE_SPLIT;
958 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
960 mb->mv.y += read_mv_component(c, s->prob->mvc[0], IS_VP7);
961 mb->mv.x += read_mv_component(c, s->prob->mvc[1], IS_VP7);
965 mb->mv = near_mv[CNT_NEAR];
969 mb->mv = near_mv[CNT_NEAREST];
973 mb->mode = VP8_MVMODE_ZERO;
/* VP8 inter-MB motion vector decoding (spec 16.2/16.3).  Collect the
 * MVs of the top, left and top-left neighbours (with sign-bias
 * correction), tally them into zero/nearest/near/splitmv counts, then
 * select ZERO / NEAREST / NEAR / NEW / SPLIT via vp8_mode_contexts and
 * clamp the result to the legal range. */
979 static av_always_inline
980 void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
981 int mb_x, int mb_y, int layout)
983 VP8Macroblock *mb_edge[3] = { 0 /* top */,
986 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
987 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
989 int cur_sign_bias = s->sign_bias[mb->ref_frame];
990 int8_t *sign_bias = s->sign_bias;
992 uint8_t cnt[4] = { 0 };
993 VP56RangeCoder *c = &s->c;
995 if (!layout) { // layout is inlined (s->mb_layout is not)
999 mb_edge[0] = mb - s->mb_width - 1;
1000 mb_edge[2] = mb - s->mb_width - 2;
1003 AV_ZERO32(&near_mv[0]);
1004 AV_ZERO32(&near_mv[1]);
1005 AV_ZERO32(&near_mv[2]);
1007 /* Process MB on top, left and top-left */
1008 #define MV_EDGE_CHECK(n) \
1010 VP8Macroblock *edge = mb_edge[n]; \
1011 int edge_ref = edge->ref_frame; \
1012 if (edge_ref != VP56_FRAME_CURRENT) { \
1013 uint32_t mv = AV_RN32A(&edge->mv); \
1015 if (cur_sign_bias != sign_bias[edge_ref]) { \
1016 /* SWAR negate of the values in mv. */ \
1018 mv = ((mv & 0x7fff7fff) + \
1019 0x00010001) ^ (mv & 0x80008000); \
1021 if (!n || mv != AV_RN32A(&near_mv[idx])) \
1022 AV_WN32A(&near_mv[++idx], mv); \
1023 cnt[idx] += 1 + (n != 2); \
1025 cnt[CNT_ZERO] += 1 + (n != 2); \
1033 mb->partitioning = VP8_SPLITMVMODE_NONE;
1034 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1035 mb->mode = VP8_MVMODE_MV;
1037 /* If we have three distinct MVs, merge first and last if they're the same */
1038 if (cnt[CNT_SPLITMV] &&
1039 AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1040 cnt[CNT_NEAREST] += 1;
1042 /* Swap near and nearest if necessary */
1043 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1044 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
1045 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1048 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1049 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1050 /* Choose the best mv out of 0,0 and the nearest mv */
1051 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
/* Repurpose cnt[CNT_SPLITMV] as the split-mode context: weighted
 * count of neighbours coded with SPLIT. */
1052 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
1053 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
1054 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1056 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1057 mb->mode = VP8_MVMODE_SPLIT;
1058 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1060 mb->mv.y += read_mv_component(c, s->prob->mvc[0], IS_VP8);
1061 mb->mv.x += read_mv_component(c, s->prob->mvc[1], IS_VP8);
1062 mb->bmv[0] = mb->mv;
1065 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
1066 mb->bmv[0] = mb->mv;
1069 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
1070 mb->bmv[0] = mb->mv;
1073 mb->mode = VP8_MVMODE_ZERO;
1075 mb->bmv[0] = mb->mv;
/* Read the 16 intra 4x4 prediction modes of a macroblock.  Keyframes
 * use context-dependent trees keyed on the modes above (cached per
 * layout in mb->intra4x4_pred_mode_top or the frame-wide
 * s->intra4x4_pred_mode_top row) and to the left; inter frames use the
 * single context-free tree. */
1079 static av_always_inline
1080 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1081 int mb_x, int keyframe, int layout)
1083 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1086 VP8Macroblock *mb_top = mb - s->mb_width - 1;
1087 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1092 uint8_t *const left = s->intra4x4_pred_mode_left;
1094 top = mb->intra4x4_pred_mode_top;
1096 top = s->intra4x4_pred_mode_top + 4 * mb_x;
1097 for (y = 0; y < 4; y++) {
1098 for (x = 0; x < 4; x++) {
1100 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
1101 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
1102 left[y] = top[x] = *intra4x4;
1108 for (i = 0; i < 16; i++)
1109 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
1110 vp8_pred4x4_prob_inter);
/* Decode one macroblock's mode info: VP7 feature flags (logged when
 * present), segment id (from the bitstream when update_map, else
 * propagated), skip flag, then either intra modes (16x16 tree +
 * optional 4x4 modes + chroma mode) or the reference frame choice and
 * motion vectors via vp7/vp8_decode_mvs.  Some branch lines are elided
 * from this excerpt. */
1114 static av_always_inline
1115 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1116 uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
1118 VP56RangeCoder *c = &s->c;
1119 const char *vp7_feature_name[] = { "q-index",
1121 "partial-golden-update",
1126 for (i = 0; i < 4; i++) {
1127 if (s->feature_enabled[i]) {
1128 if (vp56_rac_get_prob(c, s->feature_present_prob[i])) {
1129 int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
1130 s->feature_index_prob[i]);
1131 av_log(s->avctx, AV_LOG_WARNING,
1132 "Feature %s present in macroblock (value 0x%x)\n",
1133 vp7_feature_name[i], s->feature_value[i][index]);
1137 } else if (s->segmentation.update_map)
1138 *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
1139 else if (s->segmentation.enabled)
1140 *segment = ref ? *ref : *segment;
1141 mb->segment = *segment;
1143 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
1146 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
1147 vp8_pred16x16_prob_intra);
1149 if (mb->mode == MODE_I4x4) {
1150 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
/* Non-I4x4 intra: replicate the implied 4x4 mode into the caches. */
1152 const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1153 : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1154 if (s->mb_layout == 1)
1155 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1157 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1158 AV_WN32A(s->intra4x4_pred_mode_left, modes);
1161 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1162 vp8_pred8x8c_prob_intra);
1163 mb->ref_frame = VP56_FRAME_CURRENT;
1164 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
/* Inter MB: choose previous / golden / altref (VP7 has no altref). */
1166 if (vp56_rac_get_prob_branchy(c, s->prob->last))
1168 (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
1169 : VP56_FRAME_GOLDEN;
1171 mb->ref_frame = VP56_FRAME_PREVIOUS;
1172 s->ref_count[mb->ref_frame - 1]++;
1174 // motion vectors, 16.3
1176 vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1178 vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
/* Inter-frame intra MB. */
1181 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1183 if (mb->mode == MODE_I4x4)
1184 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1186 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1188 mb->ref_frame = VP56_FRAME_CURRENT;
1189 mb->partitioning = VP8_SPLITMVMODE_NONE;
1190 AV_ZERO32(&mb->bmv[0]);
1195 * @param r arithmetic bitstream reader context
1196 * @param block destination for block coefficients
1197 * @param probs probabilities to use when reading trees from the bitstream
1198 * @param i initial coeff index, 0 unless a separate DC block is coded
1199 * @param qmul array holding the dc/ac dequant factor at position 0/1
1201 * @return 0 if no coeffs were decoded
1202 * otherwise, the index of the last coeff decoded plus one
/* Core DCT token decoding loop (see the doc comment above).  Works on a
 * local copy of the range coder for speed (written back on elided
 * lines).  Walks the token tree: EOB / zero / one / small values 2-4 /
 * categories 1-6 with extra bits, then reads the sign and dequantizes
 * into the (possibly reordered) scan position.  qmul[0] applies to the
 * DC coefficient (i == 0), qmul[1] to all AC coefficients. */
1204 static av_always_inline
1205 int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1206 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1207 int i, uint8_t *token_prob, int16_t qmul[2],
1208 const uint8_t scan[16], int vp7)
1210 VP56RangeCoder c = *r;
1215 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
1219 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1221 break; // invalid input; blocks should end with EOB
1222 token_prob = probs[i][0];
1228 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1230 token_prob = probs[i + 1][1];
1232 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1233 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1235 coeff += vp56_rac_get_prob(&c, token_prob[5]);
1239 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1240 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1241 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1242 } else { // DCT_CAT2
1244 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1245 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1247 } else { // DCT_CAT3 and up
1248 int a = vp56_rac_get_prob(&c, token_prob[8]);
1249 int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
1250 int cat = (a << 1) + b;
/* Category base values: 3 + (8 << cat) gives 11, 19, 35, 67. */
1251 coeff = 3 + (8 << cat);
1252 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
1255 token_prob = probs[i + 1][2];
1257 block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
/**
 * VP7 inter-macroblock DC prediction (running DC predictor per reference).
 * Rewrites block[0] and updates the per-reference predictor state in pred[].
 */
1264 static av_always_inline
1265 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1267 int16_t dc = block[0];
/* Restart prediction when the predictor is unset, the DC is zero, or the
 * signs differ; ((x ^ y) >> 31) is nonzero exactly when signs differ. */
1275 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1276 block[0] = pred[0] = dc;
/* NOTE(review): the alternative branch (guard not visible in this excerpt)
 * presumably applies the predicted DC before storing it — confirm against
 * the full source. */
1281 block[0] = pred[0] = dc;
/* Thin VP7 wrapper: lets the compiler specialize the shared coefficient
 * decoder for the VP7 code path (custom scan order, IS_VP7 constant). */
1287 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1289 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1290 int i, uint8_t *token_prob,
1292 const uint8_t scan[16])
1294 return decode_block_coeffs_internal(r, block, probs, i,
1295 token_prob, qmul, scan, IS_VP7);
/* Thin VP8 wrapper; the #ifndef allows an arch-specific (e.g. asm)
 * implementation to override it. VP8 always uses the fixed zigzag scan. */
1298 #ifndef vp8_decode_block_coeffs_internal
1299 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1301 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1302 int i, uint8_t *token_prob,
1305 return decode_block_coeffs_internal(r, block, probs, i,
1306 token_prob, qmul, zigzag_scan, IS_VP8);
1311 * @param c arithmetic bitstream reader context
1312 * @param block destination for block coefficients
1313 * @param probs probabilities to use when reading trees from the bitstream
1314 * @param i initial coeff index, 0 unless a separate DC block is coded
1315 * @param zero_nhood the initial prediction context for number of surrounding
1316 * all-zero blocks (only left/top, so 0-2)
1317 * @param qmul array holding the dc/ac dequant factor at position 0/1
1319 * @return 0 if no coeffs were decoded
1320 * otherwise, the index of the last coeff decoded plus one
/**
 * Entry point for decoding one block's coefficients: handles the common
 * empty-block case inline, then dispatches to the VP7/VP8 specialized
 * decoder. zero_nhood (0-2) selects the initial token probability context
 * from the left/top neighbour non-zero flags.
 */
1322 static av_always_inline
1323 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1324 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1325 int i, int zero_nhood, int16_t qmul[2],
1326 const uint8_t scan[16], int vp7)
1328 uint8_t *token_prob = probs[i][zero_nhood];
/* fast path: block is empty (immediate end-of-block token) */
1329 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1331 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1332 token_prob, qmul, scan)
1333 : vp8_decode_block_coeffs_internal(c, block, probs, i,
/**
 * Decode all coefficient blocks of one macroblock: the optional luma DC
 * (WHT) block, the 16 luma 4x4 blocks and the 8 chroma 4x4 blocks.
 * t_nnz/l_nnz carry the per-column/per-row non-zero flags used as the
 * probability context for neighbouring blocks.
 */
1337 static av_always_inline
1338 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1339 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1342 int i, x, y, luma_start = 0, luma_ctx = 3;
1343 int nnz_pred, nnz, nnz_total = 0;
1344 int segment = mb->segment;
/* a separate DC block exists unless the MB is intra 4x4 or (VP8) split-MV */
1347 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1348 nnz_pred = t_nnz[8] + l_nnz[8];
1350 // decode DC values and do hadamard
1351 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1352 nnz_pred, s->qmat[segment].luma_dc_qmul,
1353 zigzag_scan, is_vp7);
1354 l_nnz[8] = t_nnz[8] = !!nnz;
1356 if (is_vp7 && mb->mode > MODE_I4x4) {
1357 nnz |= inter_predict_dc(td->block_dc,
1358 s->inter_dc_pred[mb->ref_frame - 1]);
/* DC-only blocks get the cheap WHT variant */
1365 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1367 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
/* luma blocks: context and start index depend on whether a DC block
 * was coded separately (luma_start/luma_ctx set accordingly) */
1374 for (y = 0; y < 4; y++)
1375 for (x = 0; x < 4; x++) {
1376 nnz_pred = l_nnz[y] + t_nnz[x];
1377 nnz = decode_block_coeffs(c, td->block[y][x],
1378 s->prob->token[luma_ctx],
1379 luma_start, nnz_pred,
1380 s->qmat[segment].luma_qmul,
1381 s->prob[0].scan, is_vp7);
1382 /* nnz+block_dc may be one more than the actual last index,
1383 * but we don't care */
1384 td->non_zero_count_cache[y][x] = nnz + block_dc;
1385 t_nnz[x] = l_nnz[y] = !!nnz;
1390 // TODO: what to do about dimensions? 2nd dim for luma is x,
1391 // but for chroma it's (y<<1)|x
1392 for (i = 4; i < 6; i++)
1393 for (y = 0; y < 2; y++)
1394 for (x = 0; x < 2; x++) {
1395 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1396 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1397 s->prob->token[2], 0, nnz_pred,
1398 s->qmat[segment].chroma_qmul,
1399 s->prob[0].scan, is_vp7);
1400 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1401 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1405 // if there were no coded coeffs despite the macroblock not being marked skip,
1406 // we MUST not do the inner loop filter and should not do IDCT
1407 // Since skip isn't used for bitstream prediction, just manually set it.
/**
 * Save the bottom pixel row of a macroblock so it can serve as the
 * "top border" (intra-prediction edge) for the macroblock row below,
 * after the loop filter has modified the frame in place.
 */
1412 static av_always_inline
1413 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1414 uint8_t *src_cb, uint8_t *src_cr,
1415 int linesize, int uvlinesize, int simple)
1417 AV_COPY128(top_border, src_y + 15 * linesize);
/* chroma rows follow luma in the border buffer (skipped for the
 * simple filter, which only touches luma) */
1419 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1420 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
/**
 * Swap (xchg=1) or copy the saved top-border pixels into the frame edge
 * above this macroblock, so intra prediction reads the pre-loop-filter
 * pixels; called again after prediction to restore the filtered data.
 */
1424 static av_always_inline
1425 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1426 uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
1427 int mb_y, int mb_width, int simple, int xchg)
1429 uint8_t *top_border_m1 = top_border - 32; // for TL prediction
/* step back one row so we address the edge pixels above the MB */
1431 src_cb -= uvlinesize;
1432 src_cr -= uvlinesize;
1434 #define XCHG(a, b, xchg) \
1442 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1443 XCHG(top_border, src_y, xchg);
1444 XCHG(top_border + 8, src_y + 8, 1);
/* top-right edge pixels only exist when there is an MB to the right */
1445 if (mb_x < mb_width - 1)
1446 XCHG(top_border + 32, src_y + 16, 1);
1448 // only copy chroma for normal loop filter
1449 // or to initialize the top row to 127
1450 if (!simple || !mb_y) {
1451 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1452 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1453 XCHG(top_border + 16, src_cb, 1);
1454 XCHG(top_border + 24, src_cr, 1);
/* Degrade 8x8/16x16 DC prediction when edge neighbours are unavailable:
 * left edge -> top-DC (or flat 128 on the top row); top row -> left-DC. */
1458 static av_always_inline
1459 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1462 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1464 return mb_y ? mode : LEFT_DC_PRED8x8;
/* Degrade 8x8/16x16 TM prediction at frame edges: left edge falls back
 * to vertical (or a flat DC on the top-left corner; VP7 fills with 128,
 * VP8 with 129), top row falls back to horizontal. */
1467 static av_always_inline
1468 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1471 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1473 return mb_y ? mode : HOR_PRED8x8;
/* Replace 8x8/16x16 intra prediction modes that would read pixels
 * outside the frame with safe equivalents (H.264-style edge handling). */
1476 static av_always_inline
1477 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1481 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
/* VERT needs the top row; VP7 substitutes a 128 fill, VP8 a 127 fill */
1483 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
/* HOR needs the left column; VP7 substitutes 128, VP8 129 */
1485 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1486 case PLANE_PRED8x8: /* TM */
1487 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
/* 4x4 analogue of check_tm_pred8x8_mode: degrade TM at frame edges
 * (left edge -> vertical or flat DC, top row -> horizontal). */
1492 static av_always_inline
1493 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1496 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1498 return mb_y ? mode : HOR_VP8_PRED;
/* Replace 4x4 intra prediction modes that would read unavailable edge
 * pixels. Sets *copy_buf when the caller must predict into a temporary
 * edge-padded buffer instead of the frame. */
1502 static av_always_inline
1503 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1504 int *copy_buf, int vp7)
1508 if (!mb_x && mb_y) {
1513 case DIAG_DOWN_LEFT_PRED:
1514 case VERT_LEFT_PRED:
/* these need the top/top-right row; flat fill on the top row */
1515 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
/* left-dependent modes: flat fill on the left edge */
1523 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1525 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1526 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1527 * as 16x16/8x8 DC */
1528 case DIAG_DOWN_RIGHT_PRED:
1529 case VERT_RIGHT_PRED:
/**
 * Perform intra prediction for one macroblock (16x16/8x8 whole-plane
 * modes or per-4x4-subblock modes) and, for the 4x4 path, apply the
 * inverse transform per subblock as soon as it is predicted.
 */
1539 static av_always_inline
1540 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1541 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1542 int x, y, mode, nnz;
1545 /* for the first row, we need to run xchg_mb_border to init the top edge
1546 * to 127 otherwise, skip it if we aren't going to deblock */
/* NOTE(review): the `!mb_y` term is dead — the leading `mb_y &&` makes the
 * condition equivalent to (mb_y && s->deblock_filter && thread 0). */
1547 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1548 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1549 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1550 s->filter.simple, 1);
1552 if (mb->mode < MODE_I4x4) {
1553 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1554 s->hpc.pred16x16[mode](dst[0], s->linesize);
1556 uint8_t *ptr = dst[0];
1557 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
/* out-of-frame fill values differ: VP7 always 128, VP8 127/129 */
1558 const uint8_t lo = is_vp7 ? 128 : 127;
1559 const uint8_t hi = is_vp7 ? 128 : 129;
1560 uint8_t tr_top[4] = { lo, lo, lo, lo };
1562 // all blocks on the right edge of the macroblock use bottom edge
1563 // of the top macroblock for their topright edge
1564 uint8_t *tr_right = ptr - s->linesize + 16;
1566 // if we're on the right edge of the frame, said edge is extended
1567 // from the top macroblock
1568 if (mb_y && mb_x == s->mb_width - 1) {
1569 tr = tr_right[-1] * 0x01010101u;
1570 tr_right = (uint8_t *) &tr;
1574 AV_ZERO128(td->non_zero_count_cache);
1576 for (y = 0; y < 4; y++) {
1577 uint8_t *topright = ptr + 4 - s->linesize;
1578 for (x = 0; x < 4; x++) {
1579 int copy = 0, linesize = s->linesize;
1580 uint8_t *dst = ptr + 4 * x;
/* 5 rows of 8 bytes: scratch with 1 border row + 4 prediction rows */
1581 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];
1583 if ((y == 0 || x == 3) && mb_y == 0) {
1586 topright = tr_right;
1588 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1589 mb_y + y, ©, is_vp7);
/* edge case: predict into the scratch buffer with synthesized edges */
1591 dst = copy_dst + 12;
1595 AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1597 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1601 copy_dst[3] = ptr[4 * x - s->linesize - 1];
/* left-column pixels for the scratch prediction, one per row */
1610 copy_dst[11] = ptr[4 * x - 1];
1611 copy_dst[19] = ptr[4 * x + s->linesize - 1];
1612 copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1613 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1616 s->hpc.pred4x4[mode](dst, topright, linesize);
/* copy the 4x4 prediction back out of the scratch buffer */
1618 AV_COPY32(ptr + 4 * x, copy_dst + 12);
1619 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1620 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1621 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
/* immediately add the residual: nnz==1 means DC-only */
1624 nnz = td->non_zero_count_cache[y][x];
1627 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1628 td->block[y][x], s->linesize);
1630 s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1631 td->block[y][x], s->linesize);
1636 ptr += 4 * s->linesize;
/* chroma: a single 8x8 mode shared by both planes */
1641 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1642 mb_x, mb_y, is_vp7);
1643 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1644 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
/* restore the filtered border pixels swapped out above */
1646 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1647 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1648 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1649 s->filter.simple, 0);
/* Per-subpel-phase edge requirements for the 6-tap/bilinear MC filters,
 * indexed by the 3 fractional MV bits (row 0 doubles as the function
 * pointer index into mc_func[][]). */
1652 static const uint8_t subpel_idx[3][8] = {
1653 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1654 // also function pointer index
1655 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1656 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1662 * @param s VP8 decoding context
1663 * @param dst target buffer for block data at block position
1664 * @param ref reference picture buffer at origin (0, 0)
1665 * @param mv motion vector (relative to block position) to get pixel data from
1666 * @param x_off horizontal position of block from origin (0, 0)
1667 * @param y_off vertical position of block from origin (0, 0)
1668 * @param block_w width of block (16, 8 or 4)
1669 * @param block_h height of block (always same as block_w)
1670 * @param width width of src/dst plane data
1671 * @param height height of src/dst plane data
1672 * @param linesize size of a single line of plane data, including padding
1673 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/**
 * Luma motion compensation for one block. Waits (frame-threading) until
 * the reference rows needed by the filter are decoded, then either runs
 * the MC function directly or routes through emulated_edge_mc when the
 * filter footprint crosses the frame boundary.
 */
1675 static av_always_inline
1676 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1677 ThreadFrame *ref, const VP56mv *mv,
1678 int x_off, int y_off, int block_w, int block_h,
1679 int width, int height, ptrdiff_t linesize,
1680 vp8_mc_func mc_func[3][3])
1682 uint8_t *src = ref->f->data[0];
1685 int src_linesize = linesize;
/* luma MVs are in quarter-pel; low 3 bits (after <<1) select the phase */
1687 int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
1688 int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];
1690 x_off += mv->x >> 2;
1691 y_off += mv->y >> 2;
/* wait for the reference rows the filter will read (progress is in MBs) */
1694 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1695 src += y_off * linesize + x_off;
1696 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1697 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
/* filter footprint leaves the frame: build a padded copy first */
1698 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1699 src - my_idx * linesize - mx_idx,
1700 EDGE_EMU_LINESIZE, linesize,
1701 block_w + subpel_idx[1][mx],
1702 block_h + subpel_idx[1][my],
1703 x_off - mx_idx, y_off - my_idx,
1705 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1706 src_linesize = EDGE_EMU_LINESIZE;
1708 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
/* full-pel MV: plain copy, no edge pixels needed beyond the block */
1710 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1711 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1712 linesize, block_h, 0, 0);
1717 * chroma MC function
1719 * @param s VP8 decoding context
1720 * @param dst1 target buffer for block data at block position (U plane)
1721 * @param dst2 target buffer for block data at block position (V plane)
1722 * @param ref reference picture buffer at origin (0, 0)
1723 * @param mv motion vector (relative to block position) to get pixel data from
1724 * @param x_off horizontal position of block from origin (0, 0)
1725 * @param y_off vertical position of block from origin (0, 0)
1726 * @param block_w width of block (16, 8 or 4)
1727 * @param block_h height of block (always same as block_w)
1728 * @param width width of src/dst plane data
1729 * @param height height of src/dst plane data
1730 * @param linesize size of a single line of plane data, including padding
1731 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/**
 * Chroma motion compensation: same structure as vp8_mc_luma but operates
 * on both chroma planes at once (they share the MV and edge handling).
 */
1733 static av_always_inline
1734 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1735 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1736 int x_off, int y_off, int block_w, int block_h,
1737 int width, int height, ptrdiff_t linesize,
1738 vp8_mc_func mc_func[3][3])
1740 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
/* chroma MVs are in eighth-pel; low 3 bits select the filter phase */
1743 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1744 int my = mv->y & 7, my_idx = subpel_idx[0][my];
1746 x_off += mv->x >> 3;
1747 y_off += mv->y >> 3;
1750 src1 += y_off * linesize + x_off;
1751 src2 += y_off * linesize + x_off;
/* chroma progress granularity: 8-pixel rows per macroblock row */
1752 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1753 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1754 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
/* edge case: pad each plane separately through the emu-edge buffer */
1755 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1756 src1 - my_idx * linesize - mx_idx,
1757 EDGE_EMU_LINESIZE, linesize,
1758 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1759 x_off - mx_idx, y_off - my_idx, width, height);
1760 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1761 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1763 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1764 src2 - my_idx * linesize - mx_idx,
1765 EDGE_EMU_LINESIZE, linesize,
1766 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1767 x_off - mx_idx, y_off - my_idx, width, height);
1768 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1769 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1771 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1772 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
/* full-pel MV path */
1775 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1776 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1777 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
/**
 * Motion-compensate one partition of a macroblock: luma at (bx_off,by_off)
 * with the given MV, then the derived chroma MV for both chroma planes.
 */
1781 static av_always_inline
1782 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1783 ThreadFrame *ref_frame, int x_off, int y_off,
1784 int bx_off, int by_off, int block_w, int block_h,
1785 int width, int height, VP56mv *mv)
1790 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1791 ref_frame, mv, x_off + bx_off, y_off + by_off,
1792 block_w, block_h, width, height, s->linesize,
1793 s->put_pixels_tab[block_w == 8]);
/* chroma offsets/dimensions are half the luma ones (4:2:0) */
1796 if (s->profile == 3) {
1797 /* this block only applies to VP8; it is safe to check
1798 * only the profile, as VP7 profile <= 1 */
1810 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1811 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1812 &uvmv, x_off + bx_off, y_off + by_off,
1813 block_w, block_h, width, height, s->uvlinesize,
1814 s->put_pixels_tab[1 + (block_w == 4)]);
1817 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1818 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1819 static av_always_inline
1820 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1823 /* Don't prefetch refs that haven't been used very often this frame. */
1824 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1825 int x_off = mb_x << 4, y_off = mb_y << 4;
/* rough target: this MB's MV applied 4 MBs (64 px) ahead, +8 to center */
1826 int mx = (mb->mv.x >> 2) + x_off + 8;
1827 int my = (mb->mv.y >> 2) + y_off;
1828 uint8_t **src = s->framep[ref]->tf.f->data;
1829 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1830 /* For threading, a ff_thread_await_progress here might be useful, but
1831 * it actually slows down the decoder. Since a bad prefetch doesn't
1832 * generate bad decoder output, we don't run it here. */
1833 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
/* src[2]-src[1] as "stride" prefetches both chroma planes in one call */
1834 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1835 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1840 * Apply motion vectors to prediction buffer, chapter 18.
1842 static av_always_inline
1843 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1844 VP8Macroblock *mb, int mb_x, int mb_y)
1846 int x_off = mb_x << 4, y_off = mb_y << 4;
1847 int width = 16 * s->mb_width, height = 16 * s->mb_height;
1848 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1849 VP56mv *bmv = mb->bmv;
1851 switch (mb->partitioning) {
1852 case VP8_SPLITMVMODE_NONE:
1853 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1854 0, 0, 16, 16, width, height, &mb->mv);
1856 case VP8_SPLITMVMODE_4x4: {
/* 16 independent luma MVs, one per 4x4 subblock */
1861 for (y = 0; y < 4; y++) {
1862 for (x = 0; x < 4; x++) {
1863 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1864 ref, &bmv[4 * y + x],
1865 4 * x + x_off, 4 * y + y_off, 4, 4,
1866 width, height, s->linesize,
1867 s->put_pixels_tab[2]);
/* chroma MV = rounded average of the 4 covering luma MVs */
1876 for (y = 0; y < 2; y++) {
1877 for (x = 0; x < 2; x++) {
1878 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
1879 mb->bmv[2 * y * 4 + 2 * x + 1].x +
1880 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
1881 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
1882 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
1883 mb->bmv[2 * y * 4 + 2 * x + 1].y +
1884 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
1885 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
/* round-toward-zero divide by 4 (FF_SIGNBIT adds 1 for negatives) */
1886 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
1887 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
1888 if (s->profile == 3) {
1892 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
1893 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
1894 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
1895 width, height, s->uvlinesize,
1896 s->put_pixels_tab[2]);
1901 case VP8_SPLITMVMODE_16x8:
1902 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1903 0, 0, 16, 8, width, height, &bmv[0]);
1904 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1905 0, 8, 16, 8, width, height, &bmv[1]);
1907 case VP8_SPLITMVMODE_8x16:
1908 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1909 0, 0, 8, 16, width, height, &bmv[0]);
1910 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1911 8, 0, 8, 16, width, height, &bmv[1]);
1913 case VP8_SPLITMVMODE_8x8:
1914 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1915 0, 0, 8, 8, width, height, &bmv[0]);
1916 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1917 8, 0, 8, 8, width, height, &bmv[1]);
1918 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1919 0, 8, 8, 8, width, height, &bmv[2]);
1920 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1921 8, 8, 8, 8, width, height, &bmv[3]);
/**
 * Add the inverse-transformed residual to the prediction for a whole
 * inter/16x16-intra macroblock (the I4x4 path adds residuals inside
 * intra_predict instead). nnz4 packs 4 per-block non-zero counts into
 * one 32-bit word; the & ~0x01010101 test distinguishes "all blocks
 * DC-only or empty" (fast dc_add4 path) from the general case.
 */
1926 static av_always_inline
1927 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
1931 if (mb->mode != MODE_I4x4) {
1932 uint8_t *y_dst = dst[0];
1933 for (y = 0; y < 4; y++) {
1934 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1936 if (nnz4 & ~0x01010101) {
/* mixed row: dispatch per block on its nnz (low byte of nnz4) */
1937 for (x = 0; x < 4; x++) {
1938 if ((uint8_t) nnz4 == 1)
1939 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
1942 else if ((uint8_t) nnz4 > 1)
1943 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
1951 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1954 y_dst += 4 * s->linesize;
1958 for (ch = 0; ch < 2; ch++) {
1959 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
1961 uint8_t *ch_dst = dst[1 + ch];
1962 if (nnz4 & ~0x01010101) {
1963 for (y = 0; y < 2; y++) {
1964 for (x = 0; x < 2; x++) {
1965 if ((uint8_t) nnz4 == 1)
1966 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
1967 td->block[4 + ch][(y << 1) + x],
1969 else if ((uint8_t) nnz4 > 1)
1970 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
1971 td->block[4 + ch][(y << 1) + x],
1975 goto chroma_idct_end;
1977 ch_dst += 4 * s->uvlinesize;
1980 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
/**
 * Compute the loop-filter strength for one macroblock from the frame
 * filter level, segment override, and per-reference/per-mode deltas
 * (VP8 spec section 15.2); result is stored in *f for the filter pass.
 */
1988 static av_always_inline
1989 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
1990 VP8FilterStrength *f, int is_vp7)
1992 int interior_limit, filter_level;
1994 if (s->segmentation.enabled) {
1995 filter_level = s->segmentation.filter_level[mb->segment];
1996 if (!s->segmentation.absolute_vals)
1997 filter_level += s->filter.level;
1999 filter_level = s->filter.level;
2001 if (s->lf_delta.enabled) {
2002 filter_level += s->lf_delta.ref[mb->ref_frame];
2003 filter_level += s->lf_delta.mode[mb->mode];
/* filter level is a 6-bit quantity */
2006 filter_level = av_clip_uintp2(filter_level, 6);
2008 interior_limit = filter_level;
2009 if (s->filter.sharpness) {
2010 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2011 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2013 interior_limit = FFMAX(interior_limit, 1);
2015 f->filter_level = filter_level;
2016 f->inner_limit = interior_limit;
/* inner edges are skipped for skipped whole-MB inter blocks in VP8 */
2017 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2018 mb->mode == VP8_MVMODE_SPLIT;
/**
 * Normal (non-simple) loop filter for one macroblock: filters the left
 * and top macroblock edges with the stronger mbedge limits, then the
 * three inner 4-pixel edges per direction when inner_filter is set.
 * VP7 runs the inner horizontal pass after the vertical pass; VP8 before.
 */
2021 static av_always_inline
2022 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2023 int mb_x, int mb_y, int is_vp7)
2025 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2026 int filter_level = f->filter_level;
2027 int inner_limit = f->inner_limit;
2028 int inner_filter = f->inner_filter;
2029 int linesize = s->linesize;
2030 int uvlinesize = s->uvlinesize;
/* high-edge-variance threshold by filter level; row 0 = keyframes */
2031 static const uint8_t hev_thresh_lut[2][64] = {
2032 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2033 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2034 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2036 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2037 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2038 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* edge limits; the VP7 and VP8 formulas differ (gap in excerpt) */
2046 bedge_lim_y = filter_level;
2047 bedge_lim_uv = filter_level * 2;
2048 mbedge_lim = filter_level + 2;
2051 bedge_lim_uv = filter_level * 2 + inner_limit;
2052 mbedge_lim = bedge_lim_y + 4;
2055 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
/* left macroblock edge (horizontal filtering of vertical edge) */
2058 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2059 mbedge_lim, inner_limit, hev_thresh);
2060 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2061 mbedge_lim, inner_limit, hev_thresh);
2064 #define H_LOOP_FILTER_16Y_INNER(cond) \
2065 if (cond && inner_filter) { \
2066 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2067 bedge_lim_y, inner_limit, \
2069 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2070 bedge_lim_y, inner_limit, \
2072 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2073 bedge_lim_y, inner_limit, \
2075 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2076 uvlinesize, bedge_lim_uv, \
2077 inner_limit, hev_thresh); \
2080 H_LOOP_FILTER_16Y_INNER(!is_vp7)
2083 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2084 mbedge_lim, inner_limit, hev_thresh);
2085 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2086 mbedge_lim, inner_limit, hev_thresh);
2090 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2091 linesize, bedge_lim_y,
2092 inner_limit, hev_thresh);
2093 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2094 linesize, bedge_lim_y,
2095 inner_limit, hev_thresh);
2096 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2097 linesize, bedge_lim_y,
2098 inner_limit, hev_thresh);
2099 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2100 dst[2] + 4 * uvlinesize,
2101 uvlinesize, bedge_lim_uv,
2102 inner_limit, hev_thresh);
2105 H_LOOP_FILTER_16Y_INNER(is_vp7)
/**
 * Simple loop filter for one macroblock: luma only, a cheap edge filter
 * on the macroblock boundary plus the three inner edges per direction.
 */
2108 static av_always_inline
2109 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2112 int mbedge_lim, bedge_lim;
2113 int filter_level = f->filter_level;
2114 int inner_limit = f->inner_limit;
2115 int inner_filter = f->inner_filter;
2116 int linesize = s->linesize;
2121 bedge_lim = 2 * filter_level + inner_limit;
2122 mbedge_lim = bedge_lim + 4;
/* vertical edges (h filters), then horizontal edges (v filters) */
2125 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2127 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2128 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2129 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2133 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2135 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2136 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2137 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
/* MV clamping margin around the frame, in eighth-pel units (16 px << 2) */
2141 #define MARGIN (16 << 2)
/**
 * First pass of two-pass (frame-threaded) decoding: walk all macroblocks
 * and decode only their modes/MVs/segment ids, so reference-frame MV
 * prediction is complete before the coefficient/reconstruction pass.
 */
2142 static av_always_inline
2143 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2144 VP8Frame *prev_frame, int is_vp7)
2146 VP8Context *s = avctx->priv_data;
2149 s->mv_min.y = -MARGIN;
2150 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2151 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
/* +1 column of padding on the left of each MB row in the layout */
2152 VP8Macroblock *mb = s->macroblocks_base +
2153 ((s->mb_width + 1) * (mb_y + 1) + 1);
2154 int mb_xy = mb_y * s->mb_width;
2156 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2158 s->mv_min.x = -MARGIN;
2159 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2160 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
/* seed the row above with DC_PRED for 4x4 top-mode prediction */
2162 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2163 DC_PRED * 0x01010101);
2164 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2165 prev_frame && prev_frame->seg_map ?
2166 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
/* VP7 specialization of the shared mode/MV pre-pass */
2175 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2176 VP8Frame *prev_frame)
2178 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
/* VP8 specialization of the shared mode/MV pre-pass */
2181 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2182 VP8Frame *prev_frame)
2184 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
/* Sliced-threading synchronization. check_thread_pos blocks until the
 * other thread (otd) has advanced past (mb_x_check, mb_y_check); update_pos
 * publishes this thread's position and wakes any thread waiting on it.
 * Positions pack (mb_y << 16) | mb_x so a single int compare orders them.
 * The #else variants below compile them away without thread support. */
2188 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2190 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2191 if (otd->thread_mb_pos < tmp) { \
2192 pthread_mutex_lock(&otd->lock); \
2193 td->wait_mb_pos = tmp; \
2195 if (otd->thread_mb_pos >= tmp) \
2197 pthread_cond_wait(&otd->cond, &otd->lock); \
2199 td->wait_mb_pos = INT_MAX; \
2200 pthread_mutex_unlock(&otd->lock); \
2204 #define update_pos(td, mb_y, mb_x) \
2206 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2207 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2209 int is_null = !next_td || !prev_td; \
2210 int pos_check = (is_null) ? 1 \
2211 : (next_td != td && \
2212 pos >= next_td->wait_mb_pos) || \
2214 pos >= prev_td->wait_mb_pos); \
2215 td->thread_mb_pos = pos; \
2216 if (sliced_threading && pos_check) { \
2217 pthread_mutex_lock(&td->lock); \
2218 pthread_cond_broadcast(&td->cond); \
2219 pthread_mutex_unlock(&td->lock); \
2223 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
2224 #define update_pos(td, mb_y, mb_x)
/**
 * Decode one macroblock row (modes, coefficients, prediction, IDCT) —
 * everything except the loop filter, which runs in a separate pass.
 * With sliced threading each job handles every num_jobs-th row and
 * synchronizes with its neighbours via check_thread_pos/update_pos.
 */
2227 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2228 int jobnr, int threadnr, int is_vp7)
2230 VP8Context *s = avctx->priv_data;
2231 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2232 int mb_y = td->thread_mb_pos >> 16;
2233 int mb_x, mb_xy = mb_y * s->mb_width;
2234 int num_jobs = s->num_jobs;
2235 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
/* coefficient partitions cycle per row; count is a power of two */
2236 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2239 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2240 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2241 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2246 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2247 if (mb_y == s->mb_height - 1)
2250 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2251 if (s->mb_layout == 1)
2252 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2254 // Make sure the previous frame has read its segmentation map,
2255 // if we re-use the same map.
2256 if (prev_frame && s->segmentation.enabled &&
2257 !s->segmentation.update_map)
2258 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2259 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2260 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2261 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
/* VP7 carries left_nnz across rows; VP8 resets it every row */
2264 if (!is_vp7 || mb_y == 0)
2265 memset(td->left_nnz, 0, sizeof(td->left_nnz));
2267 s->mv_min.x = -MARGIN;
2268 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2270 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2271 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2272 if (prev_td != td) {
2273 if (threadnr != 0) {
2274 check_thread_pos(td, prev_td,
2275 mb_x + (is_vp7 ? 2 : 1),
2276 mb_y - (is_vp7 ? 2 : 1));
2278 check_thread_pos(td, prev_td,
2279 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2280 mb_y - (is_vp7 ? 2 : 1));
2284 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2286 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2287 dst[2] - dst[1], 2);
2290 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2291 prev_frame && prev_frame->seg_map ?
2292 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2294 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2297 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2299 if (mb->mode <= MODE_I4x4)
2300 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2302 inter_predict(s, td, dst, mb, mb_x, mb_y);
2304 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2307 idct_mb(s, td, dst, mb);
/* skipped MB: clear coefficient contexts instead of decoding */
2309 AV_ZERO64(td->left_nnz);
2310 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2312 /* Reset DC block predictors if they would exist
2313 * if the mb had coefficients */
2314 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2315 td->left_nnz[8] = 0;
2316 s->top_nnz[mb_x][8] = 0;
2320 if (s->deblock_filter)
2321 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
2323 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
/* last job backs up borders itself; the filter pass handles num_jobs==1 */
2324 if (s->filter.simple)
2325 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2326 NULL, NULL, s->linesize, 0, 1);
2328 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2329 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2332 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2340 if (mb_x == s->mb_width + 1) {
2341 update_pos(td, mb_y, s->mb_width + 3);
2343 update_pos(td, mb_y, mb_x);
/* Apply the in-loop deblocking filter to one macroblock row.
 * The row index is taken from td->thread_mb_pos (packed as mb_y << 16 | mb_x).
 * When slice threading is active, check_thread_pos()/update_pos() are used to
 * wait for / signal neighbouring rows so filtering never runs ahead of the
 * data it depends on.
 * NOTE(review): several lines (braces, else branches) are elided in this view;
 * comments describe only what the visible code shows. */
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr, int is_vp7)
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    /* high 16 bits of thread_mb_pos hold the row this job filters */
    int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8ThreadData *prev_td, *next_td;
    /* plane pointers at the top of this mb row: 16 luma / 8 chroma lines per mb */
        curframe->data[0] + 16 * mb_y * s->linesize,
        curframe->data[1] + 8 * mb_y * s->uvlinesize,
        curframe->data[2] + 8 * mb_y * s->uvlinesize
    /* mb_layout==1 stores macroblocks in a (mb_width+1)-wide top-down array;
     * otherwise rows are indexed bottom-up, two entries per row */
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
        mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
    /* rows are distributed round-robin over num_jobs threads */
    prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
    if (mb_y == s->mb_height - 1)
        next_td = &s->thread_data[(jobnr + 1) % num_jobs];
    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        /* wait until the previous row's thread has decoded past the
         * macroblocks this filter step reads (offset by mb_width + 3,
         * the same bias used by update_pos() below) */
        check_thread_pos(td, prev_td,
                         (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
        if (next_td != &s->thread_data[0])
            check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
        /* with a single job the decode loop did not save the top border,
         * so do it here before filtering overwrites those pixels */
        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 NULL, NULL, s->linesize, 0, 1);
                backup_mb_border(s->top_border[mb_x + 1], dst[0],
                                 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        /* simple filter touches luma only; full filter also does chroma */
        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
            filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
        /* publish progress: filtered positions are biased by mb_width + 3 to
         * distinguish them from decode progress within the same row */
        update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
/* Slice-threading worker: decode (and optionally filter) every num_jobs-th
 * macroblock row, starting at row jobnr.  Shared by VP7 and VP8 via the
 * is_vp7 compile-time switch; thin vp7_/vp8_ wrappers pass the constant. */
static av_always_inline
int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
                              int threadnr, int is_vp7)
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;
    td->thread_nr = threadnr;
    /* interleaved row assignment: job j handles rows j, j+num_jobs, ... */
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        if (mb_y >= s->mb_height)
        /* record the row being worked on (row in the high 16 bits) */
        td->thread_mb_pos = mb_y << 16;
        vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, is_vp7);
        if (s->deblock_filter)
            vp8_filter_mb_row(avctx, tdata, jobnr, threadnr, is_vp7);
        /* mark the whole row complete for threads waiting on it */
        update_pos(td, mb_y, INT_MAX & 0xFFFF);
    /* under frame threading, report per-row progress to consumers of this frame */
    if (avctx->active_thread_type == FF_THREAD_FRAME)
        ff_thread_report_progress(&curframe->tf, mb_y, 0);
/* execute2() callback for VP7: forwards to the shared row decoder with the
 * VP7 flag so the always-inline body is specialized at compile time. */
static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
/* execute2() callback for VP8: forwards to the shared row decoder with the
 * VP8 flag so the always-inline body is specialized at compile time. */
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
    return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
/* Decode one VP7/VP8 packet into a frame.
 * Shared top-level driver for both codecs (is_vp7 selects headers/row code at
 * compile time): parses the frame header, manages the reference-frame set
 * (LAST/GOLDEN/ALTREF), allocates the output buffer, then runs the sliced
 * row decoder via execute2().
 * NOTE(review): error-handling gotos, braces and some statements are elided
 * in this view; comments reflect only the visible lines. */
static av_always_inline
int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                      AVPacket *avpkt, int is_vp7)
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;
    /* parse the codec-specific frame header (one of the two runs) */
    ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
    ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
    prev_frame = s->framep[VP56_FRAME_CURRENT];
    /* the frame is "referenced" if any reference slot will be updated from it */
    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
                 s->update_altref == VP56_FRAME_CURRENT;
    /* pick the discard level at which this frame may be skipped entirely */
    skip_thresh = !referenced ? AVDISCARD_NONREF
                : !s->keyframe ? AVDISCARD_NONKEY
    if (avctx->skip_frame >= skip_thresh) {
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].tf.f->data[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i]);
    curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
    /* VP7/VP8 use BT.470BG colorspace; range depends on header flag */
    avctx->colorspace = AVCOL_SPC_BT470BG;
    avctx->color_range = AVCOL_RANGE_JPEG;
    avctx->color_range = AVCOL_RANGE_MPEG;
    /* Given that arithmetic probabilities are updated every frame, it's quite
     * likely that the values we have on a random interframe are complete
     * junk if we didn't start decode on a keyframe. So just don't display
     * anything rather than junk. */
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING,
               "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
                                            : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced))) {
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
    // check if golden and altref are swapped
    /* build next_framep[]: each slot either keeps its frame or takes the
     * frame named by the header's update_* field */
    if (s->update_altref != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
        s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
    if (s->update_golden != VP56_FRAME_NONE)
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
        s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
    s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
    s->next_framep[VP56_FRAME_CURRENT] = curframe;
    /* setup done: frame-threading consumers may start reading our state */
    ff_thread_finish_setup(avctx);
    s->linesize   = curframe->tf.f->linesize[0];
    s->uvlinesize = curframe->tf.f->linesize[1];
    memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
    /* Zero macroblock structures for top/top-left prediction
     * from outside the frame. */
    memset(s->macroblocks + s->mb_height * 2 - 1, 0,
           (s->mb_width + 1) * sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
    memset(s->ref_count, 0, sizeof(s->ref_count));
    /* mb_layout==1: modes/MVs are decoded up front in a separate pass */
    if (s->mb_layout == 1) {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, 1, 0);
        vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
        vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
    if (avctx->active_thread_type == FF_THREAD_FRAME)
        /* slice threading: one job per coefficient partition at most */
        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
    s->num_jobs    = num_jobs;
    s->curframe    = curframe;
    s->prev_frame  = prev_frame;
    s->mv_min.y    = -MARGIN;
    s->mv_max.y    = ((s->mb_height - 1) << 6) + MARGIN;
    /* reset per-thread progress before launching the workers */
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].thread_mb_pos = 0;
        s->thread_data[i].wait_mb_pos   = INT_MAX;
    /* run the row decoders (VP7 or VP8 flavour) */
    avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
    avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];
    /* invisible frames update references but produce no output */
    if (!s->invisible) {
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
/* Public VP8 decode entry point: delegates to the shared VP7/VP8 driver
 * with the VP8 flag. */
int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
#if CONFIG_VP7_DECODER
/* VP7 decode entry point: delegates to the shared VP7/VP8 driver with the
 * VP7 flag. */
static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
#endif /* CONFIG_VP7_DECODER */
/* Free all decoder state: flush/release buffers, then free the per-slot
 * AVFrames allocated by vp8_init_frames(). */
av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
    VP8Context *s = avctx->priv_data;
    /* free_mem=1: also releases the context's own allocations */
    vp8_decode_flush_impl(avctx, 1);
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        av_frame_free(&s->frames[i].tf.f);
/* Allocate the AVFrame for each entry of s->frames.
 * Returns AVERROR(ENOMEM) on allocation failure; frames already allocated
 * are left for ff_vp8_decode_free() to release. */
static av_cold int vp8_init_frames(VP8Context *s)
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f)
            return AVERROR(ENOMEM);
/* Common decoder init for VP7 and VP8: sets the pixel format, wires up the
 * shared and codec-specific DSP/prediction tables, and allocates the frame
 * pool.  On frame-allocation failure the partially initialized context is
 * torn down before returning the error. */
static av_always_inline
int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
    VP8Context *s = avctx->priv_data;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    /* needed for ff_thread_report/await_progress() under frame threading */
    avctx->internal->allocate_progress = 1;
    ff_videodsp_init(&s->vdsp, 8);
    /* common VP7/VP8 DSP first, then codec-specific overrides */
    ff_vp78dsp_init(&s->vp8dsp);
    if (CONFIG_VP7_DECODER && is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
        ff_vp7dsp_init(&s->vp8dsp);
    } else if (CONFIG_VP8_DECODER && !is_vp7) {
        ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
        ff_vp8dsp_init(&s->vp8dsp);
    /* does not change for VP8 */
    memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
#if CONFIG_VP7_DECODER
/* VP7 init entry point: shared init with the VP7 flag. */
static int vp7_decode_init(AVCodecContext *avctx)
    return vp78_decode_init(avctx, IS_VP7);
#endif /* CONFIG_VP7_DECODER */
/* Public VP8 init entry point: shared init with the VP8 flag. */
av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
    return vp78_decode_init(avctx, IS_VP8);
#if CONFIG_VP8_DECODER
/* Frame-threading worker init: each thread copy only needs its own frame
 * pool; the rest of the state is copied per-frame by
 * vp8_decode_update_thread_context(). */
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
    VP8Context *s = avctx->priv_data;
    if ((ret = vp8_init_frames(s)) < 0) {
        ff_vp8_decode_free(avctx);
2706 #define REBASE(pic) pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
/* Frame-threading context update: copy the decoding state the next frame
 * depends on (probabilities, segmentation, loop-filter deltas, sign biases)
 * from the source thread's context, re-reference its frames, and rebase the
 * reference-frame pointers into this context's own frames[] array. */
static int vp8_decode_update_thread_context(AVCodecContext *dst,
                                            const AVCodecContext *src)
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    /* dimensions changed: existing per-mb buffers are stale */
    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    /* pick the probability set future frames will actually use */
    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
    /* take a reference on every frame the source context still holds */
    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->data[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
    /* rebase the source's next_framep pointers into our frames[] array */
    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);
2741 #endif /* CONFIG_VP8_DECODER */
2743 #if CONFIG_VP7_DECODER
/* VP7 decoder registration.  Direct rendering only; VP7 has no threading
 * capability flags here (the VP8 entry below does). */
AVCodec ff_vp7_decoder = {
    .long_name      = NULL_IF_CONFIG_SMALL("On2 VP7"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_VP7,
    .priv_data_size = sizeof(VP8Context),
    .init           = vp7_decode_init,
    .close          = ff_vp8_decode_free,
    .decode         = vp7_decode_frame,
    .capabilities   = CODEC_CAP_DR1,
    .flush          = vp8_decode_flush,
2756 #endif /* CONFIG_VP7_DECODER */
#if CONFIG_VP8_DECODER
/* VP8 decoder registration.  Supports direct rendering plus both frame and
 * slice threading; the thread-copy/update callbacks below implement the
 * frame-threading state handoff. */
AVCodec ff_vp8_decoder = {
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
#endif /* CONFIG_VP8_DECODER */