/*
 * VP7/VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Fiona Glaser
 * Copyright (C) 2012 Daniel Kang
 * Copyright (C) 2014 Peter Ross
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
27 #include "libavutil/imgutils.h"
31 #include "rectangle.h"
/* Free every long-lived scratch allocation owned by the context: the
 * per-thread condition/mutex pairs and filter-strength arrays, the
 * macroblock array, and the per-row top prediction/edge caches.
 * av_freep() NULLs each pointer, so calling this twice is harmless. */
static void free_buffers(VP8Context *s)
    for (i = 0; i < MAX_THREADS; i++) {
        pthread_cond_destroy(&s->thread_data[i].cond);
        pthread_mutex_destroy(&s->thread_data[i].lock);
        av_freep(&s->thread_data[i].filter_strength);
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);
    /* s->macroblocks points into macroblocks_base (see update_dimensions),
     * so it is merely cleared, never freed on its own */
    s->macroblocks = NULL;
/* Allocate the pixel buffer for f (through the frame-threading aware
 * getter) plus a zero-initialized one-byte-per-MB segmentation map.
 * If the seg_map allocation fails the freshly acquired frame buffer is
 * released again, so no half-initialized frame escapes.
 * @param ref nonzero if this frame may later serve as a reference
 * @return 0 on success, a negative AVERROR code on failure */
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
        ff_thread_release_buffer(s->avctx, &f->tf);
        return AVERROR(ENOMEM);
/* Release a frame: drop the segmentation-map buffer reference and hand
 * the pixel buffer back via the thread-aware release helper. */
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
#if CONFIG_VP8_DECODER
/* Turn dst into an additional reference to src (both the frame buffer
 * and the seg_map).  dst is released first; on failure dst is released
 * again so it never holds partial state.
 * @return 0 on success, a negative AVERROR code otherwise */
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
    vp8_release_frame(s, dst);
    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        /* seg_map may legitimately be absent on src; only a failed
         * av_buffer_ref() of an existing map is an error */
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
#endif /* CONFIG_VP8_DECODER */
/* Release all frames and forget every reference slot.
 * free_mem additionally requests freeing the context scratch buffers
 * (NOTE(review): the free_mem branch is not visible in this excerpt —
 * presumably it calls free_buffers(); confirm against the full file). */
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
    VP8Context *s = avctx->priv_data;
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));
/* Codec flush callback: drop all references but keep allocated buffers. */
static void vp8_decode_flush(AVCodecContext *avctx)
    vp8_decode_flush_impl(avctx, 0);
/* Return a frame slot that is not currently referenced as CURRENT,
 * PREVIOUS, GOLDEN or GOLDEN2.  There are 5 slots and at most 4 live
 * references, so one is always free; the FATAL log below is a
 * cannot-happen safety net.  A stale buffer still attached to the
 * chosen slot is released before it is handed out. */
static VP8Frame *vp8_find_free_buffer(VP8Context *s)
    VP8Frame *frame = NULL;

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT]  &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN]   &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
    av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
    /* reclaim the slot's old pixel buffer, if any */
    if (frame->tf.f->data[0])
        vp8_release_frame(s, frame);
/* (Re)allocate all size-dependent buffers for a new coded size.
 * Chooses between two macroblock layouts: the compact one used for
 * frame threading / single thread, and the bordered
 * (mb_width+2)*(mb_height+2) layout used for sliced threading.
 * @return 0 on success, AVERROR(ENOMEM) if any allocation failed */
static av_always_inline
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
    AVCodecContext *avctx = s->avctx;

    if (width  != s->avctx->width ||
        height != s->avctx->height) {
        /* size change invalidates everything, including old buffers */
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);

    s->mb_width  = (s->avctx->coded_width  + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;

    /* NB: '&&' binds tighter than '||' here — VP7 always uses the sliced
     * layout; VP8 only when slice threading with >1 usable thread */
    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
                   FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1;
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                               sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);

    /* intra4x4_pred_mode_top is only needed (and allocated) in the
     * non-sliced layout, hence the combined check */
    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        (!s->intra4x4_pred_mode_top && !s->mb_layout))
        return AVERROR(ENOMEM);

    /* +1 leaves room for the left-edge guard macroblock */
    s->macroblocks = s->macroblocks_base + 1;
/* VP7 entry point for update_dimensions(); lets is_vp7 be inlined. */
static int vp7_update_dimensions(VP8Context *s, int width, int height)
    return update_dimensions(s, width, height, IS_VP7);
/* VP8 entry point for update_dimensions(); lets is_vp7 be inlined. */
static int vp8_update_dimensions(VP8Context *s, int width, int height)
    return update_dimensions(s, width, height, IS_VP8);
/* Parse segmentation header bits (VP8 spec section 9.3): whether the
 * segment map is updated, per-segment quantizer and filter-level
 * deltas/absolutes, and the tree probabilities for segment ids. */
static void parse_segment_info(VP8Context *s)
    VP56RangeCoder *c = &s->c;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);

    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            /* 255 means "probability not transmitted, use default" */
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/* Parse loop-filter delta updates: one signed 6-bit magnitude per
 * reference frame and per macroblock mode (sign read separately,
 * in elided lines, and applied by negating the magnitude). */
static void update_lf_deltas(VP8Context *s)
    VP56RangeCoder *c = &s->c;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

                s->lf_delta.ref[i] = -s->lf_delta.ref[i];

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
/* Read the coefficient-partition count (1/2/4/8) and initialize one
 * range decoder per partition.  The first num_partitions-1 sizes are
 * coded as 24-bit LE values at the start of buf; the last partition
 * simply takes whatever data remains.
 * @return 0 on success; nonzero if a partition size overruns buf_size */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
    const uint8_t *sizes = buf;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    /* skip past the size table itself */
    buf      += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);

    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        if (buf_size - size < 0)
        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);

    /* final partition: all remaining bytes */
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
/* VP7 quantizer parsing: a 7-bit base AC index plus five optional
 * per-plane overrides (each defaults to the AC index when its flag is
 * unset), all mapped through the VP7 lookup tables into qmat[0]. */
static void vp7_get_quants(VP8Context *s)
    VP56RangeCoder *c = &s->c;

    int yac_qi  = vp8_rac_get_uint(c, 7);
    int ydc_qi  = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;

    s->qmat[0].luma_qmul[0]    = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1]    = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    /* chroma DC is capped at 132, matching the VP8 limit below */
    s->qmat[0].chroma_qmul[0]  = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1]  = vp7_yac_qlookup[uvac_qi];
/* VP8 quantizer parsing (spec section 9.6): one 7-bit base index plus
 * five signed 4-bit deltas, expanded into a dequant matrix for each of
 * the four segments (segment base index used when segmentation is on). */
static void get_quants(VP8Context *s)
    VP56RangeCoder *c = &s->c;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)

        /* indices are clamped to 0..127 before the table lookup */
        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta,  7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi,              7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        /* spec-mandated floor/ceiling for the Y2 AC and chroma DC factors */
        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
 *
 * Intra frames update all 3 references
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 * If the update (golden|altref) flag is set, it's updated with the current frame
 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *   1: VP56_FRAME_PREVIOUS
 *   2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
    VP56RangeCoder *c = &s->c;

        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
        return VP56_FRAME_PREVIOUS;
        /* cross-copy: golden takes altref's content and vice versa */
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    return VP56_FRAME_NONE;
/* Reset all token probabilities to the spec defaults; the default table
 * is indexed by coefficient band, hence the vp8_coeff_band[] remap. */
static void vp78_reset_probability_tables(VP8Context *s)
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));
/* Token probability updates (VP8 spec 13.4): for each (plane, band,
 * context, token) a flag selects whether a new 8-bit probability is
 * coded; it is then fanned out to every position mapping to that band. */
static void vp78_update_probability_tables(VP8Context *s)
    VP56RangeCoder *c = &s->c;

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
/* Number of per-component MV probabilities: VP7 uses 17, VP8 19. */
#define VP7_MVC_SIZE 17
#define VP8_MVC_SIZE 19

/* Inter-frame probability updates for 16x16 prediction modes, chroma
 * prediction modes, and (conditionally) the MV component trees.
 * mvc_size is VP7_MVC_SIZE or VP8_MVC_SIZE from the caller. */
static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
    VP56RangeCoder *c = &s->c;

        for (i = 0; i < 4; i++)
            s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);

        for (i = 0; i < 3; i++)
            s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                /* new probability is coded with forced-nonzero LSB */
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
/* Read the golden/altref update flags and resolve them through
 * ref_to_update() into concrete source-frame selections. */
static void update_refs(VP8Context *s)
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
/* NOTE(review): despite its name this copies the two CHROMA planes
 * (data[1]/data[2]) at half width/height — the loop starts at j = 1 and
 * never touches data[0].  Consider renaming to copy_chroma; callers
 * (see vp7_fade_frame) rely on the luma plane being written separately
 * by fade(). */
static void copy_luma(AVFrame *dst, AVFrame *src, int width, int height)
    for (j = 1; j < 3; j++) {
        for (i = 0; i < height / 2; i++)
            memcpy(dst->data[j] + i * dst->linesize[j],
                   src->data[j] + i * src->linesize[j], width / 2);
/* Apply the VP7 fade to one plane: dst[i] = clip(y + y*beta/256 + alpha)
 * for every source sample y.  dst and src share the same linesize
 * (a single stride parameter is passed for both). */
static void fade(uint8_t *dst, uint8_t *src,
                 int width, int height, int linesize,
    for (j = 0; j < height; j++) {
        for (i = 0; i < width; i++) {
            uint8_t y = src[j * linesize + i];
            dst[j * linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
/* VP7 fading of the previous frame: read signed 8-bit alpha/beta and,
 * on inter frames with a nonzero fade, rewrite the previous-frame luma
 * through fade().  When golden and previous share a buffer, a fresh
 * previous frame is allocated first so golden stays untouched.
 * @return 0 on success, negative AVERROR on failure */
static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
    int alpha = (int8_t) vp8_rac_get_uint(c, 8);
    int beta  = (int8_t) vp8_rac_get_uint(c, 8);

    if (!s->keyframe && (alpha || beta)) {
        int width  = s->mb_width * 16;
        int height = s->mb_height * 16;

        /* fading needs a previous frame to operate on */
        if (!s->framep[VP56_FRAME_PREVIOUS])
            return AVERROR_INVALIDDATA;

        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)

            dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;

            /* chroma is copied unchanged (only luma fades) */
            copy_luma(dst, src, width, height);

        fade(dst->data[0], src->data[0],
             width, height, dst->linesize[0], alpha, beta);
/* Parse a complete VP7 frame header: profile/keyframe bits and first
 * partition size from the raw bytes, then (via the range coder) the
 * keyframe dimensions, feature bits, quantizers, scan-order and
 * loop-filter settings, token probability updates and, for inter
 * frames, the intra/last probabilities and MV probability updates.
 * @return 0 on success, a negative AVERROR code on failure */
static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;

    s->keyframe = !(buf[0] & 1);

    part1_size = AV_RL24(buf) >> 4;

    /* profile 0 carries one extra header byte, so the payload offset
     * depends on the profile */
    buf      += 4 - s->profile;
    buf_size -= 4 - s->profile;

    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));

    ff_vp56_init_range_decoder(c, buf, part1_size);

    buf_size -= part1_size;

    /* A. Dimension information (keyframes only) */
        width  = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        /* keyframes reset every reference and all probability state */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));

    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));

    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);

            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;

            /* NOTE(review): the guard indexes the table as [i] but the
             * use below indexes it as [s->profile][i] — one of the two
             * looks wrong; confirm against vp7_feature_value_size's
             * declaration. */
            if (vp7_feature_value_size[i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;

    /* VP7 has no segmentation or loop-filter deltas */
    s->segmentation.enabled    = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled        = 0;

    /* VP7 always has exactly one coefficient partition */
    s->num_coeff_partitions = 1;
    ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)

    /* C. Dequantization indices */

    /* D. Golden frame update flag (a Flag) for interframes only */
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;

    s->sign_bias[VP56_FRAME_GOLDEN] = 0;

    s->update_probabilities = 1;

    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        /* keep a snapshot so this frame's updates don't persist */
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];

        s->fade_present = vp8_rac_get(c);

    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        if ((ret = vp7_fade_frame(s ,c)) < 0)

    /* F. Loop filter type */
        s->filter.simple = vp8_rac_get(c);

    /* G. DCT coefficient ordering specification */
        /* custom scan order, coded as indices into zigzag order */
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = zigzag_scan[vp8_rac_get_uint(c, 4)];

    /* H. Loop filter levels */
        s->filter.simple = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);

    s->mbskip_enabled = 0;

    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last  = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
/* Parse a complete VP8 frame header (spec section 9): uncompressed
 * frame tag and keyframe start code/dimensions, then the compressed
 * header — segmentation, loop filter, partitions, quantizers,
 * reference handling, token/mode/MV probability updates.
 * @return 0 on success, a negative AVERROR code on failure */
static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->keyframe  = !(buf[0] & 1);
    s->profile   = (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;

        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

        /* profile 0 uses the 6-tap edge-emulated filter */
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));

    /* keyframes carry 7 extra bytes (start code + dimensions) */
    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;

        /* 0x9d 0x01 0x2a read little-endian */
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        width  = AV_RL16(buf + 3) & 0x3fff;
        height = AV_RL16(buf + 5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        /* keyframes reset all references and probability state */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));

    ff_vp56_init_range_decoder(c, buf, header_size);

    buf_size -= header_size;

            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);

        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)

        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    vp78_update_probability_tables(s);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

        /* inter-frame-only probabilities */
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
/* Clamp an MV to the current macroblock's legal range
 * (s->mv_min/mv_max are maintained per-row by the caller). */
static av_always_inline
void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
/**
 * Motion vector coding, 17.1.
 * Decode one signed MV component: either a "long" value built bit by
 * bit (low 3 bits ascending, then the high bits descending, with bit 3
 * implied unless no higher bit is set), or a "short" value read from a
 * small probability tree.  Sign is read last.  VP7 uses 8-bit
 * magnitudes, VP8 10-bit (hence the 7-vs-9 loop bound).
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
    if (vp56_rac_get_prob_branchy(c, p[0])) {
        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        /* bit 3 is implicit when any higher bit is set */
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        bit = vp56_rac_get_prob(c, *ps);
        x += vp56_rac_get_prob(c, *ps);

    /* sign bit, only coded for nonzero magnitude */
    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
/* Select the sub-MV probability set from the left/above neighbouring
 * sub-MVs (raw 32-bit MV words): VP7 uses a single table; VP8 picks
 * among five tables depending on which neighbours are zero/equal. */
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
        return vp7_submv_prob;

        return vp8_submv_prob[4 - !!left];
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;

    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        /* sliced layout: the row above lives at a fixed stride */
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv       = top_mb->bmv;

    /* read the partitioning mode from a small tree */
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
            part_idx = VP8_SPLITMVMODE_8x8;
        part_idx = VP8_SPLITMVMODE_4x4;

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        uint32_t left, above;
        const uint8_t *submv_prob;

        /* neighbour sub-MVs come from the adjacent MB on the block edge,
         * from this MB's already-decoded sub-blocks otherwise */
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above, is_vp7);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    /* NEW4x4: explicit MV delta against the MB-level MV */
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                    AV_ZERO32(&mb->bmv[n]);
                AV_WN32A(&mb->bmv[n], above);
            AV_WN32A(&mb->bmv[n], left);
/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
    /* vwidth includes the padding column */
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    /* reject offsets before the boundary or landing in the padding column */
    if (new < boundary || new % vwidth == vwidth - 1)
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
/* Return the sub-MV for the given sub-block in SPLIT mode, otherwise
 * bmv[0] (which holds the whole-MB motion vector). */
static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
/* VP7 motion-vector prediction and mode decoding: scan the fixed
 * candidate list (vp7_mv_pred), accumulate score-weighted counts for
 * the zero / nearest / near candidates, then read the MB mode (ZERO,
 * NEAREST, NEAR, MV or SPLIT) using those counts as tree contexts. */
static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred * pred = &vp7_mv_pred[i];

        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            /* locate the candidate MB in whichever layout is active */
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                  ? s->macroblocks_base + 1 + edge_x +
                                    (s->mb_width + 1) * (edge_y + 1)
                                  : s->macroblocks + edge_x +
                                    (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));

            /* classify the candidate as nearest / near / duplicate */
                if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                    if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                    } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                        if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                        AV_WN32A(&near_mv[CNT_NEAR], mv);
                    AV_WN32A(&near_mv[CNT_NEAREST], mv);

            cnt[idx] += vp7_mv_pred[i].score;

    mb->partitioning = VP8_SPLITMVMODE_NONE;

    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* NEW MV: base is the best of zero/nearest/near by score */
                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR]    ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));

                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    /* whole-MB MV is the last sub-MV decoded */
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0], IS_VP7);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1], IS_VP7);
                mb->mv = near_mv[CNT_NEAR];
            mb->mv = near_mv[CNT_NEAREST];
        mb->mode = VP8_MVMODE_ZERO;
/* VP8 motion-vector prediction and mode decoding (spec 16.2/16.3):
 * survey the top, left and top-left neighbours (sign-bias corrected),
 * build counts for zero/nearest/near/splitmv, then read the MB mode
 * and MV using those counts as contexts. */
static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)                                                      \
        VP8Macroblock *edge = mb_edge[n];                                     \
        int edge_ref = edge->ref_frame;                                       \
        if (edge_ref != VP56_FRAME_CURRENT) {                                 \
            uint32_t mv = AV_RN32A(&edge->mv);                                \
            if (cur_sign_bias != sign_bias[edge_ref]) {                       \
                /* SWAR negate of the values in mv. */                        \
                mv = ((mv & 0x7fff7fff) +                                     \
                      0x00010001) ^ (mv & 0x80008000);                        \
            if (!n || mv != AV_RN32A(&near_mv[idx]))                          \
                AV_WN32A(&near_mv[++idx], mv);                                \
            cnt[idx] += 1 + (n != 2);                                         \
            cnt[CNT_ZERO] += 1 + (n != 2);                                    \

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t,  cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                /* splitmv context: how many neighbours are themselves split */
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0], IS_VP8);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1], IS_VP8);
                    mb->bmv[0] = mb->mv;
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        mb->mode = VP8_MVMODE_ZERO;
        mb->bmv[0] = mb->mv;
/* Decode the 16 intra 4x4 prediction modes of one macroblock.
 * Keyframes use context-dependent trees fed by the modes above and to
 * the left (cached per-row or per-MB depending on layout); inter frames
 * use a single context-free tree. */
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

        /* sliced layout keeps the "top" modes inside the MB above */
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
        uint8_t *const left = s->intra4x4_pred_mode_left;
            top = mb->intra4x4_pred_mode_top;
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                /* decoded mode becomes context for the next row/column */
                left[y] = top[x] = *intra4x4;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);
/* Decode per-macroblock mode information: VP7 feature bits (logged as
 * unsupported when present), segment id, skip flag, then either the
 * intra 16x16/4x4 + chroma modes or the reference frame and motion
 * vectors for inter macroblocks. */
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
    VP56RangeCoder *c = &s->c;
    /* NOTE(review): could be 'static const char *const' to avoid
     * rebuilding the array on every call */
    const char *vp7_feature_name[] = { "q-index",
                                       "partial-golden-update",

        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                if (vp56_rac_get_prob(c, s->feature_present_prob[i])) {
                    int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                                 s->feature_index_prob[i]);
                    av_log(s->avctx, AV_LOG_WARNING,
                           "Feature %s present in macroblock (value 0x%x)\n",
                           vp7_feature_name[i], s->feature_value[i][index]);
    } else if (s->segmentation.update_map)
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
    else if (s->segmentation.enabled)
        /* no map update: inherit from the reference map if available */
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

        /* keyframe: intra modes with the keyframe-specific tree */
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                    vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
            /* replicate the implied 4x4 mode into the context caches */
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout == 1)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                vp8_pred8x8c_prob_intra);
        mb->ref_frame        = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        /* inter MB: pick the reference frame (VP7 has no altref) */
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                                   : VP56_FRAME_GOLDEN;
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;

        // motion vectors, 16.3
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
            vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
        /* intra MB in an inter frame: inter-frame mode trees */
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
/**
 * Decode the DCT coefficients of one block.
 *
 * @param r     arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i     initial coeff index, 0 unless a separate DC block is coded
 * @param qmul  array holding the dc/ac dequant factor at position 0/1
 *
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
                                 int i, uint8_t *token_prob, int16_t qmul[2],
                                 const uint8_t scan[16], int vp7)
    /* work on a local copy of the coder; written back on return */
    VP56RangeCoder c = *r;
    if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB

    if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            break; // invalid input; blocks should end with EOB
        token_prob = probs[i][0];

    if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
        token_prob = probs[i + 1][1];
        if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
            coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
            coeff += vp56_rac_get_prob(&c, token_prob[5]);
            if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                    coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                } else { // DCT_CAT2
                    coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                    coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
            } else { // DCT_CAT3 and up
                int a   = vp56_rac_get_prob(&c, token_prob[8]);
                int b   = vp56_rac_get_prob(&c, token_prob[9 + a]);
                int cat = (a << 1) + b;
                coeff   = 3 + (8 << cat);
                coeff  += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
        token_prob = probs[i + 1][2];
        /* qmul[0] applies to the DC coeff (i == 0), qmul[1] to AC */
        block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
1263 static av_always_inline
/* VP7-style inter-block DC prediction: reconcile the decoded DC with the
 * running predictor pred[] for this reference frame.
 * NOTE(review): lines are missing between the two identical-looking
 * assignments below; upstream they sit in different branches of the
 * sign/zero test. */
1264 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1266 int16_t dc = block[0];
/* bitwise | and the arithmetic >> 31 sign extraction are intentional:
 * this evaluates "either value is zero or their signs differ" without
 * short-circuit branches */
1274 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1275 block[0] = pred[0] = dc;
1280 block[0] = pred[0] = dc;
/* Thin VP7 wrapper: forwards to the shared coefficient decoder with the
 * caller-supplied scan order and the compile-time IS_VP7 flag so the
 * common code can specialize. */
1286 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1288 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1289 int i, uint8_t *token_prob,
1291 const uint8_t scan[16])
1293 return decode_block_coeffs_internal(r, block, probs, i,
1294 token_prob, qmul, scan, IS_VP7);
/* VP8 wrapper around the shared coefficient decoder. Guarded by #ifndef so
 * an architecture-specific (asm) implementation can override it by
 * defining the macro. VP8 always uses the fixed zigzag_scan order. */
1297 #ifndef vp8_decode_block_coeffs_internal
1298 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1300 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1301 int i, uint8_t *token_prob,
1304 return decode_block_coeffs_internal(r, block, probs, i,
1305 token_prob, qmul, zigzag_scan, IS_VP8);
1310 * @param c arithmetic bitstream reader context
1311 * @param block destination for block coefficients
1312 * @param probs probabilities to use when reading trees from the bitstream
1313 * @param i initial coeff index, 0 unless a separate DC block is coded
1314 * @param zero_nhood the initial prediction context for number of surrounding
1315 * all-zero blocks (only left/top, so 0-2)
1316 * @param qmul array holding the dc/ac dequant factor at position 0/1
1318 * @return 0 if no coeffs were decoded
1319 * otherwise, the index of the last coeff decoded plus one
1321 static av_always_inline
1322 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1323 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1324 int i, int zero_nhood, int16_t qmul[2],
1325 const uint8_t scan[16], int vp7)
1327 uint8_t *token_prob = probs[i][zero_nhood];
/* fast path: an immediate EOB means the block is all zero */
1328 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
/* dispatch to the vp7/vp8 specialization; vp7 is a compile-time constant
 * at every call site, so one branch is folded away */
1330 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1331 token_prob, qmul, scan)
1332 : vp8_decode_block_coeffs_internal(c, block, probs, i,
1336 static av_always_inline
/* Decode all residual coefficients of one macroblock: optional luma DC
 * (WHT) block, 16 luma 4x4 blocks, then 2x4 chroma blocks. t_nnz/l_nnz
 * hold the top/left non-zero flags used as decoding contexts (index 8 is
 * the DC block). */
1337 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1338 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1341 int i, x, y, luma_start = 0, luma_ctx = 3;
1342 int nnz_pred, nnz, nnz_total = 0;
1343 int segment = mb->segment;
/* a separate luma DC block exists unless the mb is I4x4 (or, for VP8
 * only, SPLIT-mv) */
1346 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1347 nnz_pred = t_nnz[8] + l_nnz[8];
1349 // decode DC values and do hadamard
1350 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1351 nnz_pred, s->qmat[segment].luma_dc_qmul,
1352 zigzag_scan, is_vp7);
1353 l_nnz[8] = t_nnz[8] = !!nnz;
/* VP7 inter macroblocks additionally run DC prediction per reference */
1355 if (is_vp7 && mb->mode > MODE_I4x4) {
1356 nnz |= inter_predict_dc(td->block_dc,
1357 s->inter_dc_pred[mb->ref_frame - 1]);
/* _dc variant when only the DC coefficient survived, full WHT otherwise
 * (NOTE(review): the selecting branch is among the missing lines) */
1364 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1366 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
1373 for (y = 0; y < 4; y++)
1374 for (x = 0; x < 4; x++) {
1375 nnz_pred = l_nnz[y] + t_nnz[x];
1376 nnz = decode_block_coeffs(c, td->block[y][x],
1377 s->prob->token[luma_ctx],
1378 luma_start, nnz_pred,
1379 s->qmat[segment].luma_qmul,
1380 s->prob[0].scan, is_vp7);
1381 /* nnz+block_dc may be one more than the actual last index,
1382 * but we don't care */
1383 td->non_zero_count_cache[y][x] = nnz + block_dc;
1384 t_nnz[x] = l_nnz[y] = !!nnz;
1389 // TODO: what to do about dimensions? 2nd dim for luma is x,
1390 // but for chroma it's (y<<1)|x
1391 for (i = 4; i < 6; i++)
1392 for (y = 0; y < 2; y++)
1393 for (x = 0; x < 2; x++) {
1394 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1395 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1396 s->prob->token[2], 0, nnz_pred,
1397 s->qmat[segment].chroma_qmul,
1398 s->prob[0].scan, is_vp7);
1399 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1400 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1404 // if there were no coded coeffs despite the macroblock not being marked skip,
1405 // we MUST not do the inner loop filter and should not do IDCT
1406 // Since skip isn't used for bitstream prediction, just manually set it.
1411 static av_always_inline
/* Save the bottom row of this macroblock (16 luma + 8+8 chroma pixels)
 * into the top_border scratch line so the row below can use it for intra
 * prediction / deblocking after this row has been filtered. Chroma is
 * skipped for the simple loop filter (luma-only). */
1412 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1413 uint8_t *src_cb, uint8_t *src_cr,
1414 int linesize, int uvlinesize, int simple)
1416 AV_COPY128(top_border, src_y + 15 * linesize);
1418 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1419 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1423 static av_always_inline
/* Swap (xchg=1) or copy the saved top-border pixels into the rows just
 * above the current macroblock so the intra predictors see correct
 * neighbours; called once before and once after intra_predict(). Layout
 * per mb in top_border: 16 bytes luma, 8 cb, 8 cr; top_border_m1 is the
 * previous mb's entry, used for the top-left pixel. */
1424 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1425 uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
1426 int mb_y, int mb_width, int simple, int xchg)
1428 uint8_t *top_border_m1 = top_border - 32; // for TL prediction
1430 src_cb -= uvlinesize;
1431 src_cr -= uvlinesize;
1433 #define XCHG(a, b, xchg) \
1441 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1442 XCHG(top_border, src_y, xchg);
/* the right half of the luma border and the top-right extension are
 * always copied (xchg forced to 1): they are only read, never restored */
1443 XCHG(top_border + 8, src_y + 8, 1);
1444 if (mb_x < mb_width - 1)
1445 XCHG(top_border + 32, src_y + 16, 1);
1447 // only copy chroma for normal loop filter
1448 // or to initialize the top row to 127
1449 if (!simple || !mb_y) {
1450 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1451 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1452 XCHG(top_border + 16, src_cb, 1);
1453 XCHG(top_border + 24, src_cr, 1);
1457 static av_always_inline
/* Adjust an 8x8/16x16 DC prediction mode at frame edges: fall back to
 * top-only, left-only or flat-128 DC depending on which neighbours
 * (mb_x/mb_y nonzero) actually exist. */
1458 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1461 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1463 return mb_y ? mode : LEFT_DC_PRED8x8;
1466 static av_always_inline
/* Adjust a TrueMotion 8x8/16x16 mode at frame edges; VP7 pads missing
 * edges with 128, VP8 with 127/129, hence the different DC fallbacks. */
1467 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1470 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1472 return mb_y ? mode : HOR_PRED8x8;
1475 static av_always_inline
/* Map an 8x8/16x16 intra mode to an edge-safe equivalent when the mb sits
 * on the top/left frame border (edge emulation). Dispatches per mode to
 * the DC/TM helpers above; vertical/horizontal fall back to DC with the
 * codec-specific pad value. */
1476 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1480 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1482 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1484 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1485 case PLANE_PRED8x8: /* TM */
1486 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1491 static av_always_inline
/* 4x4 analogue of check_tm_pred8x8_mode: edge-safe replacement for the
 * TrueMotion 4x4 mode at the frame border. */
1492 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1495 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1497 return mb_y ? mode : HOR_VP8_PRED;
1501 static av_always_inline
/* Map a 4x4 intra mode to an edge-safe one at the frame border. Modes
 * that read pixels not expressible by a mode substitution instead set
 * *copy_buf, telling the caller to predict into a padded scratch buffer
 * and copy the result back. */
1502 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1503 int *copy_buf, int vp7)
1507 if (!mb_x && mb_y) {
1512 case DIAG_DOWN_LEFT_PRED:
1513 case VERT_LEFT_PRED:
1514 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1522 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1524 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1525 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1526 * as 16x16/8x8 DC */
1527 case DIAG_DOWN_RIGHT_PRED:
1528 case VERT_RIGHT_PRED:
1537 static av_always_inline
/* Run intra prediction for one macroblock: 16x16 luma for whole-mb modes,
 * otherwise per-4x4 sub-block prediction (with edge emulation via
 * copy_dst), then 8x8 chroma. IDCT of the sub-block residuals is folded
 * into the 4x4 loop so each block's reconstruction is available as a
 * neighbour for the next one. xchg_mb_border is called before and after
 * to expose/restore the saved top border (thread 0 only). */
1538 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1539 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1541 int x, y, mode, nnz;
1544 /* for the first row, we need to run xchg_mb_border to init the top edge
1545 * to 127 otherwise, skip it if we aren't going to deblock */
1546 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1547 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1548 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1549 s->filter.simple, 1);
1551 if (mb->mode < MODE_I4x4) {
1552 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1553 s->hpc.pred16x16[mode](dst[0], s->linesize);
1555 uint8_t *ptr = dst[0];
1556 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
/* edge pad values differ per codec: VP7 pads with 128, VP8 with 127
 * above / 129 to the left */
1557 const uint8_t lo = is_vp7 ? 128 : 127;
1558 const uint8_t hi = is_vp7 ? 128 : 129;
1559 uint8_t tr_top[4] = { lo, lo, lo, lo };
1561 // all blocks on the right edge of the macroblock use bottom edge
1562 // the top macroblock for their topright edge
1563 uint8_t *tr_right = ptr - s->linesize + 16;
1565 // if we're on the right edge of the frame, said edge is extended
1566 // from the top macroblock
1567 if (mb_y && mb_x == s->mb_width - 1) {
1568 tr = tr_right[-1] * 0x01010101u;
1569 tr_right = (uint8_t *) &tr;
1573 AV_ZERO128(td->non_zero_count_cache);
1575 for (y = 0; y < 4; y++) {
1576 uint8_t *topright = ptr + 4 - s->linesize;
1577 for (x = 0; x < 4; x++) {
1578 int copy = 0, linesize = s->linesize;
1579 uint8_t *dst = ptr + 4 * x;
/* 5 rows x 8 bytes scratch: 1 top row + 4 prediction rows, with a
 * 4-byte left margin so dst = copy_dst + 12 has top/left context */
1580 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];
1582 if ((y == 0 || x == 3) && mb_y == 0) {
1585 topright = tr_right;
/* fixed mojibake: "&copy," had been mangled into the (C) sign */
1587 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1588 mb_y + y, &copy, is_vp7);
1590 dst = copy_dst + 12;
1594 AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1596 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1600 copy_dst[3] = ptr[4 * x - s->linesize - 1];
1609 copy_dst[11] = ptr[4 * x - 1];
1610 copy_dst[19] = ptr[4 * x + s->linesize - 1];
1611 copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1612 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1615 s->hpc.pred4x4[mode](dst, topright, linesize);
/* copy the 4 predicted rows back from the scratch buffer */
1617 AV_COPY32(ptr + 4 * x, copy_dst + 12);
1618 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1619 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1620 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
1623 nnz = td->non_zero_count_cache[y][x];
/* add the residual immediately: later 4x4 predictors read these
 * reconstructed pixels as neighbours */
1626 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1627 td->block[y][x], s->linesize);
1629 s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1630 td->block[y][x], s->linesize);
1635 ptr += 4 * s->linesize;
1640 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1641 mb_x, mb_y, is_vp7);
1642 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1643 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1645 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1646 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1647 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1648 s->filter.simple, 0);
/* Per-subpel-phase pixel requirements, indexed by the 3-bit fractional mv
 * component (0 = full-pel). Row 0 doubles as the mc_func table index. */
1651 static const uint8_t subpel_idx[3][8] = {
1652 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1653 // also function pointer index
1654 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1655 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1661 * @param s VP8 decoding context
1662 * @param dst target buffer for block data at block position
1663 * @param ref reference picture buffer at origin (0, 0)
1664 * @param mv motion vector (relative to block position) to get pixel data from
1665 * @param x_off horizontal position of block from origin (0, 0)
1666 * @param y_off vertical position of block from origin (0, 0)
1667 * @param block_w width of block (16, 8 or 4)
1668 * @param block_h height of block (always same as block_w)
1669 * @param width width of src/dst plane data
1670 * @param height height of src/dst plane data
1671 * @param linesize size of a single line of plane data, including padding
1672 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1674 static av_always_inline
1675 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1676 ThreadFrame *ref, const VP56mv *mv,
1677 int x_off, int y_off, int block_w, int block_h,
1678 int width, int height, ptrdiff_t linesize,
1679 vp8_mc_func mc_func[3][3])
1681 uint8_t *src = ref->f->data[0];
1684 int src_linesize = linesize;
/* luma mvs are in quarter-pel; <<1 converts to the 1/8-pel phase used to
 * index subpel_idx */
1686 int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
1687 int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];
1689 x_off += mv->x >> 2;
1690 y_off += mv->y >> 2;
/* wait until the reference frame has decoded up to the lowest row the
 * (filtered) fetch may touch */
1693 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1694 src += y_off * linesize + x_off;
/* if the filter footprint leaves the plane, emulate the edge into the
 * per-thread scratch buffer and source from there */
1695 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1696 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1697 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1698 src - my_idx * linesize - mx_idx,
1699 EDGE_EMU_LINESIZE, linesize,
1700 block_w + subpel_idx[1][mx],
1701 block_h + subpel_idx[1][my],
1702 x_off - mx_idx, y_off - my_idx,
1704 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1705 src_linesize = EDGE_EMU_LINESIZE;
1707 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
/* full-pel path: plain copy, no filtering, no edge emulation needed */
1709 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1710 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1711 linesize, block_h, 0, 0);
1716 * chroma MC function
1718 * @param s VP8 decoding context
1719 * @param dst1 target buffer for block data at block position (U plane)
1720 * @param dst2 target buffer for block data at block position (V plane)
1721 * @param ref reference picture buffer at origin (0, 0)
1722 * @param mv motion vector (relative to block position) to get pixel data from
1723 * @param x_off horizontal position of block from origin (0, 0)
1724 * @param y_off vertical position of block from origin (0, 0)
1725 * @param block_w width of block (16, 8 or 4)
1726 * @param block_h height of block (always same as block_w)
1727 * @param width width of src/dst plane data
1728 * @param height height of src/dst plane data
1729 * @param linesize size of a single line of plane data, including padding
1730 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1732 static av_always_inline
1733 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1734 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1735 int x_off, int y_off, int block_w, int block_h,
1736 int width, int height, ptrdiff_t linesize,
1737 vp8_mc_func mc_func[3][3])
1739 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
/* chroma mvs are already in 1/8-pel units, no rescale needed */
1742 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1743 int my = mv->y & 7, my_idx = subpel_idx[0][my];
1745 x_off += mv->x >> 3;
1746 y_off += mv->y >> 3;
1749 src1 += y_off * linesize + x_off;
1750 src2 += y_off * linesize + x_off;
1751 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
/* same edge-emulation test as vp8_mc_luma, applied to both chroma
 * planes; the scratch buffer is reused for U then V */
1752 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1753 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1754 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1755 src1 - my_idx * linesize - mx_idx,
1756 EDGE_EMU_LINESIZE, linesize,
1757 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1758 x_off - mx_idx, y_off - my_idx, width, height);
1759 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1760 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1762 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1763 src2 - my_idx * linesize - mx_idx,
1764 EDGE_EMU_LINESIZE, linesize,
1765 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1766 x_off - mx_idx, y_off - my_idx, width, height);
1767 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1768 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1770 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1771 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
/* full-pel path for both planes */
1774 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1775 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1776 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1780 static av_always_inline
/* Motion-compensate one partition of a macroblock: luma at (bx_off,by_off)
 * with the given mv, then the derived chroma mv for the co-located chroma
 * block. mc function table is picked by partition size. */
1781 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1782 ThreadFrame *ref_frame, int x_off, int y_off,
1783 int bx_off, int by_off, int block_w, int block_h,
1784 int width, int height, VP56mv *mv)
1789 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1790 ref_frame, mv, x_off + bx_off, y_off + by_off,
1791 block_w, block_h, width, height, s->linesize,
1792 s->put_pixels_tab[block_w == 8]);
/* NOTE(review): the derivation of uvmv from mv (and the halving of the
 * chroma offsets/dimensions) sits in lines missing from this chunk */
1795 if (s->profile == 3) {
1796 /* this block only applies VP8; it is safe to check
1797 * only the profile, as VP7 profile <= 1 */
1809 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1810 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1811 &uvmv, x_off + bx_off, y_off + by_off,
1812 block_w, block_h, width, height, s->uvlinesize,
1813 s->put_pixels_tab[1 + (block_w == 4)]);
1816 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1817 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1818 static av_always_inline
1819 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1822 /* Don't prefetch refs that haven't been used very often this frame. */
1823 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1824 int x_off = mb_x << 4, y_off = mb_y << 4;
/* reuse the current mb's mv as an estimate for the mb 4 ahead */
1825 int mx = (mb->mv.x >> 2) + x_off + 8;
1826 int my = (mb->mv.y >> 2) + y_off;
1827 uint8_t **src = s->framep[ref]->tf.f->data;
1828 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1829 /* For threading, a ff_thread_await_progress here might be useful, but
1830 * it actually slows down the decoder. Since a bad prefetch doesn't
1831 * generate bad decoder output, we don't run it here. */
1832 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
/* chroma planes are assumed contiguous: src[2]-src[1] is used as the
 * stride between the U and V prefetches */
1833 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1834 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1839 * Apply motion vectors to prediction buffer, chapter 18.
1841 static av_always_inline
1842 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1843 VP8Macroblock *mb, int mb_x, int mb_y)
1845 int x_off = mb_x << 4, y_off = mb_y << 4;
1846 int width = 16 * s->mb_width, height = 16 * s->mb_height;
1847 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1848 VP56mv *bmv = mb->bmv;
1850 switch (mb->partitioning) {
1851 case VP8_SPLITMVMODE_NONE:
1852 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1853 0, 0, 16, 16, width, height, &mb->mv);
1855 case VP8_SPLITMVMODE_4x4: {
/* 16 independent 4x4 luma mvs... */
1860 for (y = 0; y < 4; y++) {
1861 for (x = 0; x < 4; x++) {
1862 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1863 ref, &bmv[4 * y + x],
1864 4 * x + x_off, 4 * y + y_off, 4, 4,
1865 width, height, s->linesize,
1866 s->put_pixels_tab[2]);
/* ...then one chroma mv per 2x2 group of luma mvs, rounded average */
1875 for (y = 0; y < 2; y++) {
1876 for (x = 0; x < 2; x++) {
1877 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
1878 mb->bmv[2 * y * 4 + 2 * x + 1].x +
1879 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
1880 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
1881 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
1882 mb->bmv[2 * y * 4 + 2 * x + 1].y +
1883 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
1884 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
/* round-to-nearest divide by 4; the sign-bit term implements
 * round-half-away-from-zero for negative sums */
1885 uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT - 1))) >> 2;
1886 uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT - 1))) >> 2;
1887 if (s->profile == 3) {
1891 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
1892 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
1893 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
1894 width, height, s->uvlinesize,
1895 s->put_pixels_tab[2]);
1900 case VP8_SPLITMVMODE_16x8:
1901 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1902 0, 0, 16, 8, width, height, &bmv[0]);
1903 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1904 0, 8, 16, 8, width, height, &bmv[1]);
1906 case VP8_SPLITMVMODE_8x16:
1907 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1908 0, 0, 8, 16, width, height, &bmv[0]);
1909 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1910 8, 0, 8, 16, width, height, &bmv[1]);
1912 case VP8_SPLITMVMODE_8x8:
1913 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1914 0, 0, 8, 8, width, height, &bmv[0]);
1915 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1916 8, 0, 8, 8, width, height, &bmv[1]);
1917 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1918 0, 8, 8, 8, width, height, &bmv[2]);
1919 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1920 8, 8, 8, 8, width, height, &bmv[3]);
1925 static av_always_inline
/* Add the inverse-transformed residuals of one macroblock to the
 * prediction in dst. Uses non_zero_count_cache to skip all-zero blocks
 * and to choose between the DC-only and full IDCT per 4x4 block; whole
 * rows of DC-only blocks take the batched _add4y/_add4uv path. */
1926 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
1930 if (mb->mode != MODE_I4x4) {
1931 uint8_t *y_dst = dst[0];
1932 for (y = 0; y < 4; y++) {
/* 4 packed per-block counts; nnz4 == 0x01010101 means "all DC-only" */
1933 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1935 if (nnz4 & ~0x01010101) {
1936 for (x = 0; x < 4; x++) {
/* low byte of nnz4 is the current block's count; shifted out
 * per iteration (shift is among the missing lines) */
1937 if ((uint8_t) nnz4 == 1)
1938 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
1941 else if ((uint8_t) nnz4 > 1)
1942 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
1950 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
1953 y_dst += 4 * s->linesize;
1957 for (ch = 0; ch < 2; ch++) {
1958 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
1960 uint8_t *ch_dst = dst[1 + ch];
1961 if (nnz4 & ~0x01010101) {
1962 for (y = 0; y < 2; y++) {
1963 for (x = 0; x < 2; x++) {
1964 if ((uint8_t) nnz4 == 1)
1965 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
1966 td->block[4 + ch][(y << 1) + x],
1968 else if ((uint8_t) nnz4 > 1)
1969 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
1970 td->block[4 + ch][(y << 1) + x],
1974 goto chroma_idct_end;
1976 ch_dst += 4 * s->uvlinesize;
1979 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
1987 static av_always_inline
/* Compute per-macroblock loop-filter strength: base level from the
 * segment (absolute or delta vs. frame level), plus ref-frame and mode
 * deltas, clipped to 0..63; then derive the interior limit from the
 * sharpness setting. */
1988 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
1989 VP8FilterStrength *f, int is_vp7)
1991 int interior_limit, filter_level;
1993 if (s->segmentation.enabled) {
1994 filter_level = s->segmentation.filter_level[mb->segment];
1995 if (!s->segmentation.absolute_vals)
1996 filter_level += s->filter.level;
1998 filter_level = s->filter.level;
2000 if (s->lf_delta.enabled) {
2001 filter_level += s->lf_delta.ref[mb->ref_frame];
2002 filter_level += s->lf_delta.mode[mb->mode];
2005 filter_level = av_clip_uintp2(filter_level, 6);
2007 interior_limit = filter_level;
2008 if (s->filter.sharpness) {
2009 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2010 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2012 interior_limit = FFMAX(interior_limit, 1);
2014 f->filter_level = filter_level;
2015 f->inner_limit = interior_limit;
/* inner (sub-block) edges are filtered unless the mb is a skipped whole-
 * block prediction; VP7 always filters inner edges */
2016 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2017 mb->mode == VP8_MVMODE_SPLIT;
2020 static av_always_inline
/* Normal (non-simple) loop filter for one macroblock: macroblock edges
 * with the stronger mbedge limit and HEV threshold, then the three inner
 * 4-pixel edges per direction when inner_filter is set. VP7 orders the
 * horizontal-inner pass differently from VP8, hence the macro invoked
 * once with !is_vp7 and once with is_vp7. */
2021 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2022 int mb_x, int mb_y, int is_vp7)
2024 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2025 int filter_level = f->filter_level;
2026 int inner_limit = f->inner_limit;
2027 int inner_filter = f->inner_filter;
2028 int linesize = s->linesize;
2029 int uvlinesize = s->uvlinesize;
/* high-edge-variance threshold by filter level; row 0 = inter frame,
 * row 1 = keyframe */
2030 static const uint8_t hev_thresh_lut[2][64] = {
2031 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2032 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2033 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2035 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2036 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2037 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* edge limits: VP7 branch above, VP8 branch below (selector among the
 * missing lines) */
2045 bedge_lim_y = filter_level;
2046 bedge_lim_uv = filter_level * 2;
2047 mbedge_lim = filter_level + 2;
2050 bedge_lim_uv = filter_level * 2 + inner_limit;
2051 mbedge_lim = bedge_lim_y + 4;
2054 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2057 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2058 mbedge_lim, inner_limit, hev_thresh);
2059 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2060 mbedge_lim, inner_limit, hev_thresh);
2063 #define H_LOOP_FILTER_16Y_INNER(cond) \
2064 if (cond && inner_filter) { \
2065 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2066 bedge_lim_y, inner_limit, \
2068 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2069 bedge_lim_y, inner_limit, \
2071 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2072 bedge_lim_y, inner_limit, \
2074 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2075 uvlinesize, bedge_lim_uv, \
2076 inner_limit, hev_thresh); \
2079 H_LOOP_FILTER_16Y_INNER(!is_vp7)
2082 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2083 mbedge_lim, inner_limit, hev_thresh);
2084 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2085 mbedge_lim, inner_limit, hev_thresh);
2089 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2090 linesize, bedge_lim_y,
2091 inner_limit, hev_thresh);
2092 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2093 linesize, bedge_lim_y,
2094 inner_limit, hev_thresh);
2095 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2096 linesize, bedge_lim_y,
2097 inner_limit, hev_thresh);
2098 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2099 dst[2] + 4 * uvlinesize,
2100 uvlinesize, bedge_lim_uv,
2101 inner_limit, hev_thresh);
/* VP7 runs the horizontal inner pass after the vertical edges */
2104 H_LOOP_FILTER_16Y_INNER(is_vp7)
2107 static av_always_inline
/* Simple loop filter: luma-only, one limit per edge class, no HEV logic.
 * Macroblock edges use mbedge_lim; the three inner edges use bedge_lim. */
2108 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2111 int mbedge_lim, bedge_lim;
2112 int filter_level = f->filter_level;
2113 int inner_limit = f->inner_limit;
2114 int inner_filter = f->inner_filter;
2115 int linesize = s->linesize;
2120 bedge_lim = 2 * filter_level + inner_limit;
2121 mbedge_lim = bedge_lim + 4;
2124 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2126 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2127 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2128 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2132 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2134 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2135 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2136 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
/* mv clamping margin, in 1/8-pel units (16 pixels) */
2140 #define MARGIN (16 << 2)
2141 static av_always_inline
/* Pre-pass used with frame-threading: walk all macroblocks decoding only
 * modes and motion vectors (layout 2), so a consumer thread can start
 * before coefficients are decoded. */
2142 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2143 VP8Frame *prev_frame, int is_vp7)
2145 VP8Context *s = avctx->priv_data;
2148 s->mv_min.y = -MARGIN;
2149 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2150 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
/* +1 stride and +1 offset skip the left/top guard macroblocks */
2151 VP8Macroblock *mb = s->macroblocks_base +
2152 ((s->mb_width + 1) * (mb_y + 1) + 1);
2153 int mb_xy = mb_y * s->mb_width;
2155 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2157 s->mv_min.x = -MARGIN;
2158 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2159 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
/* seed the top prediction modes of the row above with DC_PRED */
2161 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2162 DC_PRED * 0x01010101);
2163 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2164 prev_frame && prev_frame->seg_map ?
2165 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
/* VP7 entry point for the mv/mode pre-pass. */
2174 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2175 VP8Frame *prev_frame)
2177 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
/* VP8 entry point for the mv/mode pre-pass. */
2180 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2181 VP8Frame *prev_frame)
2183 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
/* Sliced-threading synchronization. Thread progress is packed as
 * (mb_y << 16) | mb_x in td->thread_mb_pos. check_thread_pos blocks until
 * the other thread (otd) has passed the given macroblock position;
 * update_pos publishes this thread's position and wakes waiters. The
 * empty definitions below are the non-threaded fallback. */
2187 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2189 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2190 if (otd->thread_mb_pos < tmp) { \
2191 pthread_mutex_lock(&otd->lock); \
2192 td->wait_mb_pos = tmp; \
/* re-check under the lock; cond waits can wake spuriously */ \
2194 if (otd->thread_mb_pos >= tmp) \
2196 pthread_cond_wait(&otd->cond, &otd->lock); \
2198 td->wait_mb_pos = INT_MAX; \
2199 pthread_mutex_unlock(&otd->lock); \
2203 #define update_pos(td, mb_y, mb_x) \
2205 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2206 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2208 int is_null = !next_td || !prev_td; \
/* only broadcast when a neighbour thread may actually be waiting */ \
2209 int pos_check = (is_null) ? 1 \
2210 : (next_td != td && \
2211 pos >= next_td->wait_mb_pos) || \
2213 pos >= prev_td->wait_mb_pos); \
2214 td->thread_mb_pos = pos; \
2215 if (sliced_threading && pos_check) { \
2216 pthread_mutex_lock(&td->lock); \
2217 pthread_cond_broadcast(&td->cond); \
2218 pthread_mutex_unlock(&td->lock); \
2222 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
2223 #define update_pos(td, mb_y, mb_x)
/* Decode one macroblock row (modes, coefficients, prediction, IDCT) but
 * defer loop filtering to vp8_filter_mb_row. With sliced threading each
 * job leapfrogs rows, synchronizing against the previous/next job via
 * check_thread_pos/update_pos. */
2226 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2227 int jobnr, int threadnr, int is_vp7)
2229 VP8Context *s = avctx->priv_data;
2230 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2231 int mb_y = td->thread_mb_pos >> 16;
2232 int mb_x, mb_xy = mb_y * s->mb_width;
2233 int num_jobs = s->num_jobs;
2234 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
/* each coefficient partition serves every num_coeff_partitions-th row */
2235 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2238 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2239 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2240 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2245 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2246 if (mb_y == s->mb_height - 1)
2249 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2250 if (s->mb_layout == 1)
2251 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2253 // Make sure the previous frame has read its segmentation map,
2254 // if we re-use the same map.
2255 if (prev_frame && s->segmentation.enabled &&
2256 !s->segmentation.update_map)
2257 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2258 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2259 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2260 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
/* VP7 carries left_nnz across rows; VP8 resets it per row */
2263 if (!is_vp7 || mb_y == 0)
2264 memset(td->left_nnz, 0, sizeof(td->left_nnz));
2266 s->mv_min.x = -MARGIN;
2267 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2269 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2270 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2271 if (prev_td != td) {
2272 if (threadnr != 0) {
2273 check_thread_pos(td, prev_td,
2274 mb_x + (is_vp7 ? 2 : 1),
2275 mb_y - (is_vp7 ? 2 : 1));
2277 check_thread_pos(td, prev_td,
2278 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2279 mb_y - (is_vp7 ? 2 : 1));
2283 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2285 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2286 dst[2] - dst[1], 2);
2289 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2290 prev_frame && prev_frame->seg_map ?
2291 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2293 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2296 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2298 if (mb->mode <= MODE_I4x4)
2299 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2301 inter_predict(s, td, dst, mb, mb_x, mb_y);
2303 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2306 idct_mb(s, td, dst, mb);
2308 AV_ZERO64(td->left_nnz);
2309 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2311 /* Reset DC block predictors if they would exist
2312 * if the mb had coefficients */
2313 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2314 td->left_nnz[8] = 0;
2315 s->top_nnz[mb_x][8] = 0;
2319 if (s->deblock_filter)
2320 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
2322 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
/* last job backs up the bottom border itself; the filter pass of
 * another job would otherwise race on it */
2323 if (s->filter.simple)
2324 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2325 NULL, NULL, s->linesize, 0, 1);
2327 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2328 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2331 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2339 if (mb_x == s->mb_width + 1) {
2340 update_pos(td, mb_y, s->mb_width + 3);
2342 update_pos(td, mb_y, mb_x);
/* Apply the in-loop deblocking filter to one macroblock row of the
 * current frame (row index taken from td->thread_mb_pos), coordinating
 * with neighbouring slice-thread jobs via check_thread_pos()/update_pos().
 * NOTE(review): the embedded original line numbers jump (2348->2350,
 * 2353->2355, ...), so this listing has elided statements; comments
 * below describe only what is visible. */
2347 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2348 int jobnr, int threadnr, int is_vp7)
2350 VP8Context *s = avctx->priv_data;
2351 VP8ThreadData *td = &s->thread_data[threadnr];
/* thread_mb_pos packs (mb_y << 16) | mb_x; extract the row index */
2352 int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
2353 AVFrame *curframe = s->curframe->tf.f;
2355 VP8ThreadData *prev_td, *next_td;
/* luma rows are 16 pixels tall, chroma rows 8 (YUV 4:2:0) */
2357 curframe->data[0] + 16 * mb_y * s->linesize,
2358 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2359 curframe->data[2] + 8 * mb_y * s->uvlinesize
/* pick the macroblock base for this row; mb_layout selects between the
 * two storage arrangements used elsewhere in this file */
2362 if (s->mb_layout == 1)
2363 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2365 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
/* neighbouring rows are handled by the adjacent slice-thread jobs */
2370 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2371 if (mb_y == s->mb_height - 1)
2374 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2376 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2377 VP8FilterStrength *f = &td->filter_strength[mb_x];
/* wait until the previous row has filtered past our right neighbour
 * (filter positions are offset by mb_width + 3, see update_pos below) */
2379 check_thread_pos(td, prev_td,
2380 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2382 if (next_td != &s->thread_data[0])
2383 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
/* single job: no other thread saved the top border, do it here */
2385 if (num_jobs == 1) {
2386 if (s->filter.simple)
2387 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2388 NULL, NULL, s->linesize, 0, 1);
2390 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2391 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
/* simple filter touches luma only; full filter touches all planes */
2394 if (s->filter.simple)
2395 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2397 filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
/* publish progress; the mb_width + 3 offset makes filter positions
 * sort after the decode positions of the same row */
2402 update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
/* Slice-threading worker: decode (and, when enabled, loop-filter) every
 * num_jobs-th macroblock row, starting at row jobnr.
 * NOTE(review): embedded original line numbers jump (2414->2416,
 * 2418->2420, 2424->2429), so some statements are elided here. */
2406 static av_always_inline
2407 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2408 int threadnr, int is_vp7)
2410 VP8Context *s = avctx->priv_data;
2411 VP8ThreadData *td = &s->thread_data[jobnr];
2412 VP8ThreadData *next_td = NULL, *prev_td = NULL;
2413 VP8Frame *curframe = s->curframe;
2414 int mb_y, num_jobs = s->num_jobs;
2416 td->thread_nr = threadnr;
/* rows are interleaved across jobs: jobnr, jobnr + num_jobs, ... */
2417 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2418 if (mb_y >= s->mb_height)
/* record the current row in the packed ((mb_y << 16) | mb_x) format */
2420 td->thread_mb_pos = mb_y << 16;
2421 vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, is_vp7);
2422 if (s->deblock_filter)
2423 vp8_filter_mb_row(avctx, tdata, jobnr, threadnr, is_vp7);
/* mark the whole row finished for any thread waiting on it */
2424 update_pos(td, mb_y, INT_MAX & 0xFFFF);
/* frame threading: report per-row progress to consumers of this frame */
2429 if (avctx->active_thread_type == FF_THREAD_FRAME)
2430 ff_thread_report_progress(&curframe->tf, mb_y, 0);
/* VP7 row worker passed to avctx->execute2(); forwards to the shared
 * implementation with is_vp7 = IS_VP7. */
2436 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2437 int jobnr, int threadnr)
2439 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
/* VP8 row worker passed to avctx->execute2(); forwards to the shared
 * implementation with is_vp7 = IS_VP8. */
2442 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2443 int jobnr, int threadnr)
2445 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
/* Decode one VP7/VP8 frame from avpkt: parse the header, manage the
 * reference-frame set, run the row workers, and output the frame unless
 * it is invisible or below the skip threshold.
 * NOTE(review): embedded original line numbers jump throughout this
 * listing, so error paths, else branches and closing braces are elided;
 * comments describe only the visible statements. */
2449 static av_always_inline
2450 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2451 AVPacket *avpkt, int is_vp7)
2453 VP8Context *s = avctx->priv_data;
2454 int ret, i, referenced, num_jobs;
2455 enum AVDiscard skip_thresh;
2456 VP8Frame *av_uninit(curframe), *prev_frame;
2459 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2461 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2466 prev_frame = s->framep[VP56_FRAME_CURRENT];
/* a frame is "referenced" if the header says a later frame may predict
 * from it (last/golden/altref update targets the current frame) */
2468 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2469 s->update_altref == VP56_FRAME_CURRENT;
2471 skip_thresh = !referenced ? AVDISCARD_NONREF
2472 : !s->keyframe ? AVDISCARD_NONKEY
/* caller asked to skip frames of this importance: keep the reference
 * layout unchanged and return without decoding */
2475 if (avctx->skip_frame >= skip_thresh) {
2477 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2480 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2482 // release no longer referenced frames
2483 for (i = 0; i < 5; i++)
2484 if (s->frames[i].tf.f->data[0] &&
2485 &s->frames[i] != prev_frame &&
2486 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2487 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2488 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2489 vp8_release_frame(s, &s->frames[i]);
2491 curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2493 /* Given that arithmetic probabilities are updated every frame, it's quite
2494 * likely that the values we have on a random interframe are complete
2495 * junk if we didn't start decode on a keyframe. So just don't display
2496 * anything rather than junk. */
2497 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2498 !s->framep[VP56_FRAME_GOLDEN] ||
2499 !s->framep[VP56_FRAME_GOLDEN2])) {
2500 av_log(avctx, AV_LOG_WARNING,
2501 "Discarding interframe without a prior keyframe!\n");
2502 ret = AVERROR_INVALIDDATA;
2506 curframe->tf.f->key_frame = s->keyframe;
2507 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2508 : AV_PICTURE_TYPE_P;
2509 if ((ret = vp8_alloc_frame(s, curframe, referenced))) {
2510 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
/* build the reference set the NEXT frame will see, honouring the
 * golden/altref update signalling parsed from the header */
2514 // check if golden and altref are swapped
2515 if (s->update_altref != VP56_FRAME_NONE)
2516 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2518 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2520 if (s->update_golden != VP56_FRAME_NONE)
2521 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2523 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2526 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2528 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2530 s->next_framep[VP56_FRAME_CURRENT] = curframe;
/* frame threading: per-frame setup is done, the next frame may start */
2532 ff_thread_finish_setup(avctx);
2534 s->linesize = curframe->tf.f->linesize[0];
2535 s->uvlinesize = curframe->tf.f->linesize[1];
2537 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2538 /* Zero macroblock structures for top/top-left prediction
2539 * from outside the frame. */
2541 memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2542 (s->mb_width + 1) * sizeof(*s->macroblocks));
2543 if (!s->mb_layout && s->keyframe)
2544 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2546 memset(s->ref_count, 0, sizeof(s->ref_count));
2548 if (s->mb_layout == 1) {
2549 // Make sure the previous frame has read its segmentation map,
2550 // if we re-use the same map.
2551 if (prev_frame && s->segmentation.enabled &&
2552 !s->segmentation.update_map)
2553 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2555 vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2557 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2560 if (avctx->active_thread_type == FF_THREAD_FRAME)
/* one slice job per coefficient partition, capped by the thread count */
2563 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2564 s->num_jobs = num_jobs;
2565 s->curframe = curframe;
2566 s->prev_frame = prev_frame;
2567 s->mv_min.y = -MARGIN;
2568 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
/* reset per-thread progress so stale positions never satisfy a wait */
2569 for (i = 0; i < MAX_THREADS; i++) {
2570 s->thread_data[i].thread_mb_pos = 0;
2571 s->thread_data[i].wait_mb_pos = INT_MAX;
2574 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2577 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2580 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
/* rotate: the reference layout built above becomes current */
2581 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2584 // if future frames don't use the updated probabilities,
2585 // reset them to the values we saved
2586 if (!s->update_probabilities)
2587 s->prob[0] = s->prob[1];
/* invisible frames are decoded (they update references) but not output */
2589 if (!s->invisible) {
2590 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2597 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
/* Public VP8 decode entry point (AVCodec.decode); forwards to the
 * shared VP7/VP8 implementation. */
2601 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2604 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2607 #if CONFIG_VP7_DECODER
/* VP7 decode entry point (AVCodec.decode); forwards to the shared
 * VP7/VP8 implementation. */
2608 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2611 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2613 #endif /* CONFIG_VP7_DECODER */
/* Tear down the decoder: flush/free all decoding buffers, then free
 * the AVFrame shell of every reference slot. */
2615 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2617 VP8Context *s = avctx->priv_data;
2620 vp8_decode_flush_impl(avctx, 1);
2621 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2622 av_frame_free(&s->frames[i].tf.f);
/* Allocate an AVFrame shell for every slot of s->frames.
 * Returns AVERROR(ENOMEM) on allocation failure; callers release any
 * partially allocated frames via ff_vp8_decode_free(). */
2627 static av_cold int vp8_init_frames(VP8Context *s)
2630 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2631 s->frames[i].tf.f = av_frame_alloc();
2632 if (!s->frames[i].tf.f)
2633 return AVERROR(ENOMEM);
/* Shared VP7/VP8 decoder init: fix the pixel format, initialize the
 * DSP and intra-prediction tables, seed the scan order and allocate
 * the frame shells.
 * NOTE(review): embedded original line numbers jump (2641->2645,
 * 2663->...), so some statements are elided in this listing. */
2638 static av_always_inline
2639 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2641 VP8Context *s = avctx->priv_data;
2645 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2646 avctx->internal->allocate_progress = 1;
2648 ff_videodsp_init(&s->vdsp, 8);
2650 ff_vp78dsp_init(&s->vp8dsp);
/* layer codec-specific DSP/prediction over the shared VP7/8 tables */
2651 if (CONFIG_VP7_DECODER && is_vp7) {
2652 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2653 ff_vp7dsp_init(&s->vp8dsp);
2654 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2655 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2656 ff_vp8dsp_init(&s->vp8dsp);
2659 /* does not change for VP8 */
2660 memcpy(s->prob[0].scan, zigzag_scan, sizeof(s->prob[0].scan));
2662 if ((ret = vp8_init_frames(s)) < 0) {
/* undo any partial frame allocation before reporting the error */
2663 ff_vp8_decode_free(avctx);
2670 #if CONFIG_VP7_DECODER
/* VP7 init entry point (AVCodec.init) */
2671 static int vp7_decode_init(AVCodecContext *avctx)
2673 return vp78_decode_init(avctx, IS_VP7);
2675 #endif /* CONFIG_VP7_DECODER */
/* Public VP8 init entry point (AVCodec.init) */
2677 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2679 return vp78_decode_init(avctx, IS_VP8);
2682 #if CONFIG_VP8_DECODER
/* Frame-threading per-thread init: each worker context needs its own
 * AVFrame shells; frees partial state on allocation failure. */
2683 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2685 VP8Context *s = avctx->priv_data;
2690 if ((ret = vp8_init_frames(s)) < 0) {
2691 ff_vp8_decode_free(avctx);
/* Translate a frame pointer relative to the source context's frames[]
 * array into the corresponding slot of this context's frames[] array
 * (NULL stays NULL). */
2698 #define REBASE(pic) pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
/* Frame-threading state hand-off: copy probabilities, segmentation,
 * loop-filter deltas, sign biases and the reference-frame layout from
 * the source thread's context into this one.
 * NOTE(review): embedded original line numbers jump (2703->2706,
 * 2720->2726), so some statements are elided in this listing. */
2700 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2701 const AVCodecContext *src)
2703 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
/* dimensions changed upstream: existing per-mb buffers are stale */
2706 if (s->macroblocks_base &&
2707 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2709 s->mb_width = s_src->mb_width;
2710 s->mb_height = s_src->mb_height;
/* take the probability set that future frames will actually use
 * (saved copy when the source frame did not persist its updates) */
2713 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2714 s->segmentation = s_src->segmentation;
2715 s->lf_delta = s_src->lf_delta;
2716 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
/* re-reference every frame the source context has data for */
2718 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2719 if (s_src->frames[i].tf.f->data[0]) {
2720 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
/* adopt the source's post-frame reference layout, rebased locally */
2726 s->framep[0] = REBASE(s_src->next_framep[0]);
2727 s->framep[1] = REBASE(s_src->next_framep[1]);
2728 s->framep[2] = REBASE(s_src->next_framep[2]);
2729 s->framep[3] = REBASE(s_src->next_framep[3]);
2733 #endif /* CONFIG_VP8_DECODER */
2735 #if CONFIG_VP7_DECODER
/* VP7 decoder registration; direct rendering only (no threading
 * capabilities are advertised, unlike the VP8 entry below). */
2736 AVCodec ff_vp7_decoder = {
2738 .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2739 .type = AVMEDIA_TYPE_VIDEO,
2740 .id = AV_CODEC_ID_VP7,
2741 .priv_data_size = sizeof(VP8Context),
2742 .init = vp7_decode_init,
2743 .close = ff_vp8_decode_free,
2744 .decode = vp7_decode_frame,
2745 .capabilities = CODEC_CAP_DR1,
2746 .flush = vp8_decode_flush,
2748 #endif /* CONFIG_VP7_DECODER */
2750 #if CONFIG_VP8_DECODER
/* VP8 decoder registration; advertises direct rendering plus frame and
 * slice threading, with thread-copy/update hooks compiled in only when
 * threading is enabled. */
2751 AVCodec ff_vp8_decoder = {
2753 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2754 .type = AVMEDIA_TYPE_VIDEO,
2755 .id = AV_CODEC_ID_VP8,
2756 .priv_data_size = sizeof(VP8Context),
2757 .init = ff_vp8_decode_init,
2758 .close = ff_vp8_decode_free,
2759 .decode = ff_vp8_decode_frame,
2760 .capabilities = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2761 .flush = vp8_decode_flush,
2762 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2763 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2765 #endif /* CONFIG_VP8_DECODER */