2 * VP7/VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Fiona Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
10 * This file is part of FFmpeg.
12 * FFmpeg is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * FFmpeg is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with FFmpeg; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/imgutils.h"
32 #include "rectangle.h"
41 #if CONFIG_VP7_DECODER && CONFIG_VP8_DECODER
42 #define VPX(vp7, f) (vp7 ? vp7_ ## f : vp8_ ## f)
43 #elif CONFIG_VP7_DECODER
44 #define VPX(vp7, f) vp7_ ## f
45 #else // CONFIG_VP8_DECODER
46 #define VPX(vp7, f) vp8_ ## f
49 static void free_buffers(VP8Context *s)
53 for (i = 0; i < MAX_THREADS; i++) {
55 pthread_cond_destroy(&s->thread_data[i].cond);
56 pthread_mutex_destroy(&s->thread_data[i].lock);
58 av_freep(&s->thread_data[i].filter_strength);
60 av_freep(&s->thread_data);
61 av_freep(&s->macroblocks_base);
62 av_freep(&s->intra4x4_pred_mode_top);
63 av_freep(&s->top_nnz);
64 av_freep(&s->top_border);
66 s->macroblocks = NULL;
69 static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
72 if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
73 ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
75 if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
76 ff_thread_release_buffer(s->avctx, &f->tf);
77 return AVERROR(ENOMEM);
82 static void vp8_release_frame(VP8Context *s, VP8Frame *f)
84 av_buffer_unref(&f->seg_map);
85 ff_thread_release_buffer(s->avctx, &f->tf);
88 #if CONFIG_VP8_DECODER
89 static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
93 vp8_release_frame(s, dst);
95 if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
98 !(dst->seg_map = av_buffer_ref(src->seg_map))) {
99 vp8_release_frame(s, dst);
100 return AVERROR(ENOMEM);
105 #endif /* CONFIG_VP8_DECODER */
107 static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
109 VP8Context *s = avctx->priv_data;
112 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
113 vp8_release_frame(s, &s->frames[i]);
114 memset(s->framep, 0, sizeof(s->framep));
120 static void vp8_decode_flush(AVCodecContext *avctx)
122 vp8_decode_flush_impl(avctx, 0);
125 static VP8Frame *vp8_find_free_buffer(VP8Context *s)
127 VP8Frame *frame = NULL;
130 // find a free buffer
131 for (i = 0; i < 5; i++)
132 if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
133 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
134 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
135 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
136 frame = &s->frames[i];
140 av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
143 if (frame->tf.f->data[0])
144 vp8_release_frame(s, frame);
149 static av_always_inline
150 int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
152 AVCodecContext *avctx = s->avctx;
155 if (width != s->avctx->width || (((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base) ||
156 height != s->avctx->height) {
157 vp8_decode_flush_impl(s->avctx, 1);
159 ret = ff_set_dimensions(s->avctx, width, height);
164 s->mb_width = (s->avctx->coded_width + 15) / 16;
165 s->mb_height = (s->avctx->coded_height + 15) / 16;
167 s->mb_layout = is_vp7 || (avctx->active_thread_type == FF_THREAD_SLICE &&
168 avctx->thread_count > 1);
169 if (!s->mb_layout) { // Frame threading and one thread
170 s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
171 sizeof(*s->macroblocks));
172 s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
173 } else // Sliced threading
174 s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
175 sizeof(*s->macroblocks));
176 s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
177 s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
178 s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
180 if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
181 !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
183 return AVERROR(ENOMEM);
186 for (i = 0; i < MAX_THREADS; i++) {
187 s->thread_data[i].filter_strength =
188 av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
189 if (!s->thread_data[i].filter_strength) {
191 return AVERROR(ENOMEM);
194 pthread_mutex_init(&s->thread_data[i].lock, NULL);
195 pthread_cond_init(&s->thread_data[i].cond, NULL);
199 s->macroblocks = s->macroblocks_base + 1;
204 static int vp7_update_dimensions(VP8Context *s, int width, int height)
206 return update_dimensions(s, width, height, IS_VP7);
209 static int vp8_update_dimensions(VP8Context *s, int width, int height)
211 return update_dimensions(s, width, height, IS_VP8);
215 static void parse_segment_info(VP8Context *s)
217 VP56RangeCoder *c = &s->c;
220 s->segmentation.update_map = vp8_rac_get(c);
222 if (vp8_rac_get(c)) { // update segment feature data
223 s->segmentation.absolute_vals = vp8_rac_get(c);
225 for (i = 0; i < 4; i++)
226 s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
228 for (i = 0; i < 4; i++)
229 s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
231 if (s->segmentation.update_map)
232 for (i = 0; i < 3; i++)
233 s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
236 static void update_lf_deltas(VP8Context *s)
238 VP56RangeCoder *c = &s->c;
241 for (i = 0; i < 4; i++) {
242 if (vp8_rac_get(c)) {
243 s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
246 s->lf_delta.ref[i] = -s->lf_delta.ref[i];
250 for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
251 if (vp8_rac_get(c)) {
252 s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
255 s->lf_delta.mode[i] = -s->lf_delta.mode[i];
260 static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
262 const uint8_t *sizes = buf;
265 s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
267 buf += 3 * (s->num_coeff_partitions - 1);
268 buf_size -= 3 * (s->num_coeff_partitions - 1);
272 for (i = 0; i < s->num_coeff_partitions - 1; i++) {
273 int size = AV_RL24(sizes + 3 * i);
274 if (buf_size - size < 0)
277 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
281 ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
286 static void vp7_get_quants(VP8Context *s)
288 VP56RangeCoder *c = &s->c;
290 int yac_qi = vp8_rac_get_uint(c, 7);
291 int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
292 int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
293 int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
294 int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
295 int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
297 s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
298 s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
299 s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
300 s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
301 s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
302 s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
305 static void vp8_get_quants(VP8Context *s)
307 VP56RangeCoder *c = &s->c;
310 int yac_qi = vp8_rac_get_uint(c, 7);
311 int ydc_delta = vp8_rac_get_sint(c, 4);
312 int y2dc_delta = vp8_rac_get_sint(c, 4);
313 int y2ac_delta = vp8_rac_get_sint(c, 4);
314 int uvdc_delta = vp8_rac_get_sint(c, 4);
315 int uvac_delta = vp8_rac_get_sint(c, 4);
317 for (i = 0; i < 4; i++) {
318 if (s->segmentation.enabled) {
319 base_qi = s->segmentation.base_quant[i];
320 if (!s->segmentation.absolute_vals)
325 s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta, 7)];
326 s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
327 s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)] * 2;
328 /* 101581>>16 is equivalent to 155/100 */
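/* (155 * 65536) / 100 = 101580.8, so the multiply-and-shift scales the looked-up quantizer by roughly 1.55 */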
329 s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)] * 101581 >> 16;
330 s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
331 s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
333 s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
334 s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
339 * Determine which buffers golden and altref should be updated with after this frame.
340 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
342 * Intra frames update all 3 references
343 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
344 * If the update (golden|altref) flag is set, it's updated with the current frame
345 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
346 * If the flag is not set, the number read means:
348 * 1: VP56_FRAME_PREVIOUS
349 * 2: update golden with altref, or update altref with golden
351 static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
353 VP56RangeCoder *c = &s->c;
356 return VP56_FRAME_CURRENT;
358 switch (vp8_rac_get_uint(c, 2)) {
360 return VP56_FRAME_PREVIOUS;
362 return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
364 return VP56_FRAME_NONE;
367 static void vp78_reset_probability_tables(VP8Context *s)
370 for (i = 0; i < 4; i++)
371 for (j = 0; j < 16; j++)
372 memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
373 sizeof(s->prob->token[i][j]));
376 static void vp78_update_probability_tables(VP8Context *s)
378 VP56RangeCoder *c = &s->c;
381 for (i = 0; i < 4; i++)
382 for (j = 0; j < 8; j++)
383 for (k = 0; k < 3; k++)
384 for (l = 0; l < NUM_DCT_TOKENS-1; l++)
385 if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
386 int prob = vp8_rac_get_uint(c, 8);
387 for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
388 s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
392 #define VP7_MVC_SIZE 17
393 #define VP8_MVC_SIZE 19
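/* number of per-component MV probabilities that can be updated below (17 for VP7, 19 for VP8) */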
395 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
398 VP56RangeCoder *c = &s->c;
402 for (i = 0; i < 4; i++)
403 s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
405 for (i = 0; i < 3; i++)
406 s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
408 // 17.2 MV probability update
409 for (i = 0; i < 2; i++)
410 for (j = 0; j < mvc_size; j++)
411 if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
412 s->prob->mvc[i][j] = vp8_rac_get_nn(c);
415 static void update_refs(VP8Context *s)
417 VP56RangeCoder *c = &s->c;
419 int update_golden = vp8_rac_get(c);
420 int update_altref = vp8_rac_get(c);
422 s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
423 s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
426 static void copy_chroma(AVFrame *dst, AVFrame *src, int width, int height)
430 for (j = 1; j < 3; j++) {
431 for (i = 0; i < height / 2; i++)
432 memcpy(dst->data[j] + i * dst->linesize[j],
433 src->data[j] + i * src->linesize[j], width / 2);
437 static void fade(uint8_t *dst, int dst_linesize,
438 const uint8_t *src, int src_linesize,
439 int width, int height,
443 for (j = 0; j < height; j++) {
444 for (i = 0; i < width; i++) {
445 uint8_t y = src[j * src_linesize + i];
446 dst[j * dst_linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
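/* i.e. dst = clip(src * (256 + beta) / 256 + alpha) */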
451 static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
453 int alpha = (int8_t) vp8_rac_get_uint(c, 8);
454 int beta = (int8_t) vp8_rac_get_uint(c, 8);
457 if (!s->keyframe && (alpha || beta)) {
458 int width = s->mb_width * 16;
459 int height = s->mb_height * 16;
462 if (!s->framep[VP56_FRAME_PREVIOUS] ||
463 !s->framep[VP56_FRAME_GOLDEN]) {
464 av_log(s->avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
465 return AVERROR_INVALIDDATA;
469 src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
471 /* preserve the golden frame, write a new previous frame */
472 if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
473 s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
474 if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
477 dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
479 copy_chroma(dst, src, width, height);
482 fade(dst->data[0], dst->linesize[0],
483 src->data[0], src->linesize[0],
484 width, height, alpha, beta);
490 static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
492 VP56RangeCoder *c = &s->c;
493 int part1_size, hscale, vscale, i, j, ret;
494 int width = s->avctx->width;
495 int height = s->avctx->height;
498 return AVERROR_INVALIDDATA;
501 s->profile = (buf[0] >> 1) & 7;
502 if (s->profile > 1) {
503 avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
504 return AVERROR_INVALIDDATA;
507 s->keyframe = !(buf[0] & 1);
509 part1_size = AV_RL24(buf) >> 4;
511 if (buf_size < 4 - s->profile + part1_size) {
512 av_log(s->avctx, AV_LOG_ERROR, "Buffer size %d is too small, needed: %d\n", buf_size, 4 - s->profile + part1_size);
513 return AVERROR_INVALIDDATA;
516 buf += 4 - s->profile;
517 buf_size -= 4 - s->profile;
519 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
521 ff_vp56_init_range_decoder(c, buf, part1_size);
523 buf_size -= part1_size;
525 /* A. Dimension information (keyframes only) */
527 width = vp8_rac_get_uint(c, 12);
528 height = vp8_rac_get_uint(c, 12);
529 hscale = vp8_rac_get_uint(c, 2);
530 vscale = vp8_rac_get_uint(c, 2);
531 if (hscale || vscale)
532 avpriv_request_sample(s->avctx, "Upscaling");
534 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
535 vp78_reset_probability_tables(s);
536 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
537 sizeof(s->prob->pred16x16));
538 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
539 sizeof(s->prob->pred8x8c));
540 for (i = 0; i < 2; i++)
541 memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
542 sizeof(vp7_mv_default_prob[i]));
543 memset(&s->segmentation, 0, sizeof(s->segmentation));
544 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
545 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
548 if (s->keyframe || s->profile > 0)
549 memset(s->inter_dc_pred, 0, sizeof(s->inter_dc_pred));
551 /* B. Decoding information for all four macroblock-level features */
552 for (i = 0; i < 4; i++) {
553 s->feature_enabled[i] = vp8_rac_get(c);
554 if (s->feature_enabled[i]) {
555 s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
557 for (j = 0; j < 3; j++)
558 s->feature_index_prob[i][j] =
559 vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
561 if (vp7_feature_value_size[s->profile][i])
562 for (j = 0; j < 4; j++)
563 s->feature_value[i][j] =
564 vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
568 s->segmentation.enabled = 0;
569 s->segmentation.update_map = 0;
570 s->lf_delta.enabled = 0;
572 s->num_coeff_partitions = 1;
573 ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
575 if (!s->macroblocks_base || /* first frame */
576 width != s->avctx->width || height != s->avctx->height ||
577 (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
578 if ((ret = vp7_update_dimensions(s, width, height)) < 0)
582 /* C. Dequantization indices */
585 /* D. Golden frame update flag (a Flag) for interframes only */
587 s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
588 s->sign_bias[VP56_FRAME_GOLDEN] = 0;
592 s->update_probabilities = 1;
595 if (s->profile > 0) {
596 s->update_probabilities = vp8_rac_get(c);
597 if (!s->update_probabilities)
598 s->prob[1] = s->prob[0];
601 s->fade_present = vp8_rac_get(c);
604 /* E. Fading information for previous frame */
605 if (s->fade_present && vp8_rac_get(c)) {
606 if ((ret = vp7_fade_frame(s, c)) < 0)
610 /* F. Loop filter type */
612 s->filter.simple = vp8_rac_get(c);
614 /* G. DCT coefficient ordering specification */
616 for (i = 1; i < 16; i++)
617 s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];
619 /* H. Loop filter levels */
621 s->filter.simple = vp8_rac_get(c);
622 s->filter.level = vp8_rac_get_uint(c, 6);
623 s->filter.sharpness = vp8_rac_get_uint(c, 3);
625 /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
626 vp78_update_probability_tables(s);
628 s->mbskip_enabled = 0;
630 /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
632 s->prob->intra = vp8_rac_get_uint(c, 8);
633 s->prob->last = vp8_rac_get_uint(c, 8);
634 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
640 static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
642 VP56RangeCoder *c = &s->c;
643 int header_size, hscale, vscale, ret;
644 int width = s->avctx->width;
645 int height = s->avctx->height;
648 av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
649 return AVERROR_INVALIDDATA;
652 s->keyframe = !(buf[0] & 1);
653 s->profile = (buf[0]>>1) & 7;
654 s->invisible = !(buf[0] & 0x10);
655 header_size = AV_RL24(buf) >> 5;
660 av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
663 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
664 sizeof(s->put_pixels_tab));
665 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
666 memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
667 sizeof(s->put_pixels_tab));
669 if (header_size > buf_size - 7 * s->keyframe) {
670 av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
671 return AVERROR_INVALIDDATA;
675 if (AV_RL24(buf) != 0x2a019d) {
676 av_log(s->avctx, AV_LOG_ERROR,
677 "Invalid start code 0x%x\n", AV_RL24(buf));
678 return AVERROR_INVALIDDATA;
680 width = AV_RL16(buf + 3) & 0x3fff;
681 height = AV_RL16(buf + 5) & 0x3fff;
682 hscale = buf[4] >> 6;
683 vscale = buf[6] >> 6;
687 if (hscale || vscale)
688 avpriv_request_sample(s->avctx, "Upscaling");
690 s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
691 vp78_reset_probability_tables(s);
692 memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
693 sizeof(s->prob->pred16x16));
694 memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
695 sizeof(s->prob->pred8x8c));
696 memcpy(s->prob->mvc, vp8_mv_default_prob,
697 sizeof(s->prob->mvc));
698 memset(&s->segmentation, 0, sizeof(s->segmentation));
699 memset(&s->lf_delta, 0, sizeof(s->lf_delta));
702 ff_vp56_init_range_decoder(c, buf, header_size);
704 buf_size -= header_size;
707 s->colorspace = vp8_rac_get(c);
709 av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
710 s->fullrange = vp8_rac_get(c);
713 if ((s->segmentation.enabled = vp8_rac_get(c)))
714 parse_segment_info(s);
716 s->segmentation.update_map = 0; // FIXME: move this to some init function?
718 s->filter.simple = vp8_rac_get(c);
719 s->filter.level = vp8_rac_get_uint(c, 6);
720 s->filter.sharpness = vp8_rac_get_uint(c, 3);
722 if ((s->lf_delta.enabled = vp8_rac_get(c)))
726 if (setup_partitions(s, buf, buf_size)) {
727 av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
728 return AVERROR_INVALIDDATA;
731 if (!s->macroblocks_base || /* first frame */
732 width != s->avctx->width || height != s->avctx->height ||
733 (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height)
734 if ((ret = vp8_update_dimensions(s, width, height)) < 0)
741 s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
742 s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
745 // if we aren't saving this frame's probabilities for future frames,
746 // make a copy of the current probabilities
747 if (!(s->update_probabilities = vp8_rac_get(c)))
748 s->prob[1] = s->prob[0];
750 s->update_last = s->keyframe || vp8_rac_get(c);
752 vp78_update_probability_tables(s);
754 if ((s->mbskip_enabled = vp8_rac_get(c)))
755 s->prob->mbskip = vp8_rac_get_uint(c, 8);
758 s->prob->intra = vp8_rac_get_uint(c, 8);
759 s->prob->last = vp8_rac_get_uint(c, 8);
760 s->prob->golden = vp8_rac_get_uint(c, 8);
761 vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
767 static av_always_inline
768 void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
770 dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX),
771 av_clip(s->mv_max.x, INT16_MIN, INT16_MAX));
772 dst->y = av_clip(src->y, av_clip(s->mv_min.y, INT16_MIN, INT16_MAX),
773 av_clip(s->mv_max.y, INT16_MIN, INT16_MAX));
777 * Motion vector coding, 17.1.
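*
* A component is coded either as a "short" magnitude (0..7) read from a small
* tree, or as a "long" magnitude whose bits are coded individually; a sign bit
* follows whenever the magnitude is nonzero.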
779 static av_always_inline int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
783 if (vp56_rac_get_prob_branchy(c, p[0])) {
786 for (i = 0; i < 3; i++)
787 x += vp56_rac_get_prob(c, p[9 + i]) << i;
788 for (i = (vp7 ? 7 : 9); i > 3; i--)
789 x += vp56_rac_get_prob(c, p[9 + i]) << i;
790 if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
794 const uint8_t *ps = p + 2;
795 bit = vp56_rac_get_prob(c, *ps);
798 bit = vp56_rac_get_prob(c, *ps);
801 x += vp56_rac_get_prob(c, *ps);
804 return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
807 static int vp7_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
809 return read_mv_component(c, p, 1);
812 static int vp8_read_mv_component(VP56RangeCoder *c, const uint8_t *p)
814 return read_mv_component(c, p, 0);
817 static av_always_inline
818 const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
821 return vp7_submv_prob;
824 return vp8_submv_prob[4 - !!left];
826 return vp8_submv_prob[2];
827 return vp8_submv_prob[1 - !!left];
831 * Split motion vector prediction, 16.4.
832 * @returns the number of motion vectors parsed (2, 4 or 16)
834 static av_always_inline
835 int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
836 int layout, int is_vp7)
840 VP8Macroblock *top_mb;
841 VP8Macroblock *left_mb = &mb[-1];
842 const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
843 const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
845 VP56mv *left_mv = left_mb->bmv;
846 VP56mv *cur_mv = mb->bmv;
848 if (!layout) // layout is inlined, s->mb_layout is not
851 top_mb = &mb[-s->mb_width - 1];
852 mbsplits_top = vp8_mbsplits[top_mb->partitioning];
853 top_mv = top_mb->bmv;
855 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
856 if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
857 part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
859 part_idx = VP8_SPLITMVMODE_8x8;
861 part_idx = VP8_SPLITMVMODE_4x4;
864 num = vp8_mbsplit_count[part_idx];
865 mbsplits_cur = vp8_mbsplits[part_idx];
866 firstidx = vp8_mbfirstidx[part_idx];
867 mb->partitioning = part_idx;
869 for (n = 0; n < num; n++) {
871 uint32_t left, above;
872 const uint8_t *submv_prob;
875 left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
877 left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
879 above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
881 above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
883 submv_prob = get_submv_prob(left, above, is_vp7);
885 if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
886 if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
887 if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
888 mb->bmv[n].y = mb->mv.y +
889 read_mv_component(c, s->prob->mvc[0], is_vp7);
890 mb->bmv[n].x = mb->mv.x +
891 read_mv_component(c, s->prob->mvc[1], is_vp7);
893 AV_ZERO32(&mb->bmv[n]);
896 AV_WN32A(&mb->bmv[n], above);
899 AV_WN32A(&mb->bmv[n], left);
907 * The vp7 reference decoder uses a padding macroblock column (added to the right
908 * edge of the frame) to guard against illegal macroblock offsets. The
909 * algorithm has bugs that permit offsets to straddle the padding column.
910 * This function replicates those bugs.
912 * @param[out] edge_x macroblock x address
913 * @param[out] edge_y macroblock y address
915 * @return macroblock offset legal (boolean)
917 static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
918 int xoffset, int yoffset, int boundary,
919 int *edge_x, int *edge_y)
921 int vwidth = mb_width + 1;
922 int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
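/* 'new' is the linear macroblock address in a grid one (padding) column wider than the frame; addresses before 'boundary' or inside the padding column are rejected */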
923 if (new < boundary || new % vwidth == vwidth - 1)
925 *edge_y = new / vwidth;
926 *edge_x = new % vwidth;
930 static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
932 return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
935 static av_always_inline
936 void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
937 int mb_x, int mb_y, int layout)
939 VP8Macroblock *mb_edge[12];
940 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
941 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
944 uint8_t cnt[3] = { 0 };
945 VP56RangeCoder *c = &s->c;
948 AV_ZERO32(&near_mv[0]);
949 AV_ZERO32(&near_mv[1]);
950 AV_ZERO32(&near_mv[2]);
952 for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
953 const VP7MVPred * pred = &vp7_mv_pred[i];
956 if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
957 pred->yoffset, !s->profile, &edge_x, &edge_y)) {
958 VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
959 ? s->macroblocks_base + 1 + edge_x +
960 (s->mb_width + 1) * (edge_y + 1)
961 : s->macroblocks + edge_x +
962 (s->mb_height - edge_y - 1) * 2;
963 uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
965 if (AV_RN32A(&near_mv[CNT_NEAREST])) {
966 if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
968 } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
969 if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
973 AV_WN32A(&near_mv[CNT_NEAR], mv);
977 AV_WN32A(&near_mv[CNT_NEAREST], mv);
986 cnt[idx] += vp7_mv_pred[i].score;
989 mb->partitioning = VP8_SPLITMVMODE_NONE;
991 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
992 mb->mode = VP8_MVMODE_MV;
994 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
996 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
998 if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
999 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
1001 AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
1003 if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
1004 mb->mode = VP8_MVMODE_SPLIT;
1005 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
1007 mb->mv.y += vp7_read_mv_component(c, s->prob->mvc[0]);
1008 mb->mv.x += vp7_read_mv_component(c, s->prob->mvc[1]);
1009 mb->bmv[0] = mb->mv;
1012 mb->mv = near_mv[CNT_NEAR];
1013 mb->bmv[0] = mb->mv;
1016 mb->mv = near_mv[CNT_NEAREST];
1017 mb->bmv[0] = mb->mv;
1020 mb->mode = VP8_MVMODE_ZERO;
1022 mb->bmv[0] = mb->mv;
1026 static av_always_inline
1027 void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
1028 int mb_x, int mb_y, int layout)
1030 VP8Macroblock *mb_edge[3] = { 0 /* top */,
1033 enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
1034 enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
1036 int cur_sign_bias = s->sign_bias[mb->ref_frame];
1037 int8_t *sign_bias = s->sign_bias;
1039 uint8_t cnt[4] = { 0 };
1040 VP56RangeCoder *c = &s->c;
1042 if (!layout) { // layout is inlined (s->mb_layout is not)
1043 mb_edge[0] = mb + 2;
1044 mb_edge[2] = mb + 1;
1046 mb_edge[0] = mb - s->mb_width - 1;
1047 mb_edge[2] = mb - s->mb_width - 2;
1050 AV_ZERO32(&near_mv[0]);
1051 AV_ZERO32(&near_mv[1]);
1052 AV_ZERO32(&near_mv[2]);
1054 /* Process MB on top, left and top-left */
1055 #define MV_EDGE_CHECK(n) \
1057 VP8Macroblock *edge = mb_edge[n]; \
1058 int edge_ref = edge->ref_frame; \
1059 if (edge_ref != VP56_FRAME_CURRENT) { \
1060 uint32_t mv = AV_RN32A(&edge->mv); \
1062 if (cur_sign_bias != sign_bias[edge_ref]) { \
1063 /* SWAR negate of the values in mv. */ \
1065 mv = ((mv & 0x7fff7fff) + \
1066 0x00010001) ^ (mv & 0x80008000); \
1068 if (!n || mv != AV_RN32A(&near_mv[idx])) \
1069 AV_WN32A(&near_mv[++idx], mv); \
1070 cnt[idx] += 1 + (n != 2); \
1072 cnt[CNT_ZERO] += 1 + (n != 2); \
1080 mb->partitioning = VP8_SPLITMVMODE_NONE;
1081 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
1082 mb->mode = VP8_MVMODE_MV;
1084 /* If we have three distinct MVs, merge first and last if they're the same */
1085 if (cnt[CNT_SPLITMV] &&
1086 AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
1087 cnt[CNT_NEAREST] += 1;
1089 /* Swap near and nearest if necessary */
1090 if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
1091 FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
1092 FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
1095 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
1096 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
1097 /* Choose the best mv out of 0,0 and the nearest mv */
1098 clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
1099 cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
1100 (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
1101 (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
1103 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
1104 mb->mode = VP8_MVMODE_SPLIT;
1105 mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
1107 mb->mv.y += vp8_read_mv_component(c, s->prob->mvc[0]);
1108 mb->mv.x += vp8_read_mv_component(c, s->prob->mvc[1]);
1109 mb->bmv[0] = mb->mv;
1112 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
1113 mb->bmv[0] = mb->mv;
1116 clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
1117 mb->bmv[0] = mb->mv;
1120 mb->mode = VP8_MVMODE_ZERO;
1122 mb->bmv[0] = mb->mv;
1126 static av_always_inline
1127 void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
1128 int mb_x, int keyframe, int layout)
1130 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1133 VP8Macroblock *mb_top = mb - s->mb_width - 1;
1134 memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
1139 uint8_t *const left = s->intra4x4_pred_mode_left;
1141 top = mb->intra4x4_pred_mode_top;
1143 top = s->intra4x4_pred_mode_top + 4 * mb_x;
1144 for (y = 0; y < 4; y++) {
1145 for (x = 0; x < 4; x++) {
1147 ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
1148 *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
1149 left[y] = top[x] = *intra4x4;
1155 for (i = 0; i < 16; i++)
1156 intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
1157 vp8_pred4x4_prob_inter);
1161 static av_always_inline
1162 void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1163 uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
1165 VP56RangeCoder *c = &s->c;
1166 static const char *vp7_feature_name[] = { "q-index",
1168 "partial-golden-update",
1173 for (i = 0; i < 4; i++) {
1174 if (s->feature_enabled[i]) {
1175 if (vp56_rac_get_prob_branchy(c, s->feature_present_prob[i])) {
1176 int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
1177 s->feature_index_prob[i]);
1178 av_log(s->avctx, AV_LOG_WARNING,
1179 "Feature %s present in macroblock (value 0x%x)\n",
1180 vp7_feature_name[i], s->feature_value[i][index]);
1184 } else if (s->segmentation.update_map) {
1185 int bit = vp56_rac_get_prob(c, s->prob->segmentid[0]);
1186 *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
1187 } else if (s->segmentation.enabled)
1188 *segment = ref ? *ref : *segment;
1189 mb->segment = *segment;
1191 mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
1194 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
1195 vp8_pred16x16_prob_intra);
1197 if (mb->mode == MODE_I4x4) {
1198 decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
1200 const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
1201 : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
1203 AV_WN32A(mb->intra4x4_pred_mode_top, modes);
1205 AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
1206 AV_WN32A(s->intra4x4_pred_mode_left, modes);
1209 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1210 vp8_pred8x8c_prob_intra);
1211 mb->ref_frame = VP56_FRAME_CURRENT;
1212 } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
1214 if (vp56_rac_get_prob_branchy(c, s->prob->last))
1216 (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
1217 : VP56_FRAME_GOLDEN;
1219 mb->ref_frame = VP56_FRAME_PREVIOUS;
1220 s->ref_count[mb->ref_frame - 1]++;
1222 // motion vectors, 16.3
1224 vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
1226 vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
1229 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
1231 if (mb->mode == MODE_I4x4)
1232 decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
1234 mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
1236 mb->ref_frame = VP56_FRAME_CURRENT;
1237 mb->partitioning = VP8_SPLITMVMODE_NONE;
1238 AV_ZERO32(&mb->bmv[0]);
1243 * @param r arithmetic bitstream reader context
1244 * @param block destination for block coefficients
1245 * @param probs probabilities to use when reading trees from the bitstream
1246 * @param i initial coeff index, 0 unless a separate DC block is coded
1247 * @param qmul array holding the dc/ac dequant factor at position 0/1
1249 * @return 0 if no coeffs were decoded
1250 * otherwise, the index of the last coeff decoded plus one
1252 static av_always_inline
1253 int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1254 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1255 int i, uint8_t *token_prob, int16_t qmul[2],
1256 const uint8_t scan[16], int vp7)
1258 VP56RangeCoder c = *r;
1263 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
1267 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1269 break; // invalid input; blocks should end with EOB
1270 token_prob = probs[i][0];
1276 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1278 token_prob = probs[i + 1][1];
1280 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1281 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1283 coeff += vp56_rac_get_prob(&c, token_prob[5]);
1287 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1288 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1289 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1290 } else { // DCT_CAT2
1292 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1293 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1295 } else { // DCT_CAT3 and up
1296 int a = vp56_rac_get_prob(&c, token_prob[8]);
1297 int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
1298 int cat = (a << 1) + b;
1299 coeff = 3 + (8 << cat);
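/* cat 0..3 here selects DCT_CAT3..DCT_CAT6, giving base values 11, 19, 35 and 67 */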
1300 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
1303 token_prob = probs[i + 1][2];
1305 block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
1312 static av_always_inline
1313 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1315 int16_t dc = block[0];
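/* the bitwise '|' is intentional: the branch below is taken when either value is zero or their signs differ */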
1323 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1324 block[0] = pred[0] = dc;
1329 block[0] = pred[0] = dc;
1335 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1337 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1338 int i, uint8_t *token_prob,
1340 const uint8_t scan[16])
1342 return decode_block_coeffs_internal(r, block, probs, i,
1343 token_prob, qmul, scan, IS_VP7);
1346 #ifndef vp8_decode_block_coeffs_internal
1347 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1349 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1350 int i, uint8_t *token_prob,
1353 return decode_block_coeffs_internal(r, block, probs, i,
1354 token_prob, qmul, ff_zigzag_scan, IS_VP8);
1359 * @param c arithmetic bitstream reader context
1360 * @param block destination for block coefficients
1361 * @param probs probabilities to use when reading trees from the bitstream
1362 * @param i initial coeff index, 0 unless a separate DC block is coded
1363 * @param zero_nhood the initial prediction context for number of surrounding
1364 * all-zero blocks (only left/top, so 0-2)
1365 * @param qmul array holding the dc/ac dequant factor at position 0/1
1366 * @param scan scan pattern (VP7 only)
1368 * @return 0 if no coeffs were decoded
1369 * otherwise, the index of the last coeff decoded plus one
1371 static av_always_inline
1372 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1373 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1374 int i, int zero_nhood, int16_t qmul[2],
1375 const uint8_t scan[16], int vp7)
1377 uint8_t *token_prob = probs[i][zero_nhood];
1378 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1380 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1381 token_prob, qmul, scan)
1382 : vp8_decode_block_coeffs_internal(c, block, probs, i,
1386 static av_always_inline
1387 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1388 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1391 int i, x, y, luma_start = 0, luma_ctx = 3;
1392 int nnz_pred, nnz, nnz_total = 0;
1393 int segment = mb->segment;
1396 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1397 nnz_pred = t_nnz[8] + l_nnz[8];
1399 // decode DC values and do hadamard
1400 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1401 nnz_pred, s->qmat[segment].luma_dc_qmul,
1402 ff_zigzag_scan, is_vp7);
1403 l_nnz[8] = t_nnz[8] = !!nnz;
1405 if (is_vp7 && mb->mode > MODE_I4x4) {
1406 nnz |= inter_predict_dc(td->block_dc,
1407 s->inter_dc_pred[mb->ref_frame - 1]);
1414 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1416 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
1423 for (y = 0; y < 4; y++)
1424 for (x = 0; x < 4; x++) {
1425 nnz_pred = l_nnz[y] + t_nnz[x];
1426 nnz = decode_block_coeffs(c, td->block[y][x],
1427 s->prob->token[luma_ctx],
1428 luma_start, nnz_pred,
1429 s->qmat[segment].luma_qmul,
1430 s->prob[0].scan, is_vp7);
1431 /* nnz+block_dc may be one more than the actual last index,
1432 * but we don't care */
1433 td->non_zero_count_cache[y][x] = nnz + block_dc;
1434 t_nnz[x] = l_nnz[y] = !!nnz;
1439 // TODO: what to do about dimensions? 2nd dim for luma is x,
1440 // but for chroma it's (y<<1)|x
1441 for (i = 4; i < 6; i++)
1442 for (y = 0; y < 2; y++)
1443 for (x = 0; x < 2; x++) {
1444 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1445 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1446 s->prob->token[2], 0, nnz_pred,
1447 s->qmat[segment].chroma_qmul,
1448 s->prob[0].scan, is_vp7);
1449 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1450 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1454 // if there were no coded coeffs despite the macroblock not being marked skip,
1455 // we MUST not do the inner loop filter and should not do IDCT
1456 // Since skip isn't used for bitstream prediction, just manually set it.
1461 static av_always_inline
1462 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1463 uint8_t *src_cb, uint8_t *src_cr,
1464 int linesize, int uvlinesize, int simple)
1466 AV_COPY128(top_border, src_y + 15 * linesize);
1468 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1469 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
1473 static av_always_inline
1474 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1475 uint8_t *src_cr, int linesize, int uvlinesize, int mb_x,
1476 int mb_y, int mb_width, int simple, int xchg)
1478 uint8_t *top_border_m1 = top_border - 32; // for TL prediction
1480 src_cb -= uvlinesize;
1481 src_cr -= uvlinesize;
1483 #define XCHG(a, b, xchg) \
1491 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1492 XCHG(top_border, src_y, xchg);
1493 XCHG(top_border + 8, src_y + 8, 1);
1494 if (mb_x < mb_width - 1)
1495 XCHG(top_border + 32, src_y + 16, 1);
1497 // only copy chroma for normal loop filter
1498 // or to initialize the top row to 127
1499 if (!simple || !mb_y) {
1500 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1501 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1502 XCHG(top_border + 16, src_cb, 1);
1503 XCHG(top_border + 24, src_cr, 1);
1507 static av_always_inline
1508 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1511 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1513 return mb_y ? mode : LEFT_DC_PRED8x8;
1516 static av_always_inline
1517 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1520 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1522 return mb_y ? mode : HOR_PRED8x8;
1525 static av_always_inline
1526 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1530 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1532 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1534 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1535 case PLANE_PRED8x8: /* TM */
1536 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
1541 static av_always_inline
1542 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1545 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1547 return mb_y ? mode : HOR_VP8_PRED;
1551 static av_always_inline
1552 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1553 int *copy_buf, int vp7)
1557 if (!mb_x && mb_y) {
1562 case DIAG_DOWN_LEFT_PRED:
1563 case VERT_LEFT_PRED:
1564 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1572 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1574 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1575 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1576 * as 16x16/8x8 DC */
1577 case DIAG_DOWN_RIGHT_PRED:
1578 case VERT_RIGHT_PRED:
1587 static av_always_inline
1588 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1589 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1591 int x, y, mode, nnz;
1594 /* for the first row, we need to run xchg_mb_border to init the top edge
1595 * to 127; otherwise, skip it if we aren't going to deblock */
1596 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1597 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1598 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1599 s->filter.simple, 1);
1601 if (mb->mode < MODE_I4x4) {
1602 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1603 s->hpc.pred16x16[mode](dst[0], s->linesize);
1605 uint8_t *ptr = dst[0];
1606 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1607 const uint8_t lo = is_vp7 ? 128 : 127;
1608 const uint8_t hi = is_vp7 ? 128 : 129;
1609 uint8_t tr_top[4] = { lo, lo, lo, lo };
1611 // all blocks on the right edge of the macroblock use the bottom edge of
1612 // the top macroblock for their topright edge
1613 uint8_t *tr_right = ptr - s->linesize + 16;
1615 // if we're on the right edge of the frame, said edge is extended
1616 // from the top macroblock
1617 if (mb_y && mb_x == s->mb_width - 1) {
1618 tr = tr_right[-1] * 0x01010101u;
1619 tr_right = (uint8_t *) &tr;
1623 AV_ZERO128(td->non_zero_count_cache);
1625 for (y = 0; y < 4; y++) {
1626 uint8_t *topright = ptr + 4 - s->linesize;
1627 for (x = 0; x < 4; x++) {
1628 int copy = 0, linesize = s->linesize;
1629 uint8_t *dst = ptr + 4 * x;
1630 LOCAL_ALIGNED(4, uint8_t, copy_dst, [5 * 8]);
1632 if ((y == 0 || x == 3) && mb_y == 0) {
1635 topright = tr_right;
1637 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1638 mb_y + y, &copy, is_vp7);
1640 dst = copy_dst + 12;
1644 AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1646 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1650 copy_dst[3] = ptr[4 * x - s->linesize - 1];
1659 copy_dst[11] = ptr[4 * x - 1];
1660 copy_dst[19] = ptr[4 * x + s->linesize - 1];
1661 copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1662 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1665 s->hpc.pred4x4[mode](dst, topright, linesize);
1667 AV_COPY32(ptr + 4 * x, copy_dst + 12);
1668 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1669 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1670 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
1673 nnz = td->non_zero_count_cache[y][x];
1676 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1677 td->block[y][x], s->linesize);
1679 s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1680 td->block[y][x], s->linesize);
1685 ptr += 4 * s->linesize;
1690 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1691 mb_x, mb_y, is_vp7);
1692 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1693 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1695 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1696 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1697 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1698 s->filter.simple, 0);
1701 static const uint8_t subpel_idx[3][8] = {
1702 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1703 // also function pointer index
1704 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1705 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
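// left + right always equals the required total, e.g. 2 + 3 = 5 extra pixels for the six-tap positions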
1711 * @param s VP8 decoding context
1712 * @param dst target buffer for block data at block position
1713 * @param ref reference picture buffer at origin (0, 0)
1714 * @param mv motion vector (relative to block position) to get pixel data from
1715 * @param x_off horizontal position of block from origin (0, 0)
1716 * @param y_off vertical position of block from origin (0, 0)
1717 * @param block_w width of block (16, 8 or 4)
1718 * @param block_h height of block (always same as block_w)
1719 * @param width width of src/dst plane data
1720 * @param height height of src/dst plane data
1721 * @param linesize size of a single line of plane data, including padding
1722 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1724 static av_always_inline
1725 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1726 ThreadFrame *ref, const VP56mv *mv,
1727 int x_off, int y_off, int block_w, int block_h,
1728 int width, int height, ptrdiff_t linesize,
1729 vp8_mc_func mc_func[3][3])
1731 uint8_t *src = ref->f->data[0];
1734 int src_linesize = linesize;
1736 int mx = (mv->x * 2) & 7, mx_idx = subpel_idx[0][mx];
1737 int my = (mv->y * 2) & 7, my_idx = subpel_idx[0][my];
1739 x_off += mv->x >> 2;
1740 y_off += mv->y >> 2;
1743 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1744 src += y_off * linesize + x_off;
1745 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1746 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1747 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1748 src - my_idx * linesize - mx_idx,
1749 EDGE_EMU_LINESIZE, linesize,
1750 block_w + subpel_idx[1][mx],
1751 block_h + subpel_idx[1][my],
1752 x_off - mx_idx, y_off - my_idx,
1754 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1755 src_linesize = EDGE_EMU_LINESIZE;
1757 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
1759 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1760 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1761 linesize, block_h, 0, 0);
1766 * chroma MC function
1768 * @param s VP8 decoding context
1769 * @param dst1 target buffer for block data at block position (U plane)
1770 * @param dst2 target buffer for block data at block position (V plane)
1771 * @param ref reference picture buffer at origin (0, 0)
1772 * @param mv motion vector (relative to block position) to get pixel data from
1773 * @param x_off horizontal position of block from origin (0, 0)
1774 * @param y_off vertical position of block from origin (0, 0)
1775 * @param block_w width of block (16, 8 or 4)
1776 * @param block_h height of block (always same as block_w)
1777 * @param width width of src/dst plane data
1778 * @param height height of src/dst plane data
1779 * @param linesize size of a single line of plane data, including padding
1780 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1782 static av_always_inline
1783 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1784 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1785 int x_off, int y_off, int block_w, int block_h,
1786 int width, int height, ptrdiff_t linesize,
1787 vp8_mc_func mc_func[3][3])
1789 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
1792 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1793 int my = mv->y & 7, my_idx = subpel_idx[0][my];
1795 x_off += mv->x >> 3;
1796 y_off += mv->y >> 3;
1799 src1 += y_off * linesize + x_off;
1800 src2 += y_off * linesize + x_off;
1801 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1802 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1803 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1804 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1805 src1 - my_idx * linesize - mx_idx,
1806 EDGE_EMU_LINESIZE, linesize,
1807 block_w + subpel_idx[1][mx],
1808 block_h + subpel_idx[1][my],
1809 x_off - mx_idx, y_off - my_idx, width, height);
1810 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1811 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1813 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1814 src2 - my_idx * linesize - mx_idx,
1815 EDGE_EMU_LINESIZE, linesize,
1816 block_w + subpel_idx[1][mx],
1817 block_h + subpel_idx[1][my],
1818 x_off - mx_idx, y_off - my_idx, width, height);
1819 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1820 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1822 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1823 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
1826 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1827 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1828 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1832 static av_always_inline
1833 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1834 ThreadFrame *ref_frame, int x_off, int y_off,
1835 int bx_off, int by_off, int block_w, int block_h,
1836 int width, int height, VP56mv *mv)
1841 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1842 ref_frame, mv, x_off + bx_off, y_off + by_off,
1843 block_w, block_h, width, height, s->linesize,
1844 s->put_pixels_tab[block_w == 8]);
1847 if (s->profile == 3) {
1848 /* this block only applies to VP8; it is safe to check
1849 * only the profile, as VP7 profile <= 1 */
1861 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1862 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1863 &uvmv, x_off + bx_off, y_off + by_off,
1864 block_w, block_h, width, height, s->uvlinesize,
1865 s->put_pixels_tab[1 + (block_w == 4)]);
1868 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1869 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1870 static av_always_inline
1871 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1874 /* Don't prefetch refs that haven't been used very often this frame. */
1875 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1876 int x_off = mb_x << 4, y_off = mb_y << 4;
1877 int mx = (mb->mv.x >> 2) + x_off + 8;
1878 int my = (mb->mv.y >> 2) + y_off;
1879 uint8_t **src = s->framep[ref]->tf.f->data;
1880 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1881 /* For threading, a ff_thread_await_progress here might be useful, but
1882 * it actually slows down the decoder. Since a bad prefetch doesn't
1883 * generate bad decoder output, we don't run it here. */
1884 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
1885 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1886 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1891 * Apply motion vectors to prediction buffer, chapter 18.
1893 static av_always_inline
1894 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1895 VP8Macroblock *mb, int mb_x, int mb_y)
1897 int x_off = mb_x << 4, y_off = mb_y << 4;
1898 int width = 16 * s->mb_width, height = 16 * s->mb_height;
1899 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1900 VP56mv *bmv = mb->bmv;
1902 switch (mb->partitioning) {
1903 case VP8_SPLITMVMODE_NONE:
1904 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1905 0, 0, 16, 16, width, height, &mb->mv);
1907 case VP8_SPLITMVMODE_4x4: {
1912 for (y = 0; y < 4; y++) {
1913 for (x = 0; x < 4; x++) {
1914 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1915 ref, &bmv[4 * y + x],
1916 4 * x + x_off, 4 * y + y_off, 4, 4,
1917 width, height, s->linesize,
1918 s->put_pixels_tab[2]);
1927 for (y = 0; y < 2; y++) {
1928 for (x = 0; x < 2; x++) {
1929 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
1930 mb->bmv[2 * y * 4 + 2 * x + 1].x +
1931 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
1932 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
1933 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
1934 mb->bmv[2 * y * 4 + 2 * x + 1].y +
1935 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
1936 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
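/* sum the four luma MVs covering this chroma 4x4 block, then average; adding FF_SIGNBIT (0 or -1) makes the >> 2 round half away from zero for either sign */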
1937 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
1938 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
1939 if (s->profile == 3) {
1943 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
1944 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
1945 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
1946 width, height, s->uvlinesize,
1947 s->put_pixels_tab[2]);
1952 case VP8_SPLITMVMODE_16x8:
1953 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1954 0, 0, 16, 8, width, height, &bmv[0]);
1955 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1956 0, 8, 16, 8, width, height, &bmv[1]);
1958 case VP8_SPLITMVMODE_8x16:
1959 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1960 0, 0, 8, 16, width, height, &bmv[0]);
1961 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1962 8, 0, 8, 16, width, height, &bmv[1]);
1964 case VP8_SPLITMVMODE_8x8:
1965 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1966 0, 0, 8, 8, width, height, &bmv[0]);
1967 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1968 8, 0, 8, 8, width, height, &bmv[1]);
1969 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1970 0, 8, 8, 8, width, height, &bmv[2]);
1971 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1972 8, 8, 8, 8, width, height, &bmv[3]);
1977 static av_always_inline
1978 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
1982 if (mb->mode != MODE_I4x4) {
1983 uint8_t *y_dst = dst[0];
1984 for (y = 0; y < 4; y++) {
1985 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
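/* each byte of nnz4 holds the coefficient count of one 4x4 block in this row: a value above 1 means the block has AC coefficients and needs a full IDCT, otherwise a DC-only add suffices */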
1987 if (nnz4 & ~0x01010101) {
1988 for (x = 0; x < 4; x++) {
1989 if ((uint8_t) nnz4 == 1)
1990 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
1993 else if ((uint8_t) nnz4 > 1)
1994 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
2002 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
2005 y_dst += 4 * s->linesize;
2009 for (ch = 0; ch < 2; ch++) {
2010 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
2012 uint8_t *ch_dst = dst[1 + ch];
2013 if (nnz4 & ~0x01010101) {
2014 for (y = 0; y < 2; y++) {
2015 for (x = 0; x < 2; x++) {
2016 if ((uint8_t) nnz4 == 1)
2017 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2018 td->block[4 + ch][(y << 1) + x],
2020 else if ((uint8_t) nnz4 > 1)
2021 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2022 td->block[4 + ch][(y << 1) + x],
2026 goto chroma_idct_end;
2028 ch_dst += 4 * s->uvlinesize;
2031 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
2039 static av_always_inline
2040 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2041 VP8FilterStrength *f, int is_vp7)
2043 int interior_limit, filter_level;
2045 if (s->segmentation.enabled) {
2046 filter_level = s->segmentation.filter_level[mb->segment];
2047 if (!s->segmentation.absolute_vals)
2048 filter_level += s->filter.level;
2050 filter_level = s->filter.level;
2052 if (s->lf_delta.enabled) {
2053 filter_level += s->lf_delta.ref[mb->ref_frame];
2054 filter_level += s->lf_delta.mode[mb->mode];
2057 filter_level = av_clip_uintp2(filter_level, 6);
2059 interior_limit = filter_level;
2060 if (s->filter.sharpness) {
2061 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2062 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2064 interior_limit = FFMAX(interior_limit, 1);
2066 f->filter_level = filter_level;
2067 f->inner_limit = interior_limit;
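/* subblock-edge filtering is only skipped for VP8 macroblocks that are skipped and use whole-MB prediction (neither I4x4 nor split MVs) */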
2068 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2069 mb->mode == VP8_MVMODE_SPLIT;
2072 static av_always_inline
2073 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2074 int mb_x, int mb_y, int is_vp7)
2076 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2077 int filter_level = f->filter_level;
2078 int inner_limit = f->inner_limit;
2079 int inner_filter = f->inner_filter;
2080 int linesize = s->linesize;
2081 int uvlinesize = s->uvlinesize;
2082 static const uint8_t hev_thresh_lut[2][64] = {
2083 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2084 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2085 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2086 3, 3, 3, 3 },
2087 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2088 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2089 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2090 2, 2, 2, 2 }
2091 };
2093 if (!filter_level)
2094 return;
2096 if (is_vp7) {
2097 bedge_lim_y = filter_level;
2098 bedge_lim_uv = filter_level * 2;
2099 mbedge_lim = filter_level + 2;
2100 } else {
2101 bedge_lim_y =
2102 bedge_lim_uv = filter_level * 2 + inner_limit;
2103 mbedge_lim = bedge_lim_y + 4;
2104 }
2106 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
2108 if (mb_x) {
2109 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2110 mbedge_lim, inner_limit, hev_thresh);
2111 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2112 mbedge_lim, inner_limit, hev_thresh);
2113 }
2115 #define H_LOOP_FILTER_16Y_INNER(cond) \
2116 if (cond && inner_filter) { \
2117 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2118 bedge_lim_y, inner_limit, \
2119 hev_thresh); \
2120 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2121 bedge_lim_y, inner_limit, \
2122 hev_thresh); \
2123 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2124 bedge_lim_y, inner_limit, \
2125 hev_thresh); \
2126 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2127 uvlinesize, bedge_lim_uv, \
2128 inner_limit, hev_thresh); \
2129 }
2131 H_LOOP_FILTER_16Y_INNER(!is_vp7)
2133 if (mb_y) {
2134 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2135 mbedge_lim, inner_limit, hev_thresh);
2136 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2137 mbedge_lim, inner_limit, hev_thresh);
2138 }
2140 if (inner_filter) {
2141 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2142 linesize, bedge_lim_y,
2143 inner_limit, hev_thresh);
2144 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2145 linesize, bedge_lim_y,
2146 inner_limit, hev_thresh);
2147 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2148 linesize, bedge_lim_y,
2149 inner_limit, hev_thresh);
2150 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2151 dst[2] + 4 * uvlinesize,
2152 uvlinesize, bedge_lim_uv,
2153 inner_limit, hev_thresh);
2154 }
2156 H_LOOP_FILTER_16Y_INNER(is_vp7)
2157 }
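/* filter_mb_simple(): the "simple" loop-filter variant only filters the luma
 * plane, with a single edge limit for sub-block edges and a slightly larger
 * one (bedge_lim + 4) for macroblock edges. */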
2159 static av_always_inline
2160 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2161 int mb_x, int mb_y)
2162 {
2163 int mbedge_lim, bedge_lim;
2164 int filter_level = f->filter_level;
2165 int inner_limit = f->inner_limit;
2166 int inner_filter = f->inner_filter;
2167 int linesize = s->linesize;
2169 if (!filter_level)
2170 return;
2172 bedge_lim = 2 * filter_level + inner_limit;
2173 mbedge_lim = bedge_lim + 4;
2175 if (mb_x)
2176 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2177 if (inner_filter) {
2178 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2179 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2180 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
2181 }
2183 if (mb_y)
2184 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2185 if (inner_filter) {
2186 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2187 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2188 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2189 }
2190 }
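/* MARGIN (16 << 2 == 64) lets clamped motion vectors point up to 16 pixels
 * outside the visible frame; mv_min/mv_max are kept in the same units as the
 * MVs themselves (64 units per 16-pixel macroblock) and slide by 64 for each
 * macroblock as decoding advances across and down the frame. */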
2192 #define MARGIN (16 << 2)
2193 static av_always_inline
2194 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2195 VP8Frame *prev_frame, int is_vp7)
2196 {
2197 VP8Context *s = avctx->priv_data;
2198 int mb_x, mb_y;
2200 s->mv_min.y = -MARGIN;
2201 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2202 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2203 VP8Macroblock *mb = s->macroblocks_base +
2204 ((s->mb_width + 1) * (mb_y + 1) + 1);
2205 int mb_xy = mb_y * s->mb_width;
2207 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2209 s->mv_min.x = -MARGIN;
2210 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2211 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2212 if (mb_y == 0)
2213 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2214 DC_PRED * 0x01010101);
2215 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2216 prev_frame && prev_frame->seg_map ?
2217 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
2218 s->mv_min.x -= 64;
2219 s->mv_max.x -= 64;
2220 }
2221 s->mv_min.y -= 64;
2222 s->mv_max.y -= 64;
2223 }
2224 }
2226 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2227 VP8Frame *prev_frame)
2228 {
2229 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
2230 }
2232 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2233 VP8Frame *prev_frame)
2234 {
2235 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
2236 }
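/* Sliced-threading synchronization: each worker publishes its progress in
 * td->thread_mb_pos, packed as (mb_y << 16) | mb_x (row 2, column 7 is
 * 0x00020007).  check_thread_pos() sleeps on the other thread's condition
 * variable until that thread has reached the requested position, and
 * update_pos() publishes a new position and wakes any waiter whose recorded
 * wait_mb_pos has been satisfied. */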
2238 #if HAVE_THREADS
2239 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2240 do { \
2241 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2242 if (otd->thread_mb_pos < tmp) { \
2243 pthread_mutex_lock(&otd->lock); \
2244 td->wait_mb_pos = tmp; \
2245 do { \
2246 if (otd->thread_mb_pos >= tmp) \
2247 break; \
2248 pthread_cond_wait(&otd->cond, &otd->lock); \
2249 } while (1); \
2250 td->wait_mb_pos = INT_MAX; \
2251 pthread_mutex_unlock(&otd->lock); \
2252 } \
2253 } while (0)
2255 #define update_pos(td, mb_y, mb_x) \
2256 do { \
2257 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2258 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2259 (num_jobs > 1); \
2260 int is_null = !next_td || !prev_td; \
2261 int pos_check = (is_null) ? 1 \
2262 : (next_td != td && \
2263 pos >= next_td->wait_mb_pos) || \
2264 (prev_td != td && \
2265 pos >= prev_td->wait_mb_pos); \
2266 td->thread_mb_pos = pos; \
2267 if (sliced_threading && pos_check) { \
2268 pthread_mutex_lock(&td->lock); \
2269 pthread_cond_broadcast(&td->cond); \
2270 pthread_mutex_unlock(&td->lock); \
2271 } \
2272 } while (0)
2273 #else
2274 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) while(0)
2275 #define update_pos(td, mb_y, mb_x) while(0)
2276 #endif
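/* First pass over one macroblock row: read macroblock modes (unless they
 * were already decoded up front for the whole frame), decode coefficients,
 * run intra or inter prediction and the inverse transform, and store each
 * macroblock's filter strength for the separate loop-filter pass.  Adjacent
 * row jobs are kept at a safe distance via check_thread_pos()/update_pos(). */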
2278 static av_always_inline void decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2279 int jobnr, int threadnr, int is_vp7)
2280 {
2281 VP8Context *s = avctx->priv_data;
2282 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2283 int mb_y = td->thread_mb_pos >> 16;
2284 int mb_x, mb_xy = mb_y * s->mb_width;
2285 int num_jobs = s->num_jobs;
2286 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
2287 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
2288 VP8Macroblock *mb;
2289 uint8_t *dst[3] = {
2290 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2291 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2292 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2293 };
2294 if (mb_y == 0)
2295 prev_td = td;
2296 else
2297 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2298 if (mb_y == s->mb_height - 1)
2299 next_td = td;
2300 else
2301 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2302 if (s->mb_layout == 1)
2303 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2304 else {
2305 // Make sure the previous frame has read its segmentation map,
2306 // if we re-use the same map.
2307 if (prev_frame && s->segmentation.enabled &&
2308 !s->segmentation.update_map)
2309 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
2310 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2311 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2312 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2313 }
2315 if (!is_vp7 || mb_y == 0)
2316 memset(td->left_nnz, 0, sizeof(td->left_nnz));
2318 s->mv_min.x = -MARGIN;
2319 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2321 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2322 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2323 if (prev_td != td) {
2324 if (threadnr != 0) {
2325 check_thread_pos(td, prev_td,
2326 mb_x + (is_vp7 ? 2 : 1),
2327 mb_y - (is_vp7 ? 2 : 1));
2328 } else {
2329 check_thread_pos(td, prev_td,
2330 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2331 mb_y - (is_vp7 ? 2 : 1));
2332 }
2333 }
2335 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2336 s->linesize, 4);
2337 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2338 dst[2] - dst[1], 2);
2340 if (!s->mb_layout)
2341 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2342 prev_frame && prev_frame->seg_map ?
2343 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2345 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2347 if (!mb->skip)
2348 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2350 if (mb->mode <= MODE_I4x4)
2351 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2352 else
2353 inter_predict(s, td, dst, mb, mb_x, mb_y);
2355 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2357 if (!mb->skip) {
2358 idct_mb(s, td, dst, mb);
2359 } else {
2360 AV_ZERO64(td->left_nnz);
2361 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2363 /* Reset DC block predictors if they would exist
2364 * if the mb had coefficients */
2365 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2366 td->left_nnz[8] = 0;
2367 s->top_nnz[mb_x][8] = 0;
2368 }
2369 }
2371 if (s->deblock_filter)
2372 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
2374 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2375 if (s->filter.simple)
2376 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2377 NULL, NULL, s->linesize, 0, 1);
2378 else
2379 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2380 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2381 }
2383 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2385 dst[0] += 16;
2386 dst[1] += 8;
2387 dst[2] += 8;
2388 s->mv_min.x -= 64;
2389 s->mv_max.x -= 64;
2391 if (mb_x == s->mb_width + 1) {
2392 update_pos(td, mb_y, s->mb_width + 3);
2393 } else {
2394 update_pos(td, mb_y, mb_x);
2395 }
2396 }
2397 }
2399 static void vp7_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2400 int jobnr, int threadnr)
2401 {
2402 decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 1);
2403 }
2405 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2406 int jobnr, int threadnr)
2407 {
2408 decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, 0);
2409 }
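/* Second pass over one macroblock row: apply the in-loop deblocking filter
 * using the strengths saved by the decode pass, backing up each macroblock's
 * bottom border so it can serve as the top border of the row below. */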
2411 static av_always_inline void filter_mb_row(AVCodecContext *avctx, void *tdata,
2412 int jobnr, int threadnr, int is_vp7)
2413 {
2414 VP8Context *s = avctx->priv_data;
2415 VP8ThreadData *td = &s->thread_data[threadnr];
2416 int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
2417 AVFrame *curframe = s->curframe->tf.f;
2418 VP8Macroblock *mb;
2419 VP8ThreadData *prev_td, *next_td;
2420 uint8_t *dst[3] = {
2421 curframe->data[0] + 16 * mb_y * s->linesize,
2422 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2423 curframe->data[2] + 8 * mb_y * s->uvlinesize
2424 };
2426 if (s->mb_layout == 1)
2427 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2428 else
2429 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2431 if (mb_y == 0)
2432 prev_td = td;
2433 else
2434 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2435 if (mb_y == s->mb_height - 1)
2436 next_td = td;
2437 else
2438 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2440 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2441 VP8FilterStrength *f = &td->filter_strength[mb_x];
2443 check_thread_pos(td, prev_td,
2444 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
2446 if (next_td != &s->thread_data[0])
2447 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
2449 if (num_jobs == 1) {
2450 if (s->filter.simple)
2451 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2452 NULL, NULL, s->linesize, 0, 1);
2453 else
2454 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2455 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2456 }
2458 if (s->filter.simple)
2459 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2460 else
2461 filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2463 dst[0] += 16;
2464 dst[1] += 8;
2465 dst[2] += 8;
2466 update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
2467 }
2468 }
2470 static void vp7_filter_mb_row(AVCodecContext *avctx, void *tdata,
2471 int jobnr, int threadnr)
2472 {
2473 filter_mb_row(avctx, tdata, jobnr, threadnr, 1);
2474 }
2476 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2477 int jobnr, int threadnr)
2478 {
2479 filter_mb_row(avctx, tdata, jobnr, threadnr, 0);
2480 }
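/* One slice-threading job: thread jobnr handles rows jobnr, jobnr + num_jobs,
 * jobnr + 2 * num_jobs, ..., running the decode pass and, when deblocking is
 * enabled, the filter pass for each row, and reporting per-row progress when
 * frame threading is active. */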
2482 static av_always_inline
2483 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2484 int threadnr, int is_vp7)
2485 {
2486 VP8Context *s = avctx->priv_data;
2487 VP8ThreadData *td = &s->thread_data[jobnr];
2488 VP8ThreadData *next_td = NULL, *prev_td = NULL;
2489 VP8Frame *curframe = s->curframe;
2490 int mb_y, num_jobs = s->num_jobs;
2492 td->thread_nr = threadnr;
2493 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2494 if (mb_y >= s->mb_height)
2495 break;
2496 td->thread_mb_pos = mb_y << 16;
2497 s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
2498 if (s->deblock_filter)
2499 s->filter_mb_row(avctx, tdata, jobnr, threadnr);
2500 update_pos(td, mb_y, INT_MAX & 0xFFFF);
2505 if (avctx->active_thread_type == FF_THREAD_FRAME)
2506 ff_thread_report_progress(&curframe->tf, mb_y, 0);
2512 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2513 int jobnr, int threadnr)
2514 {
2515 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
2516 }
2518 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2519 int jobnr, int threadnr)
2520 {
2521 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
2522 }
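/* Common frame-level decode path for VP7 and VP8: parse the frame header,
 * recycle or allocate frame buffers, update the last/golden/altref reference
 * assignments, decode all macroblock rows through execute2(), and return the
 * picture to the caller unless the frame is marked invisible. */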
2525 static av_always_inline
2526 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2527 AVPacket *avpkt, int is_vp7)
2528 {
2529 VP8Context *s = avctx->priv_data;
2530 int ret, i, referenced, num_jobs;
2531 enum AVDiscard skip_thresh;
2532 VP8Frame *av_uninit(curframe), *prev_frame;
2534 if (is_vp7)
2535 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2536 else
2537 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
2539 if (ret < 0)
2540 goto err;
2542 prev_frame = s->framep[VP56_FRAME_CURRENT];
2544 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2545 s->update_altref == VP56_FRAME_CURRENT;
2547 skip_thresh = !referenced ? AVDISCARD_NONREF
2548 : !s->keyframe ? AVDISCARD_NONKEY
2549 : AVDISCARD_ALL;
2551 if (avctx->skip_frame >= skip_thresh) {
2552 s->invisible = 1;
2553 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2554 goto skip_decode;
2555 }
2556 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2558 // release no longer referenced frames
2559 for (i = 0; i < 5; i++)
2560 if (s->frames[i].tf.f->data[0] &&
2561 &s->frames[i] != prev_frame &&
2562 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2563 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2564 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2565 vp8_release_frame(s, &s->frames[i]);
2567 curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2570 avctx->colorspace = AVCOL_SPC_BT470BG;
2572 avctx->color_range = AVCOL_RANGE_JPEG;
2573 else
2574 avctx->color_range = AVCOL_RANGE_MPEG;
2576 /* Given that arithmetic probabilities are updated every frame, it's quite
2577 * likely that the values we have on a random interframe are complete
2578 * junk if we didn't start decode on a keyframe. So just don't display
2579 * anything rather than junk. */
2580 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2581 !s->framep[VP56_FRAME_GOLDEN] ||
2582 !s->framep[VP56_FRAME_GOLDEN2])) {
2583 av_log(avctx, AV_LOG_WARNING,
2584 "Discarding interframe without a prior keyframe!\n");
2585 ret = AVERROR_INVALIDDATA;
2586 goto err;
2587 }
2589 curframe->tf.f->key_frame = s->keyframe;
2590 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2591 : AV_PICTURE_TYPE_P;
2592 if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
2593 goto err;
2595 // check if golden and altref are swapped
2596 if (s->update_altref != VP56_FRAME_NONE)
2597 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2598 else
2599 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2601 if (s->update_golden != VP56_FRAME_NONE)
2602 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2603 else
2604 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2606 if (s->update_last)
2607 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2608 else
2609 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2611 s->next_framep[VP56_FRAME_CURRENT] = curframe;
2613 if (avctx->codec->update_thread_context)
2614 ff_thread_finish_setup(avctx);
2616 s->linesize = curframe->tf.f->linesize[0];
2617 s->uvlinesize = curframe->tf.f->linesize[1];
2619 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2620 /* Zero macroblock structures for top/top-left prediction
2621 * from outside the frame. */
2622 if (!s->mb_layout)
2623 memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2624 (s->mb_width + 1) * sizeof(*s->macroblocks));
2625 if (!s->mb_layout && s->keyframe)
2626 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2628 memset(s->ref_count, 0, sizeof(s->ref_count));
2630 if (s->mb_layout == 1) {
2631 // Make sure the previous frame has read its segmentation map,
2632 // if we re-use the same map.
2633 if (prev_frame && s->segmentation.enabled &&
2634 !s->segmentation.update_map)
2635 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2636 if (is_vp7)
2637 vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2638 else
2639 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2640 }
2642 if (avctx->active_thread_type == FF_THREAD_FRAME)
2643 num_jobs = 1;
2644 else
2645 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2646 s->num_jobs = num_jobs;
2647 s->curframe = curframe;
2648 s->prev_frame = prev_frame;
2649 s->mv_min.y = -MARGIN;
2650 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2651 for (i = 0; i < MAX_THREADS; i++) {
2652 s->thread_data[i].thread_mb_pos = 0;
2653 s->thread_data[i].wait_mb_pos = INT_MAX;
2654 }
2655 if (is_vp7)
2656 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2657 num_jobs);
2658 else
2659 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2660 num_jobs);
2662 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2663 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2665 skip_decode:
2666 // if future frames don't use the updated probabilities,
2667 // reset them to the values we saved
2668 if (!s->update_probabilities)
2669 s->prob[0] = s->prob[1];
2671 if (!s->invisible) {
2672 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
2673 goto err;
2674 *got_frame = 1;
2675 }
2676 return avpkt->size;
2678 err:
2679 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2680 return ret;
2681 }
2683 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2686 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2689 #if CONFIG_VP7_DECODER
2690 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2693 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2695 #endif /* CONFIG_VP7_DECODER */
2697 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2699 VP8Context *s = avctx->priv_data;
2705 vp8_decode_flush_impl(avctx, 1);
2706 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2707 av_frame_free(&s->frames[i].tf.f);
2712 static av_cold int vp8_init_frames(VP8Context *s)
2713 {
2714 int i;
2715 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2716 s->frames[i].tf.f = av_frame_alloc();
2717 if (!s->frames[i].tf.f)
2718 return AVERROR(ENOMEM);
2719 }
2720 return 0;
2721 }
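/* Shared decoder init: allocate the frame pool and select either the VP7 or
 * the VP8 DSP, prediction and per-row callbacks depending on which decoder
 * this context was opened as. */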
2723 static av_always_inline
2724 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2726 VP8Context *s = avctx->priv_data;
2730 s->vp7 = avctx->codec->id == AV_CODEC_ID_VP7;
2731 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2732 avctx->internal->allocate_progress = 1;
2734 ff_videodsp_init(&s->vdsp, 8);
2736 ff_vp78dsp_init(&s->vp8dsp);
2737 if (CONFIG_VP7_DECODER && is_vp7) {
2738 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2739 ff_vp7dsp_init(&s->vp8dsp);
2740 s->decode_mb_row_no_filter = vp7_decode_mb_row_no_filter;
2741 s->filter_mb_row = vp7_filter_mb_row;
2742 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2743 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2744 ff_vp8dsp_init(&s->vp8dsp);
2745 s->decode_mb_row_no_filter = vp8_decode_mb_row_no_filter;
2746 s->filter_mb_row = vp8_filter_mb_row;
2749 /* does not change for VP8 */
2750 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
2752 if ((ret = vp8_init_frames(s)) < 0) {
2753 ff_vp8_decode_free(avctx);
2760 #if CONFIG_VP7_DECODER
2761 static int vp7_decode_init(AVCodecContext *avctx)
2763 return vp78_decode_init(avctx, IS_VP7);
2765 #endif /* CONFIG_VP7_DECODER */
2767 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2769 return vp78_decode_init(avctx, IS_VP8);
2772 #if CONFIG_VP8_DECODER
2773 #if HAVE_THREADS
2774 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2776 VP8Context *s = avctx->priv_data;
2781 if ((ret = vp8_init_frames(s)) < 0) {
2782 ff_vp8_decode_free(avctx);
2789 #define REBASE(pic) ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)
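/* Frame-threading context update: REBASE() translates VP8Frame pointers from
 * the source thread's frames[] array into the destination's, so that the
 * reference frame list remains valid after the per-thread state is copied. */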
2791 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2792 const AVCodecContext *src)
2794 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
2797 if (s->macroblocks_base &&
2798 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2800 s->mb_width = s_src->mb_width;
2801 s->mb_height = s_src->mb_height;
2804 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2805 s->segmentation = s_src->segmentation;
2806 s->lf_delta = s_src->lf_delta;
2807 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2809 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2810 if (s_src->frames[i].tf.f->data[0]) {
2811 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2817 s->framep[0] = REBASE(s_src->next_framep[0]);
2818 s->framep[1] = REBASE(s_src->next_framep[1]);
2819 s->framep[2] = REBASE(s_src->next_framep[2]);
2820 s->framep[3] = REBASE(s_src->next_framep[3]);
2824 #endif /* HAVE_THREADS */
2825 #endif /* CONFIG_VP8_DECODER */
2827 #if CONFIG_VP7_DECODER
2828 AVCodec ff_vp7_decoder = {
2829 .name = "vp7",
2830 .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2831 .type = AVMEDIA_TYPE_VIDEO,
2832 .id = AV_CODEC_ID_VP7,
2833 .priv_data_size = sizeof(VP8Context),
2834 .init = vp7_decode_init,
2835 .close = ff_vp8_decode_free,
2836 .decode = vp7_decode_frame,
2837 .capabilities = AV_CODEC_CAP_DR1,
2838 .flush = vp8_decode_flush,
2839 };
2840 #endif /* CONFIG_VP7_DECODER */
2842 #if CONFIG_VP8_DECODER
2843 AVCodec ff_vp8_decoder = {
2844 .name = "vp8",
2845 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2846 .type = AVMEDIA_TYPE_VIDEO,
2847 .id = AV_CODEC_ID_VP8,
2848 .priv_data_size = sizeof(VP8Context),
2849 .init = ff_vp8_decode_init,
2850 .close = ff_vp8_decode_free,
2851 .decode = ff_vp8_decode_frame,
2852 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
2853 AV_CODEC_CAP_SLICE_THREADS,
2854 .flush = vp8_decode_flush,
2855 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2856 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2857 };
2858 #endif /* CONFIG_VP8_DECODER */