2 * VP7/VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Fiona Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
10 * This file is part of Libav.
12 * Libav is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * Libav is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with Libav; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/imgutils.h"
32 #include "rectangle.h"
/* Release every per-thread scratch buffer and all context-owned
 * side tables; called on close/full flush. */
static void free_buffers(VP8Context *s)
    for (i = 0; i < MAX_THREADS; i++) {
        /* destroy the sync primitives before freeing the thread data array */
        pthread_cond_destroy(&s->thread_data[i].cond);
        pthread_mutex_destroy(&s->thread_data[i].lock);
        av_freep(&s->thread_data[i].filter_strength);
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);
    /* macroblocks points into macroblocks_base (freed above) — clear the alias */
    s->macroblocks = NULL;
/* Allocate the picture buffer, the per-MB segmentation map and, when a
 * hwaccel is active, its per-frame private data for f.  On the shared
 * error path below, everything acquired so far is released and
 * AVERROR(ENOMEM) is returned. */
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
    /* one byte of segment id per macroblock */
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height)))
    if (s->avctx->hwaccel) {
        const AVHWAccel *hwaccel = s->avctx->hwaccel;
        if (hwaccel->frame_priv_data_size) {
            f->hwaccel_priv_buf = av_buffer_allocz(hwaccel->frame_priv_data_size);
            if (!f->hwaccel_priv_buf)
            f->hwaccel_picture_private = f->hwaccel_priv_buf->data;
    /* error path: undo the partial allocation */
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
    return AVERROR(ENOMEM);
/* Counterpart of vp8_alloc_frame(): drop the segmentation map, the
 * hwaccel private buffer and the picture buffer of f. */
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
    av_buffer_unref(&f->seg_map);
    av_buffer_unref(&f->hwaccel_priv_buf);
    /* pointer aliased hwaccel_priv_buf->data; must not dangle */
    f->hwaccel_picture_private = NULL;
    ff_thread_release_buffer(s->avctx, &f->tf);
#if CONFIG_VP8_DECODER
/* Make dst a new reference to src: picture buffer, segmentation map and
 * hwaccel private data.  dst is released first, and released again if
 * taking the seg_map reference fails. */
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
    vp8_release_frame(s, dst);
    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            return AVERROR(ENOMEM);
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
#endif /* CONFIG_VP8_DECODER */
/* Release all reference frames and clear the framep[] pointers;
 * free_mem additionally frees the context buffers (full teardown). */
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
    VP8Context *s = avctx->priv_data;
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));
/* AVCodec.flush callback: drop references but keep allocated buffers. */
static void vp8_decode_flush(AVCodecContext *avctx)
    vp8_decode_flush_impl(avctx, 0);
/* Return a frame slot that is not currently serving as CURRENT, PREVIOUS,
 * GOLDEN or GOLDEN2 reference; releases any stale buffer it still holds.
 * Running out of slots is fatal (internal invariant violated). */
static VP8Frame *vp8_find_free_buffer(VP8Context *s)
    VP8Frame *frame = NULL;
    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != s->framep[VP56_FRAME_CURRENT] &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            frame = &s->frames[i];
    av_log(s->avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
    /* reclaim a buffer left over from a previous use of this slot */
    if (frame->tf.f->buf[0])
        vp8_release_frame(s, frame);
/* (Re)initialize all dimension-dependent state: macroblock array layout,
 * top-row prediction/nnz/border buffers and per-thread data.  Flushes and
 * reallocates when the coded size changed.  Returns 0 or AVERROR(ENOMEM). */
static av_always_inline
int update_dimensions(VP8Context *s, int width, int height, int is_vp7)
    AVCodecContext *avctx = s->avctx;
    if (width != s->avctx->width ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);
        ret = ff_set_dimensions(s->avctx, width, height);
    s->mb_width = (s->avctx->coded_width + 15) / 16;
    s->mb_height = (s->avctx->coded_height + 15) / 16;
    /* VP7 and multi-partition slice threading use the 2-D MB layout */
    s->mb_layout = is_vp7 || avctx->active_thread_type == FF_THREAD_SLICE &&
                   FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1;
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base = av_mallocz((s->mb_width + s->mb_height * 2 + 1) *
                                         sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width * 4);
    } else // Sliced threading
        s->macroblocks_base = av_mallocz((s->mb_width + 2) * (s->mb_height + 2) *
                                         sizeof(*s->macroblocks));
    s->top_nnz = av_mallocz(s->mb_width * sizeof(*s->top_nnz));
    s->top_border = av_mallocz((s->mb_width + 1) * sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS * sizeof(VP8ThreadData));
    /* intra4x4_pred_mode_top is only needed for the non-MB-layout case */
    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        return AVERROR(ENOMEM);
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength =
            av_mallocz(s->mb_width * sizeof(*s->thread_data[0].filter_strength));
        if (!s->thread_data[i].filter_strength) {
            return AVERROR(ENOMEM);
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
    /* skip the left-column guard macroblock */
    s->macroblocks = s->macroblocks_base + 1;
/* Codec-specific wrappers so update_dimensions() can be inlined per codec. */
static int vp7_update_dimensions(VP8Context *s, int width, int height)
    return update_dimensions(s, width, height, IS_VP7);
static int vp8_update_dimensions(VP8Context *s, int width, int height)
    return update_dimensions(s, width, height, IS_VP8);
/* Parse the segmentation header: update flags, per-segment quantizer and
 * loop-filter deltas, and the segment-id tree probabilities. */
static void parse_segment_info(VP8Context *s)
    VP56RangeCoder *c = &s->c;
    s->segmentation.update_map = vp8_rac_get(c);
    s->segmentation.update_feature_data = vp8_rac_get(c);
    if (s->segmentation.update_feature_data) {
        /* absolute_vals: values replace the base instead of being deltas */
        s->segmentation.absolute_vals = vp8_rac_get(c);
        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7);
        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            /* 255 means "use the default probability" */
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
/* Read the per-reference-frame and per-mode loop filter level deltas
 * (magnitude followed by a sign bit). */
static void update_lf_deltas(VP8Context *s)
    VP56RangeCoder *c = &s->c;
    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
/* Parse the DCT coefficient partition layout: the sizes of the first
 * n-1 partitions are stored as 24-bit LE values before the data; the
 * last partition takes whatever remains.  Initializes one range decoder
 * per partition. */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
    const uint8_t *sizes = buf;
    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
    buf += 3 * (s->num_coeff_partitions - 1);
    buf_size -= 3 * (s->num_coeff_partitions - 1);
    for (i = 0; i < s->num_coeff_partitions - 1; i++) {
        int size = AV_RL24(sizes + 3 * i);
        /* reject partitions claiming more data than is available */
        if (buf_size - size < 0)
        s->coeff_partition_size[i] = size;
        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
    s->coeff_partition_size[i] = buf_size;
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);
/* VP7 quantizer header: one AC index plus five optional overrides, each
 * mapped to dequant factors through the VP7 lookup tables. */
static void vp7_get_quants(VP8Context *s)
    VP56RangeCoder *c = &s->c;
    int yac_qi = vp8_rac_get_uint(c, 7);
    /* each optional index falls back to yac_qi when its flag is unset */
    int ydc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2dc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int y2ac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvdc_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    int uvac_qi = vp8_rac_get(c) ? vp8_rac_get_uint(c, 7) : yac_qi;
    s->qmat[0].luma_qmul[0] = vp7_ydc_qlookup[ydc_qi];
    s->qmat[0].luma_qmul[1] = vp7_yac_qlookup[yac_qi];
    s->qmat[0].luma_dc_qmul[0] = vp7_y2dc_qlookup[y2dc_qi];
    s->qmat[0].luma_dc_qmul[1] = vp7_y2ac_qlookup[y2ac_qi];
    /* chroma DC dequant is capped at 132 */
    s->qmat[0].chroma_qmul[0] = FFMIN(vp7_ydc_qlookup[uvdc_qi], 132);
    s->qmat[0].chroma_qmul[1] = vp7_yac_qlookup[uvac_qi];
/* VP8 quantizer header: a base AC index plus signed deltas, resolved per
 * segment into the four dequant matrices. */
static void get_quants(VP8Context *s)
    VP56RangeCoder *c = &s->c;
    s->quant.yac_qi = vp8_rac_get_uint(c, 7);
    s->quant.ydc_delta = vp8_rac_get_sint(c, 4);
    s->quant.y2dc_delta = vp8_rac_get_sint(c, 4);
    s->quant.y2ac_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvdc_delta = vp8_rac_get_sint(c, 4);
    s->quant.uvac_delta = vp8_rac_get_sint(c, 4);
    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += s->quant.yac_qi;
            base_qi = s->quant.yac_qi;
        /* indices are clipped to 0..127 before the table lookups */
        s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.ydc_delta, 7)];
        s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
        s->qmat[i].luma_dc_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.y2dc_delta, 7)] * 2;
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.y2ac_delta, 7)] * 101581 >> 16;
        s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip_uintp2(base_qi + s->quant.uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip_uintp2(base_qi + s->quant.uvac_delta, 7)];
        /* spec-mandated floor/ceiling for second-order luma AC and chroma DC */
        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132);
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
 * Intra frames update all 3 references
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 * If the update (golden|altref) flag is set, it's updated with the current frame
 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 * 1: VP56_FRAME_PREVIOUS
 * 2: update golden with altref, or update altref with golden
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
    VP56RangeCoder *c = &s->c;
        return VP56_FRAME_CURRENT;
    /* no explicit update: a 2-bit code selects the source, see above */
    switch (vp8_rac_get_uint(c, 2)) {
        return VP56_FRAME_PREVIOUS;
        /* cross-copy: golden takes altref and vice versa */
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    return VP56_FRAME_NONE;
/* Reset the token probabilities to the spec defaults, expanding the
 * coefficient-band mapping in the process. */
static void vp78_reset_probability_tables(VP8Context *s)
    for (i = 0; i < 4; i++)
        for (j = 0; j < 16; j++)
            memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                   sizeof(s->prob->token[i][j]));
/* Apply the bitstream's token probability updates; each updated value is
 * replicated to every coefficient position belonging to the same band. */
static void vp78_update_probability_tables(VP8Context *s)
    VP56RangeCoder *c = &s->c;
    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
/* number of MV component probabilities per codec */
#define VP7_MVC_SIZE 17
#define VP8_MVC_SIZE 19
/* Read the inter-frame updates for the 16x16 and chroma prediction mode
 * probabilities, then the MV component probabilities (mvc_size entries
 * per component, 17 for VP7 / 19 for VP8). */
static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context *s,
    VP56RangeCoder *c = &s->c;
    for (i = 0; i < 4; i++)
        s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
    for (i = 0; i < 3; i++)
        s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8);
    // 17.2 MV probability update
    for (i = 0; i < 2; i++)
        for (j = 0; j < mvc_size; j++)
            if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                s->prob->mvc[i][j] = vp8_rac_get_nn(c);
/* Read the golden/altref update flags and resolve them to the frame each
 * reference should be updated from (see ref_to_update()). */
static void update_refs(VP8Context *s)
    VP56RangeCoder *c = &s->c;
    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);
    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
/* Copy the two chroma planes (half width/height) from src to dst.
 * NOTE(review): despite the name, only planes 1 and 2 are copied here —
 * the luma plane is handled separately by the fade path. */
static void copy_luma(AVFrame *dst, AVFrame *src, int width, int height)
    for (j = 1; j < 3; j++) {
        for (i = 0; i < height / 2; i++)
            memcpy(dst->data[j] + i * dst->linesize[j],
                   src->data[j] + i * src->linesize[j], width / 2);
/* Apply the VP7 fade: out = clip(y + y*beta/256 + alpha) per luma sample. */
static void fade(uint8_t *dst, uint8_t *src,
                 int width, int height, ptrdiff_t linesize,
    for (j = 0; j < height; j++) {
        for (i = 0; i < width; i++) {
            uint8_t y = src[j * linesize + i];
            dst[j * linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
/* VP7 fading: read alpha/beta (signed 8-bit) and, on non-keyframes with a
 * non-zero fade, produce a faded copy of the previous frame to be used as
 * the new prediction reference.  Returns 0 or a negative AVERROR. */
static int vp7_fade_frame(VP8Context *s, VP56RangeCoder *c)
    int alpha = (int8_t) vp8_rac_get_uint(c, 8);
    int beta = (int8_t) vp8_rac_get_uint(c, 8);
    if (!s->keyframe && (alpha || beta)) {
        int width = s->mb_width * 16;
        int height = s->mb_height * 16;
        if (!s->framep[VP56_FRAME_PREVIOUS])
            return AVERROR_INVALIDDATA;
        src = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
        /* preserve the golden frame, write a new previous frame */
        if (s->framep[VP56_FRAME_GOLDEN] == s->framep[VP56_FRAME_PREVIOUS]) {
            s->framep[VP56_FRAME_PREVIOUS] = vp8_find_free_buffer(s);
            if ((ret = vp8_alloc_frame(s, s->framep[VP56_FRAME_PREVIOUS], 1)) < 0)
        dst = s->framep[VP56_FRAME_PREVIOUS]->tf.f;
            /* chroma is copied unchanged; only luma is faded below */
            copy_luma(dst, src, width, height);
        fade(dst->data[0], src->data[0],
             width, height, dst->linesize[0], alpha, beta);
/* Parse the VP7 frame header (sections A-J) into the context: dimensions,
 * macroblock features, quantizers, reference-update flags, fading, loop
 * filter, scan order and probability updates.
 * Returns 0 on success or a negative AVERROR. */
static int vp7_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
    VP56RangeCoder *c = &s->c;
    int part1_size, hscale, vscale, i, j, ret;
    int width = s->avctx->width;
    int height = s->avctx->height;
        return AVERROR_INVALIDDATA;
    s->profile = (buf[0] >> 1) & 7;
    if (s->profile > 1) {
        avpriv_request_sample(s->avctx, "Unknown profile %d", s->profile);
        return AVERROR_INVALIDDATA;
    s->keyframe = !(buf[0] & 1);
    part1_size = AV_RL24(buf) >> 4;
    /* profile 0 has a 4-byte uncompressed header, profile 1 only 3 */
    buf += 4 - s->profile;
    buf_size -= 4 - s->profile;
    if (buf_size < part1_size) {
        return AVERROR_INVALIDDATA;
    /* VP7 always uses the 6-tap epel filters */
    memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    ff_vp56_init_range_decoder(c, buf, part1_size);
    buf_size -= part1_size;
    /* A. Dimension information (keyframes only) */
        width = vp8_rac_get_uint(c, 12);
        height = vp8_rac_get_uint(c, 12);
        hscale = vp8_rac_get_uint(c, 2);
        vscale = vp8_rac_get_uint(c, 2);
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");
        /* keyframes refresh all references and reset all probabilities */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        for (i = 0; i < 2; i++)
            memcpy(s->prob->mvc[i], vp7_mv_default_prob[i],
                   sizeof(vp7_mv_default_prob[i]));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
        memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
    if (s->keyframe || s->profile > 0)
        memset(s->inter_dc_pred, 0 , sizeof(s->inter_dc_pred));
    /* B. Decoding information for all four macroblock-level features */
    for (i = 0; i < 4; i++) {
        s->feature_enabled[i] = vp8_rac_get(c);
        if (s->feature_enabled[i]) {
            s->feature_present_prob[i] = vp8_rac_get_uint(c, 8);
            for (j = 0; j < 3; j++)
                s->feature_index_prob[i][j] =
                    vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
            if (vp7_feature_value_size[s->profile][i])
                for (j = 0; j < 4; j++)
                    s->feature_value[i][j] =
                        vp8_rac_get(c) ? vp8_rac_get_uint(c, vp7_feature_value_size[s->profile][i]) : 0;
    /* VP7 has no VP8-style segmentation or lf deltas */
    s->segmentation.enabled = 0;
    s->segmentation.update_map = 0;
    s->lf_delta.enabled = 0;
    /* VP7 has a single coefficient partition */
    s->num_coeff_partitions = 1;
    ff_vp56_init_range_decoder(&s->coeff_partition[0], buf, buf_size);
    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height ||
        (width + 15) / 16 != s->mb_width || (height + 15) / 16 != s->mb_height) {
        if ((ret = vp7_update_dimensions(s, width, height)) < 0)
    /* C. Dequantization indices */
    /* D. Golden frame update flag (a Flag) for interframes only */
        s->update_golden = vp8_rac_get(c) ? VP56_FRAME_CURRENT : VP56_FRAME_NONE;
        s->sign_bias[VP56_FRAME_GOLDEN] = 0;
    s->update_probabilities = 1;
    if (s->profile > 0) {
        s->update_probabilities = vp8_rac_get(c);
        if (!s->update_probabilities)
            s->prob[1] = s->prob[0];
        s->fade_present = vp8_rac_get(c);
    /* E. Fading information for previous frame */
    if (s->fade_present && vp8_rac_get(c)) {
        if ((ret = vp7_fade_frame(s ,c)) < 0)
    /* F. Loop filter type */
        s->filter.simple = vp8_rac_get(c);
    /* G. DCT coefficient ordering specification */
        for (i = 1; i < 16; i++)
            s->prob[0].scan[i] = ff_zigzag_scan[vp8_rac_get_uint(c, 4)];
    /* H. Loop filter levels */
        s->filter.simple = vp8_rac_get(c);
    s->filter.level = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);
    /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
    vp78_update_probability_tables(s);
    s->mbskip_enabled = 0;
    /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP7_MVC_SIZE);
/* Parse the VP8 frame header: uncompressed preamble (frame tag, start
 * code, dimensions), then the compressed header (segmentation, loop
 * filter, partitions, quantizers, reference flags and probability
 * updates).  Also snapshots the range coder state for hwaccels.
 * Returns 0 on success or a negative AVERROR. */
static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, ret;
    int width = s->avctx->width;
    int height = s->avctx->height;
    s->keyframe = !(buf[0] & 1);
    s->profile = (buf[0]>>1) & 7;
    /* "show frame" bit inverted: invisible frames are not output */
    s->invisible = !(buf[0] & 0x10);
    header_size = AV_RL24(buf) >> 5;
    s->header_partition_size = header_size;
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab,
               sizeof(s->put_pixels_tab));
    else // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab,
               sizeof(s->put_pixels_tab));
    if (header_size > buf_size - 7 * s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
        /* keyframe start code (9d 01 2a read little-endian) */
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR,
                   "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        width = AV_RL16(buf + 3) & 0x3fff;
        height = AV_RL16(buf + 5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");
        /* keyframes refresh all references and reset all probabilities */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        vp78_reset_probability_tables(s);
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter,
               sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c, vp8_pred8x8c_prob_inter,
               sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc, vp8_mv_default_prob,
               sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    ff_vp56_init_range_decoder(c, buf, header_size);
    buf_size -= header_size;
        s->colorspace = vp8_rac_get(c);
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        s->fullrange = vp8_rac_get(c);
    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
        s->segmentation.update_map = 0; // FIXME: move this to some init function?
    s->filter.simple = vp8_rac_get(c);
    s->filter.level = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);
    if ((s->lf_delta.enabled = vp8_rac_get(c))) {
        s->lf_delta.update = vp8_rac_get(c);
        if (s->lf_delta.update)
    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height)
        if ((ret = vp8_update_dimensions(s, width, height)) < 0)
        s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];
    s->update_last = s->keyframe || vp8_rac_get(c);
    vp78_update_probability_tables(s);
    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);
        s->prob->intra = vp8_rac_get_uint(c, 8);
        s->prob->last = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);
        vp78_update_pred16x16_pred8x8_mvc_probabilities(s, VP8_MVC_SIZE);
    // Record the entropy coder state here so that hwaccels can use it.
    s->c.code_word = vp56_rac_renorm(&s->c);
    s->coder_state_at_header_end.input = s->c.buffer - (-s->c.bits / 8);
    s->coder_state_at_header_end.range = s->c.high;
    s->coder_state_at_header_end.value = s->c.code_word >> 16;
    s->coder_state_at_header_end.bit_count = -s->c.bits % 8;
/* Clamp a motion vector into the valid range for the current macroblock
 * (mv_min/mv_max are updated per MB by the caller). */
static av_always_inline
void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
 * Motion vector coding, 17.1.
/* Decode one signed MV component: either a "large" value coded bit by bit
 * (4 low bits, then high bits down to bit 4, plus a conditional bit 3 for
 * the VP7/VP8-specific ranges), or a small value read through the
 * probability tree at p[2..]; p[1] is the sign probability. */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p, int vp7)
    if (vp56_rac_get_prob_branchy(c, p[0])) {
        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        /* VP7 codes 8 magnitude bits, VP8 codes 10 */
        for (i = (vp7 ? 7 : 9); i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & (vp7 ? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c, p[12]))
        const uint8_t *ps = p + 2;
        bit = vp56_rac_get_prob(c, *ps);
        bit = vp56_rac_get_prob(c, *ps);
        x += vp56_rac_get_prob(c, *ps);
    /* apply sign; zero never carries a sign bit */
    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
/* Select the sub-MV probability set from the left/top neighbour sub-MVs;
 * VP7 always uses a single table, VP8 picks one of five depending on
 * which neighbours are zero/equal. */
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top, int is_vp7)
        return vp7_submv_prob;
        return vp8_submv_prob[4 - !!left];
        return vp8_submv_prob[2];
    return vp8_submv_prob[1 - !!left];
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                    int layout, int is_vp7)
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning];
    const uint8_t *mbsplits_top, *mbsplits_cur, *firstidx;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv = mb->bmv;
    if (!layout) // layout is inlined, s->mb_layout is not
        /* 2-D layout: the MB above is one row back in the array */
        top_mb = &mb[-s->mb_width - 1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv = top_mb->bmv;
    /* read the partition type from its probability tree */
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1]))
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
            part_idx = VP8_SPLITMVMODE_8x8;
        part_idx = VP8_SPLITMVMODE_4x4;
    num = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;
    for (n = 0; n < num; n++) {
        uint32_t left, above;
        const uint8_t *submv_prob;
        /* neighbour sub-MVs come from the adjacent MB on block edges,
         * otherwise from already-decoded sub-blocks of this MB */
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
        submv_prob = get_submv_prob(left, above, is_vp7);
        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    /* NEW4x4: explicit MV delta relative to the MB MV */
                    mb->bmv[n].y = mb->mv.y +
                                   read_mv_component(c, s->prob->mvc[0], is_vp7);
                    mb->bmv[n].x = mb->mv.x +
                                   read_mv_component(c, s->prob->mvc[1], is_vp7);
                    AV_ZERO32(&mb->bmv[n]);
                AV_WN32A(&mb->bmv[n], above);
            AV_WN32A(&mb->bmv[n], left);
910 * The vp7 reference decoder uses a padding macroblock column (added to right
911 * edge of the frame) to guard against illegal macroblock offsets. The
912 * algorithm has bugs that permit offsets to straddle the padding column.
913 * This function replicates those bugs.
915 * @param[out] edge_x macroblock x address
916 * @param[out] edge_y macroblock y address
918 * @return macroblock offset legal (boolean)
/* Compute the macroblock address reached from (mb_x, mb_y) by
 * (xoffset, yoffset) in a virtual frame that is one macroblock wider
 * than the real one (the VP7 padding column).  The visible block was
 * missing its return statements; restored here as a complete function.
 *
 * @param[out] edge_x macroblock x address of the reached MB
 * @param[out] edge_y macroblock y address of the reached MB
 * @return 1 if the offset is legal (above `boundary` and not in the
 *         padding column), 0 otherwise; outputs are only written on success
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int vwidth = mb_width + 1;            /* virtual width incl. padding column */
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;                         /* out of range or in padding column */
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
    return 1;
}
/* Return the block MV for `subblock`: the mapped sub-MV for split-MV
 * macroblocks, otherwise bmv[0] (which holds the whole-MB vector). */
static const VP56mv *get_bmv_ptr(const VP8Macroblock *mb, int subblock)
    return &mb->bmv[mb->mode == VP8_MVMODE_SPLIT ? vp8_mbsplits[mb->partitioning][subblock] : 0];
/* VP7 inter-MB motion vector decoding: gather up to VP7_MV_PRED_COUNT
 * candidate predictors from neighbouring MBs, score them into the
 * zero/nearest/near counters, then read the MV mode and final vector. */
static av_always_inline
void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
    VP8Macroblock *mb_edge[12];
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    uint8_t cnt[3] = { 0 };
    VP56RangeCoder *c = &s->c;
    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);
    for (i = 0; i < VP7_MV_PRED_COUNT; i++) {
        const VP7MVPred * pred = &vp7_mv_pred[i];
        if (vp7_calculate_mb_offset(mb_x, mb_y, s->mb_width, pred->xoffset,
                                    pred->yoffset, !s->profile, &edge_x, &edge_y)) {
            /* resolve the predictor MB in whichever array layout is active */
            VP8Macroblock *edge = mb_edge[i] = (s->mb_layout == 1)
                                               ? s->macroblocks_base + 1 + edge_x +
                                                 (s->mb_width + 1) * (edge_y + 1)
                                               : s->macroblocks + edge_x +
                                                 (s->mb_height - edge_y - 1) * 2;
            uint32_t mv = AV_RN32A(get_bmv_ptr(edge, vp7_mv_pred[i].subblock));
            /* bucket the candidate as nearest / near depending on matches */
            if (AV_RN32A(&near_mv[CNT_NEAREST])) {
                if (mv == AV_RN32A(&near_mv[CNT_NEAREST])) {
                } else if (AV_RN32A(&near_mv[CNT_NEAR])) {
                    if (mv != AV_RN32A(&near_mv[CNT_NEAR]))
                    AV_WN32A(&near_mv[CNT_NEAR], mv);
                AV_WN32A(&near_mv[CNT_NEAREST], mv);
            cnt[idx] += vp7_mv_pred[i].score;
    mb->partitioning = VP8_SPLITMVMODE_NONE;
    /* mode decision driven by the candidate counters (vp7_mode_contexts) */
    if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;
        if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][2])) {
                if (cnt[CNT_NEAREST] > cnt[CNT_NEAR])
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAREST] ? 0 : AV_RN32A(&near_mv[CNT_NEAREST]));
                    AV_WN32A(&mb->mv, cnt[CNT_ZERO] > cnt[CNT_NEAR] ? 0 : AV_RN32A(&near_mv[CNT_NEAR]));
                if (vp56_rac_get_prob_branchy(c, vp7_mode_contexts[cnt[CNT_NEAR]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    /* MB-level MV is the last sub-MV parsed */
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP7) - 1];
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0], IS_VP7);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1], IS_VP7);
                    mb->bmv[0] = mb->mv;
                mb->mv = near_mv[CNT_NEAR];
                mb->bmv[0] = mb->mv;
            mb->mv = near_mv[CNT_NEAREST];
            mb->bmv[0] = mb->mv;
        mb->mode = VP8_MVMODE_ZERO;
        mb->bmv[0] = mb->mv;
/* VP8 inter-MB motion vector decoding (spec section 16): examine the
 * top, left and top-left neighbours, build the zero/nearest/near/splitmv
 * counters and decode the MV mode and final vector. */
static av_always_inline
void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb,
                    int mb_x, int mb_y, int layout)
    VP8Macroblock *mb_edge[3] = { 0 /* top */,
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;
    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
        mb_edge[0] = mb - s->mb_width - 1;
        mb_edge[2] = mb - s->mb_width - 2;
    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);
    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n) \
        VP8Macroblock *edge = mb_edge[n]; \
        int edge_ref = edge->ref_frame; \
        if (edge_ref != VP56_FRAME_CURRENT) { \
            uint32_t mv = AV_RN32A(&edge->mv); \
            if (cur_sign_bias != sign_bias[edge_ref]) { \
                /* SWAR negate of the values in mv. */ \
                mv = ((mv & 0x7fff7fff) + \
                      0x00010001) ^ (mv & 0x80008000); \
            if (!n || mv != AV_RN32A(&near_mv[idx])) \
                AV_WN32A(&near_mv[++idx], mv); \
            cnt[idx] += 1 + (n != 2); \
            cnt[CNT_ZERO] += 1 + (n != 2); \
    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;
        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] &&
            AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;
        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {
                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                /* splitmv context: how many neighbours use split mode */
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 +
                                   (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    /* MB-level MV is the last sub-MV parsed */
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout, IS_VP8) - 1];
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0], IS_VP8);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1], IS_VP8);
                    mb->bmv[0] = mb->mv;
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        mb->mode = VP8_MVMODE_ZERO;
        mb->bmv[0] = mb->mv;
/* Decode the 16 intra 4x4 prediction modes of an I4x4 macroblock.  On
 * keyframes the mode tree is contexted on the top/left neighbour modes;
 * on inter frames a single fixed probability set is used. */
static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        /* 2-D layout stores the top row inside the MB above */
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
        uint8_t *const left = s->intra4x4_pred_mode_left;
            top = mb->intra4x4_pred_mode_top;
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                /* decoded mode becomes the context for the next row/column */
                left[y] = top[x] = *intra4x4;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree,
                                           vp8_pred4x4_prob_inter);
/* Decode per-macroblock mode information: segment id, skip flag,
 * intra/inter decision, prediction modes and (for inter MBs) the
 * reference frame and motion vectors. */
static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout, int is_vp7)
    VP56RangeCoder *c = &s->c;
    static const char * const vp7_feature_name[] = { "q-index",
                                                     "partial-golden-update",
        /* VP7 macroblock features are parsed but not implemented — warn */
        for (i = 0; i < 4; i++) {
            if (s->feature_enabled[i]) {
                if (vp56_rac_get_prob(c, s->feature_present_prob[i])) {
                    int index = vp8_rac_get_tree(c, vp7_feature_index_tree,
                                                 s->feature_index_prob[i]);
                    av_log(s->avctx, AV_LOG_WARNING,
                           "Feature %s present in macroblock (value 0x%x)\n",
                           vp7_feature_name[i], s->feature_value[i][index]);
    } else if (s->segmentation.update_map)
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
    else if (s->segmentation.enabled)
        /* no map update: inherit from the reference map when available */
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;
    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra,
                                    vp8_pred16x16_prob_intra);
        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
            /* replicate the implied 4x4 mode into the top/left contexts */
            const uint32_t modes = (is_vp7 ? vp7_pred4x4_mode
                                           : vp8_pred4x4_mode)[mb->mode] * 0x01010101u;
            if (s->mb_layout == 1)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A(s->intra4x4_pred_mode_left, modes);
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
                                                vp8_pred8x8c_prob_intra);
        mb->ref_frame = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        /* inter MB: pick the reference frame (VP7 has no altref) */
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
                (!is_vp7 && vp56_rac_get_prob(c, s->prob->golden)) ? VP56_FRAME_GOLDEN2 /* altref */
                                                                   : VP56_FRAME_GOLDEN;
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame - 1]++;
        // motion vectors, 16.3
            vp7_decode_mvs(s, mb, mb_x, mb_y, layout);
            vp8_decode_mvs(s, mb, mb_x, mb_y, layout);
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);
        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree,
        mb->ref_frame = VP56_FRAME_CURRENT;
        mb->partitioning = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
1245 * @param r arithmetic bitstream reader context
1246 * @param block destination for block coefficients
1247 * @param probs probabilities to use when reading trees from the bitstream
1248 * @param i initial coeff index, 0 unless a separate DC block is coded
1249 * @param qmul array holding the dc/ac dequant factor at position 0/1
1251 * @return 0 if no coeffs were decoded
1252 * otherwise, the index of the last coeff decoded plus one
/* Core DCT token decoding loop (shared by VP7 and VP8; see the Doxygen
 * comment above). Tokens are decoded per the DCT_* token tree: EOB, zero,
 * one, small literals (2-4), then escape categories CAT1..CAT6. */
1254 static av_always_inline
1255 int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
1256 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1257 int i, uint8_t *token_prob, int16_t qmul[2],
1258 const uint8_t scan[16], int vp7)
/* work on a local copy of the range coder for better register allocation;
 * NOTE(review): the coder state is presumably written back to *r in the
 * (not shown here) epilogue — confirm against the full function */
1260 VP56RangeCoder c = *r;
1265 if (!vp56_rac_get_prob_branchy(&c, token_prob[0])) // DCT_EOB
1269 if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
1271 break; // invalid input; blocks should end with EOB
/* after a zero, next token uses context 0 ("previous was zero") */
1272 token_prob = probs[i][0];
1278 if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
1280 token_prob = probs[i + 1][1];
1282 if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
1283 coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
1285 coeff += vp56_rac_get_prob(&c, token_prob[5]);
1289 if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
1290 if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
1291 coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
1292 } else { // DCT_CAT2
1294 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
1295 coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
1297 } else { // DCT_CAT3 and up
/* two bits select the category; base value is 3 + 8 << cat */
1298 int a = vp56_rac_get_prob(&c, token_prob[8]);
1299 int b = vp56_rac_get_prob(&c, token_prob[9 + a]);
1300 int cat = (a << 1) + b;
1301 coeff = 3 + (8 << cat);
1302 coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
1305 token_prob = probs[i + 1][2];
/* sign bit, then dequantize: qmul[0] for the DC slot, qmul[1] for AC */
1307 block[scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
/* VP7 inter-macroblock DC prediction: track the running DC predictor in
 * pred[] and replace block[0] accordingly. Returns nonzero if block[0]
 * ends up nonzero (caller ORs it into the nnz flag). */
1314 static av_always_inline
1315 int inter_predict_dc(int16_t block[16], int16_t pred[2])
1317 int16_t dc = block[0];
/* branchless test: predictor unset, dc zero, or sign change between
 * pred[0] and dc (the XOR>>31 extracts the differing-sign bit).
 * Bitwise | is intentional — avoids short-circuit branches. */
1325 if (!pred[0] | !dc | ((int32_t)pred[0] ^ (int32_t)dc) >> 31) {
1326 block[0] = pred[0] = dc;
1331 block[0] = pred[0] = dc;
/* VP7 wrapper: forwards to the shared token decoder with the caller's
 * scan order and the IS_VP7 compile-time flag. */
1337 static int vp7_decode_block_coeffs_internal(VP56RangeCoder *r,
1339 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1340 int i, uint8_t *token_prob,
1342 const uint8_t scan[16])
1344 return decode_block_coeffs_internal(r, block, probs, i,
1345 token_prob, qmul, scan, IS_VP7);
/* VP8 wrapper: fixed zigzag scan, IS_VP8 flag. Guarded by #ifndef so an
 * arch-specific optimized version can replace it via a macro. */
1348 #ifndef vp8_decode_block_coeffs_internal
1349 static int vp8_decode_block_coeffs_internal(VP56RangeCoder *r,
1351 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1352 int i, uint8_t *token_prob,
1355 return decode_block_coeffs_internal(r, block, probs, i,
1356 token_prob, qmul, ff_zigzag_scan, IS_VP8);
1361 * @param c arithmetic bitstream reader context
1362 * @param block destination for block coefficients
1363 * @param probs probabilities to use when reading trees from the bitstream
1364 * @param i initial coeff index, 0 unless a separate DC block is coded
1365 * @param zero_nhood the initial prediction context for number of surrounding
1366 * all-zero blocks (only left/top, so 0-2)
1367 * @param qmul array holding the dc/ac dequant factor at position 0/1
1369 * @return 0 if no coeffs were decoded
1370 * otherwise, the index of the last coeff decoded plus one
/* Entry point for block coefficient decoding (see Doxygen comment above).
 * Fast path: handle the common all-EOB case inline before dispatching to
 * the (non-inlined) VP7/VP8 token decoders. */
1372 static av_always_inline
1373 int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
1374 uint8_t probs[16][3][NUM_DCT_TOKENS - 1],
1375 int i, int zero_nhood, int16_t qmul[2],
1376 const uint8_t scan[16], int vp7)
1378 uint8_t *token_prob = probs[i][zero_nhood];
1379 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
1381 return vp7 ? vp7_decode_block_coeffs_internal(c, block, probs, i,
1382 token_prob, qmul, scan)
1383 : vp8_decode_block_coeffs_internal(c, block, probs, i,
/* Decode all residual coefficients of one macroblock: the optional
 * luma DC (WHT) block, the 16 luma 4x4 blocks, and the 8 chroma 4x4
 * blocks; maintains the left/top non-zero-count prediction contexts. */
1387 static av_always_inline
1388 void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c,
1389 VP8Macroblock *mb, uint8_t t_nnz[9], uint8_t l_nnz[9],
1392 int i, x, y, luma_start = 0, luma_ctx = 3;
1393 int nnz_pred, nnz, nnz_total = 0;
1394 int segment = mb->segment;
/* separate luma DC block exists for every mode except I4x4 and
 * (VP8 only) SPLIT MV */
1397 if (mb->mode != MODE_I4x4 && (is_vp7 || mb->mode != VP8_MVMODE_SPLIT)) {
1398 nnz_pred = t_nnz[8] + l_nnz[8];
1400 // decode DC values and do hadamard
1401 nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0,
1402 nnz_pred, s->qmat[segment].luma_dc_qmul,
1403 ff_zigzag_scan, is_vp7);
1404 l_nnz[8] = t_nnz[8] = !!nnz;
1406 if (is_vp7 && mb->mode > MODE_I4x4) {
1407 nnz |= inter_predict_dc(td->block_dc,
1408 s->inter_dc_pred[mb->ref_frame - 1]);
/* inverse Walsh-Hadamard spreads the DC values into td->block */
1415 s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
1417 s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
/* luma AC (or full) blocks */
1424 for (y = 0; y < 4; y++)
1425 for (x = 0; x < 4; x++) {
1426 nnz_pred = l_nnz[y] + t_nnz[x];
1427 nnz = decode_block_coeffs(c, td->block[y][x],
1428 s->prob->token[luma_ctx],
1429 luma_start, nnz_pred,
1430 s->qmat[segment].luma_qmul,
1431 s->prob[0].scan, is_vp7);
1432 /* nnz+block_dc may be one more than the actual last index,
1433 * but we don't care */
1434 td->non_zero_count_cache[y][x] = nnz + block_dc;
1435 t_nnz[x] = l_nnz[y] = !!nnz;
1440 // TODO: what to do about dimensions? 2nd dim for luma is x,
1441 // but for chroma it's (y<<1)|x
1442 for (i = 4; i < 6; i++)
1443 for (y = 0; y < 2; y++)
1444 for (x = 0; x < 2; x++) {
1445 nnz_pred = l_nnz[i + 2 * y] + t_nnz[i + 2 * x];
1446 nnz = decode_block_coeffs(c, td->block[i][(y << 1) + x],
1447 s->prob->token[2], 0, nnz_pred,
1448 s->qmat[segment].chroma_qmul,
1449 s->prob[0].scan, is_vp7);
1450 td->non_zero_count_cache[i][(y << 1) + x] = nnz;
1451 t_nnz[i + 2 * x] = l_nnz[i + 2 * y] = !!nnz;
1455 // if there were no coded coeffs despite the macroblock not being marked skip,
1456 // we MUST not do the inner loop filter and should not do IDCT
1457 // Since skip isn't used for bitstream prediction, just manually set it.
/* Save the bottom pixel row of a finished macroblock (luma row 15, chroma
 * row 7) into the top_border cache, where the macroblock row below will
 * use it as its top edge for intra prediction / deblocking. */
1462 static av_always_inline
1463 void backup_mb_border(uint8_t *top_border, uint8_t *src_y,
1464 uint8_t *src_cb, uint8_t *src_cr,
1465 ptrdiff_t linesize, ptrdiff_t uvlinesize, int simple)
1467 AV_COPY128(top_border, src_y + 15 * linesize);
/* chroma is only backed up for the normal (non-simple) loop filter */
1469 AV_COPY64(top_border + 16, src_cb + 7 * uvlinesize);
1470 AV_COPY64(top_border + 24, src_cr + 7 * uvlinesize);
/* Exchange (xchg=1) or copy the cached top-border pixels with the row
 * above the current macroblock, so intra prediction sees the
 * pre-deblocking top edge. Layout of top_border: 16 bytes luma,
 * 8 bytes cb, 8 bytes cr per macroblock (hence the +-32/16/24 offsets). */
1474 static av_always_inline
1475 void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb,
1476 uint8_t *src_cr, ptrdiff_t linesize, ptrdiff_t uvlinesize, int mb_x,
1477 int mb_y, int mb_width, int simple, int xchg)
1479 uint8_t *top_border_m1 = top_border - 32; // for TL prediction
1481 src_cb -= uvlinesize;
1482 src_cr -= uvlinesize;
1484 #define XCHG(a, b, xchg) \
1492 XCHG(top_border_m1 + 8, src_y - 8, xchg);
1493 XCHG(top_border, src_y, xchg);
1494 XCHG(top_border + 8, src_y + 8, 1);
1495 if (mb_x < mb_width - 1)
/* top-right pixels come from the next macroblock's border slot */
1496 XCHG(top_border + 32, src_y + 16, 1);
1498 // only copy chroma for normal loop filter
1499 // or to initialize the top row to 127
1500 if (!simple || !mb_y) {
1501 XCHG(top_border_m1 + 16, src_cb - 8, xchg);
1502 XCHG(top_border_m1 + 24, src_cr - 8, xchg);
1503 XCHG(top_border + 16, src_cb, 1);
1504 XCHG(top_border + 24, src_cr, 1);
/* Remap 8x8/16x16 DC prediction at picture edges: fall back to
 * top-only / left-only / fixed-128 DC when neighbours are missing. */
1508 static av_always_inline
1509 int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
1512 return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
1514 return mb_y ? mode : LEFT_DC_PRED8x8;
/* Remap 8x8/16x16 TrueMotion prediction at picture edges; VP7 and VP8
 * use different fill values for the missing top-left corner. */
1517 static av_always_inline
1518 int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y, int vp7)
1521 return mb_y ? VERT_PRED8x8 : (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8);
1523 return mb_y ? mode : HOR_PRED8x8;
/* Map an 8x8/16x16 intra mode to an edge-safe equivalent when the
 * macroblock sits on the top/left picture boundary. */
1526 static av_always_inline
1527 int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y, int vp7)
1531 return check_dc_pred8x8_mode(mode, mb_x, mb_y);
1533 return !mb_y ? (vp7 ? DC_128_PRED8x8 : DC_127_PRED8x8) : mode;
1535 return !mb_x ? (vp7 ? DC_128_PRED8x8 : DC_129_PRED8x8) : mode;
1536 case PLANE_PRED8x8: /* TM */
1537 return check_tm_pred8x8_mode(mode, mb_x, mb_y, vp7);
/* 4x4 analogue of check_tm_pred8x8_mode: edge-safe TrueMotion remap. */
1542 static av_always_inline
1543 int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y, int vp7)
1546 return mb_y ? VERT_VP8_PRED : (vp7 ? DC_128_PRED : DC_129_PRED);
1548 return mb_y ? mode : HOR_VP8_PRED;
/* Map a 4x4 intra mode to an edge-safe equivalent at picture boundaries.
 * Modes that cannot be remapped set *copy_buf so the caller predicts into
 * a padded temporary buffer instead. */
1552 static av_always_inline
1553 int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y,
1554 int *copy_buf, int vp7)
1558 if (!mb_x && mb_y) {
1563 case DIAG_DOWN_LEFT_PRED:
1564 case VERT_LEFT_PRED:
1565 return !mb_y ? (vp7 ? DC_128_PRED : DC_127_PRED) : mode;
1573 return !mb_x ? (vp7 ? DC_128_PRED : DC_129_PRED) : mode;
1575 return check_tm_pred4x4_mode(mode, mb_x, mb_y, vp7);
1576 case DC_PRED: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1577 * as 16x16/8x8 DC */
1578 case DIAG_DOWN_RIGHT_PRED:
1579 case VERT_RIGHT_PRED:
/* Perform intra prediction for one macroblock (luma 16x16 or 16 separate
 * 4x4 subblocks, plus chroma 8x8), applying the IDCT to 4x4 residuals in
 * the I4x4 path. The border exchange makes prediction read pre-deblock
 * neighbour pixels. */
1588 static av_always_inline
1589 void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1590 VP8Macroblock *mb, int mb_x, int mb_y, int is_vp7)
1592 int x, y, mode, nnz;
1595 /* for the first row, we need to run xchg_mb_border to init the top edge
1596 * to 127 otherwise, skip it if we aren't going to deblock */
1597 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1598 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1599 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1600 s->filter.simple, 1);
1602 if (mb->mode < MODE_I4x4) {
1603 mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y, is_vp7);
1604 s->hpc.pred16x16[mode](dst[0], s->linesize);
/* I4x4: predict and reconstruct each of the 16 luma subblocks */
1606 uint8_t *ptr = dst[0];
1607 uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1608 const uint8_t lo = is_vp7 ? 128 : 127;
1609 const uint8_t hi = is_vp7 ? 128 : 129;
1610 uint8_t tr_top[4] = { lo, lo, lo, lo };
1612 // all blocks on the right edge of the macroblock use the bottom edge of
1613 // the top macroblock for their topright edge
1614 uint8_t *tr_right = ptr - s->linesize + 16;
1616 // if we're on the right edge of the frame, said edge is extended
1617 // from the top macroblock
1618 if (mb_y && mb_x == s->mb_width - 1) {
1619 tr = tr_right[-1] * 0x01010101u;
1620 tr_right = (uint8_t *) &tr;
1624 AV_ZERO128(td->non_zero_count_cache);
1626 for (y = 0; y < 4; y++) {
1627 uint8_t *topright = ptr + 4 - s->linesize;
1628 for (x = 0; x < 4; x++) {
1630 ptrdiff_t linesize = s->linesize;
1631 uint8_t *dst = ptr + 4 * x;
/* 5 rows x 8 bytes scratch: 1 top row + 4 block rows, with a
 * left column at offset 3 of each row */
1632 DECLARE_ALIGNED(4, uint8_t, copy_dst)[5 * 8];
1634 if ((y == 0 || x == 3) && mb_y == 0) {
1637 topright = tr_right;
1639 mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x,
1640 mb_y + y, &copy, is_vp7);
/* copy path: predict into the padded scratch buffer, then copy
 * the 4x4 result back into the picture */
1642 dst = copy_dst + 12;
1646 AV_WN32A(copy_dst + 4, lo * 0x01010101U);
1648 AV_COPY32(copy_dst + 4, ptr + 4 * x - s->linesize);
1652 copy_dst[3] = ptr[4 * x - s->linesize - 1];
1661 copy_dst[11] = ptr[4 * x - 1];
1662 copy_dst[19] = ptr[4 * x + s->linesize - 1];
1663 copy_dst[27] = ptr[4 * x + s->linesize * 2 - 1];
1664 copy_dst[35] = ptr[4 * x + s->linesize * 3 - 1];
1667 s->hpc.pred4x4[mode](dst, topright, linesize);
1669 AV_COPY32(ptr + 4 * x, copy_dst + 12);
1670 AV_COPY32(ptr + 4 * x + s->linesize, copy_dst + 20);
1671 AV_COPY32(ptr + 4 * x + s->linesize * 2, copy_dst + 28);
1672 AV_COPY32(ptr + 4 * x + s->linesize * 3, copy_dst + 36);
/* add the residual: DC-only fast path vs. full IDCT */
1675 nnz = td->non_zero_count_cache[y][x];
1678 s->vp8dsp.vp8_idct_dc_add(ptr + 4 * x,
1679 td->block[y][x], s->linesize);
1681 s->vp8dsp.vp8_idct_add(ptr + 4 * x,
1682 td->block[y][x], s->linesize);
1687 ptr += 4 * s->linesize;
/* chroma: one 8x8 prediction per plane */
1692 mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode,
1693 mb_x, mb_y, is_vp7);
1694 s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
1695 s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
/* restore (un-exchange) the border pixels */
1697 if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1698 xchg_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2],
1699 s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
1700 s->filter.simple, 0);
/* Per-subpel-phase (mv & 7) lookup for motion compensation: extra pixels
 * needed around the block by the interpolation filters. */
1703 static const uint8_t subpel_idx[3][8] = {
1704 { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
1705 // also function pointer index
1706 { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
1707 { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
1713 * @param s VP8 decoding context
1714 * @param dst target buffer for block data at block position
1715 * @param ref reference picture buffer at origin (0, 0)
1716 * @param mv motion vector (relative to block position) to get pixel data from
1717 * @param x_off horizontal position of block from origin (0, 0)
1718 * @param y_off vertical position of block from origin (0, 0)
1719 * @param block_w width of block (16, 8 or 4)
1720 * @param block_h height of block (always same as block_w)
1721 * @param width width of src/dst plane data
1722 * @param height height of src/dst plane data
1723 * @param linesize size of a single line of plane data, including padding
1724 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/* Luma motion compensation (see Doxygen comment above). Falls back to
 * emulated_edge_mc when the filter footprint crosses the picture border;
 * full-pel MVs take the filter-free fast path at the bottom. */
1726 static av_always_inline
1727 void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1728 ThreadFrame *ref, const VP56mv *mv,
1729 int x_off, int y_off, int block_w, int block_h,
1730 int width, int height, ptrdiff_t linesize,
1731 vp8_mc_func mc_func[3][3])
1733 uint8_t *src = ref->f->data[0];
1736 ptrdiff_t src_linesize = linesize;
/* luma MVs are in quarter-pel; <<1 converts to the 1/8-pel phase index */
1738 int mx = (mv->x << 1) & 7, mx_idx = subpel_idx[0][mx];
1739 int my = (mv->y << 1) & 7, my_idx = subpel_idx[0][my];
1741 x_off += mv->x >> 2;
1742 y_off += mv->y >> 2;
/* frame-threading: wait until the reference rows we read are decoded */
1745 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1746 src += y_off * linesize + x_off;
1747 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1748 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1749 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1750 src - my_idx * linesize - mx_idx,
1751 EDGE_EMU_LINESIZE, linesize,
1752 block_w + subpel_idx[1][mx],
1753 block_h + subpel_idx[1][my],
1754 x_off - mx_idx, y_off - my_idx,
1756 src = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1757 src_linesize = EDGE_EMU_LINESIZE;
1759 mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
/* full-pel fast path: plain copy, no interpolation */
1761 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1762 mc_func[0][0](dst, linesize, src + y_off * linesize + x_off,
1763 linesize, block_h, 0, 0);
1768 * chroma MC function
1770 * @param s VP8 decoding context
1771 * @param dst1 target buffer for block data at block position (U plane)
1772 * @param dst2 target buffer for block data at block position (V plane)
1773 * @param ref reference picture buffer at origin (0, 0)
1774 * @param mv motion vector (relative to block position) to get pixel data from
1775 * @param x_off horizontal position of block from origin (0, 0)
1776 * @param y_off vertical position of block from origin (0, 0)
1777 * @param block_w width of block (16, 8 or 4)
1778 * @param block_h height of block (always same as block_w)
1779 * @param width width of src/dst plane data
1780 * @param height height of src/dst plane data
1781 * @param linesize size of a single line of plane data, including padding
1782 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
/* Chroma motion compensation for both U and V planes (see Doxygen comment
 * above); same edge-emulation logic as vp8_mc_luma, applied twice. */
1784 static av_always_inline
1785 void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1,
1786 uint8_t *dst2, ThreadFrame *ref, const VP56mv *mv,
1787 int x_off, int y_off, int block_w, int block_h,
1788 int width, int height, ptrdiff_t linesize,
1789 vp8_mc_func mc_func[3][3])
1791 uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
/* chroma MVs are in 1/8-pel units already */
1794 int mx = mv->x & 7, mx_idx = subpel_idx[0][mx];
1795 int my = mv->y & 7, my_idx = subpel_idx[0][my];
1797 x_off += mv->x >> 3;
1798 y_off += mv->y >> 3;
1801 src1 += y_off * linesize + x_off;
1802 src2 += y_off * linesize + x_off;
1803 ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1804 if (x_off < mx_idx || x_off >= width - block_w - subpel_idx[2][mx] ||
1805 y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
/* edge case: each plane is emulated and filtered separately since
 * the emu buffer is reused */
1806 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1807 src1 - my_idx * linesize - mx_idx,
1808 EDGE_EMU_LINESIZE, linesize,
1809 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1810 x_off - mx_idx, y_off - my_idx, width, height);
1811 src1 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1812 mc_func[my_idx][mx_idx](dst1, linesize, src1, EDGE_EMU_LINESIZE, block_h, mx, my);
1814 s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
1815 src2 - my_idx * linesize - mx_idx,
1816 EDGE_EMU_LINESIZE, linesize,
1817 block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
1818 x_off - mx_idx, y_off - my_idx, width, height);
1819 src2 = td->edge_emu_buffer + mx_idx + EDGE_EMU_LINESIZE * my_idx;
1820 mc_func[my_idx][mx_idx](dst2, linesize, src2, EDGE_EMU_LINESIZE, block_h, mx, my);
1822 mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
1823 mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
/* full-pel fast path for both planes */
1826 ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1827 mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
1828 mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
/* Motion-compensate one partition of a macroblock: luma at full
 * resolution, then chroma with a derived (averaged/rounded) MV at half
 * resolution. */
1832 static av_always_inline
1833 void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1834 ThreadFrame *ref_frame, int x_off, int y_off,
1835 int bx_off, int by_off, int block_w, int block_h,
1836 int width, int height, VP56mv *mv)
1841 vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
1842 ref_frame, mv, x_off + bx_off, y_off + by_off,
1843 block_w, block_h, width, height, s->linesize,
1844 s->put_pixels_tab[block_w == 8]);
1847 if (s->profile == 3) {
1848 /* this block only applies VP8; it is safe to check
1849 * only the profile, as VP7 profile <= 1 */
/* chroma offsets/dimensions are half the luma ones */
1861 vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
1862 dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
1863 &uvmv, x_off + bx_off, y_off + by_off,
1864 block_w, block_h, width, height, s->uvlinesize,
1865 s->put_pixels_tab[1 + (block_w == 4)]);
1868 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1869 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
/* Prefetch reference pixels for the MC of upcoming macroblocks (see the
 * comment above); purely a cache hint, never affects decoder output. */
1870 static av_always_inline
1871 void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
1874 /* Don't prefetch refs that haven't been used very often this frame. */
1875 if (s->ref_count[ref - 1] > (mb_xy >> 5)) {
1876 int x_off = mb_x << 4, y_off = mb_y << 4;
1877 int mx = (mb->mv.x >> 2) + x_off + 8;
1878 int my = (mb->mv.y >> 2) + y_off;
1879 uint8_t **src = s->framep[ref]->tf.f->data;
1880 int off = mx + (my + (mb_x & 3) * 4) * s->linesize + 64;
1881 /* For threading, a ff_thread_await_progress here might be useful, but
1882 * it actually slows down the decoder. Since a bad prefetch doesn't
1883 * generate bad decoder output, we don't run it here. */
1884 s->vdsp.prefetch(src[0] + off, s->linesize, 4);
/* U and V are assumed to be adjacent planes (src[2]-src[1] stride) */
1885 off = (mx >> 1) + ((my >> 1) + (mb_x & 7)) * s->uvlinesize + 64;
1886 s->vdsp.prefetch(src[1] + off, src[2] - src[1], 2);
1891 * Apply motion vectors to prediction buffer, chapter 18.
/* Apply motion compensation for one inter macroblock, dispatching on the
 * split-MV partitioning (none / 4x4 / 16x8 / 8x16 / 8x8). */
1893 static av_always_inline
1894 void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1895 VP8Macroblock *mb, int mb_x, int mb_y)
1897 int x_off = mb_x << 4, y_off = mb_y << 4;
1898 int width = 16 * s->mb_width, height = 16 * s->mb_height;
1899 ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1900 VP56mv *bmv = mb->bmv;
1902 switch (mb->partitioning) {
1903 case VP8_SPLITMVMODE_NONE:
1904 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1905 0, 0, 16, 16, width, height, &mb->mv);
1907 case VP8_SPLITMVMODE_4x4: {
/* luma: 16 independent 4x4 blocks, one MV each */
1912 for (y = 0; y < 4; y++) {
1913 for (x = 0; x < 4; x++) {
1914 vp8_mc_luma(s, td, dst[0] + 4 * y * s->linesize + x * 4,
1915 ref, &bmv[4 * y + x],
1916 4 * x + x_off, 4 * y + y_off, 4, 4,
1917 width, height, s->linesize,
1918 s->put_pixels_tab[2]);
/* chroma: each 4x4 chroma block averages the four co-located
 * luma MVs, rounding away from zero via FF_SIGNBIT */
1927 for (y = 0; y < 2; y++) {
1928 for (x = 0; x < 2; x++) {
1929 uvmv.x = mb->bmv[2 * y * 4 + 2 * x ].x +
1930 mb->bmv[2 * y * 4 + 2 * x + 1].x +
1931 mb->bmv[(2 * y + 1) * 4 + 2 * x ].x +
1932 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].x;
1933 uvmv.y = mb->bmv[2 * y * 4 + 2 * x ].y +
1934 mb->bmv[2 * y * 4 + 2 * x + 1].y +
1935 mb->bmv[(2 * y + 1) * 4 + 2 * x ].y +
1936 mb->bmv[(2 * y + 1) * 4 + 2 * x + 1].y;
1937 uvmv.x = (uvmv.x + 2 + FF_SIGNBIT(uvmv.x)) >> 2;
1938 uvmv.y = (uvmv.y + 2 + FF_SIGNBIT(uvmv.y)) >> 2;
1939 if (s->profile == 3) {
1943 vp8_mc_chroma(s, td, dst[1] + 4 * y * s->uvlinesize + x * 4,
1944 dst[2] + 4 * y * s->uvlinesize + x * 4, ref,
1945 &uvmv, 4 * x + x_off, 4 * y + y_off, 4, 4,
1946 width, height, s->uvlinesize,
1947 s->put_pixels_tab[2]);
1952 case VP8_SPLITMVMODE_16x8:
1953 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1954 0, 0, 16, 8, width, height, &bmv[0]);
1955 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1956 0, 8, 16, 8, width, height, &bmv[1]);
1958 case VP8_SPLITMVMODE_8x16:
1959 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1960 0, 0, 8, 16, width, height, &bmv[0]);
1961 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1962 8, 0, 8, 16, width, height, &bmv[1]);
1964 case VP8_SPLITMVMODE_8x8:
1965 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1966 0, 0, 8, 8, width, height, &bmv[0]);
1967 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1968 8, 0, 8, 8, width, height, &bmv[1]);
1969 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1970 0, 8, 8, 8, width, height, &bmv[2]);
1971 vp8_mc_part(s, td, dst, ref, x_off, y_off,
1972 8, 8, 8, 8, width, height, &bmv[3]);
/* Add the decoded residuals to the predicted macroblock: run the inverse
 * DCT on every 4x4 block flagged non-zero in non_zero_count_cache.
 * nnz value 1 means DC-only (fast path); >1 means full IDCT. The
 * 0x01010101 mask tests four cached bytes at once per row. */
1977 static av_always_inline
1978 void idct_mb(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3], VP8Macroblock *mb)
/* I4x4 luma residuals were already added inside intra_predict() */
1982 if (mb->mode != MODE_I4x4) {
1983 uint8_t *y_dst = dst[0];
1984 for (y = 0; y < 4; y++) {
1985 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
1987 if (nnz4 & ~0x01010101) {
1988 for (x = 0; x < 4; x++) {
1989 if ((uint8_t) nnz4 == 1)
1990 s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x,
1993 else if ((uint8_t) nnz4 > 1)
1994 s->vp8dsp.vp8_idct_add(y_dst + 4 * x,
/* whole row is DC-only: vectorized 4-block DC add */
2002 s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
2005 y_dst += 4 * s->linesize;
2009 for (ch = 0; ch < 2; ch++) {
2010 uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4 + ch]);
2012 uint8_t *ch_dst = dst[1 + ch];
2013 if (nnz4 & ~0x01010101) {
2014 for (y = 0; y < 2; y++) {
2015 for (x = 0; x < 2; x++) {
2016 if ((uint8_t) nnz4 == 1)
2017 s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x,
2018 td->block[4 + ch][(y << 1) + x],
2020 else if ((uint8_t) nnz4 > 1)
2021 s->vp8dsp.vp8_idct_add(ch_dst + 4 * x,
2022 td->block[4 + ch][(y << 1) + x],
2026 goto chroma_idct_end;
2028 ch_dst += 4 * s->uvlinesize;
2031 s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4 + ch], s->uvlinesize);
/* Compute the loop-filter strength for one macroblock from the frame
 * filter level, segment and mode/ref deltas, and sharpness; results are
 * stored in *f for use by filter_mb()/filter_mb_simple(). */
2039 static av_always_inline
2040 void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb,
2041 VP8FilterStrength *f, int is_vp7)
2043 int interior_limit, filter_level;
2045 if (s->segmentation.enabled) {
2046 filter_level = s->segmentation.filter_level[mb->segment];
2047 if (!s->segmentation.absolute_vals)
2048 filter_level += s->filter.level;
2050 filter_level = s->filter.level;
2052 if (s->lf_delta.enabled) {
2053 filter_level += s->lf_delta.ref[mb->ref_frame];
2054 filter_level += s->lf_delta.mode[mb->mode];
/* level is a 6-bit quantity */
2057 filter_level = av_clip_uintp2(filter_level, 6);
2059 interior_limit = filter_level;
2060 if (s->filter.sharpness) {
2061 interior_limit >>= (s->filter.sharpness + 3) >> 2;
2062 interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
2064 interior_limit = FFMAX(interior_limit, 1);
2066 f->filter_level = filter_level;
2067 f->inner_limit = interior_limit;
/* inner (subblock) edges are filtered unless the mb is a skipped
 * whole-block inter mb (VP8 only; VP7 always filters inner edges) */
2068 f->inner_filter = is_vp7 || !mb->skip || mb->mode == MODE_I4x4 ||
2069 mb->mode == VP8_MVMODE_SPLIT;
/* Apply the normal (non-simple) loop filter to one macroblock: the
 * macroblock edges (stronger limits) and, when inner_filter is set, the
 * interior 4-pixel edges of luma and chroma. */
2072 static av_always_inline
2073 void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f,
2074 int mb_x, int mb_y, int is_vp7)
2076 int mbedge_lim, bedge_lim_y, bedge_lim_uv, hev_thresh;
2077 int filter_level = f->filter_level;
2078 int inner_limit = f->inner_limit;
2079 int inner_filter = f->inner_filter;
2080 ptrdiff_t linesize = s->linesize;
2081 ptrdiff_t uvlinesize = s->uvlinesize;
/* high-edge-variance threshold, indexed by [keyframe][filter_level] */
2082 static const uint8_t hev_thresh_lut[2][64] = {
2083 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2084 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2085 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2087 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2088 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2089 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* edge limits differ between VP7 and VP8 */
2097 bedge_lim_y = filter_level;
2098 bedge_lim_uv = filter_level * 2;
2099 mbedge_lim = filter_level + 2;
2102 bedge_lim_uv = filter_level * 2 + inner_limit;
2103 mbedge_lim = bedge_lim_y + 4;
2106 hev_thresh = hev_thresh_lut[s->keyframe][filter_level];
/* left macroblock edge (horizontal filtering) */
2109 s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
2110 mbedge_lim, inner_limit, hev_thresh);
2111 s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
2112 mbedge_lim, inner_limit, hev_thresh);
2115 #define H_LOOP_FILTER_16Y_INNER(cond) \
2116 if (cond && inner_filter) { \
2117 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2118 bedge_lim_y, inner_limit, \
2120 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2121 bedge_lim_y, inner_limit, \
2123 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2124 bedge_lim_y, inner_limit, \
2126 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2127 uvlinesize, bedge_lim_uv, \
2128 inner_limit, hev_thresh); \
2131 H_LOOP_FILTER_16Y_INNER(!is_vp7)
/* top macroblock edge (vertical filtering) */
2134 s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
2135 mbedge_lim, inner_limit, hev_thresh);
2136 s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
2137 mbedge_lim, inner_limit, hev_thresh);
/* inner vertical edges */
2141 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 4 * linesize,
2142 linesize, bedge_lim_y,
2143 inner_limit, hev_thresh);
2144 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 8 * linesize,
2145 linesize, bedge_lim_y,
2146 inner_limit, hev_thresh);
2147 s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
2148 linesize, bedge_lim_y,
2149 inner_limit, hev_thresh);
2150 s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
2151 dst[2] + 4 * uvlinesize,
2152 uvlinesize, bedge_lim_uv,
2153 inner_limit, hev_thresh);
/* VP7 runs the horizontal inner filter after the vertical pass */
2156 H_LOOP_FILTER_16Y_INNER(is_vp7)
/* Apply the simple loop filter (luma only) to one macroblock:
 * macroblock edges with mbedge_lim, interior 4-pixel edges with
 * bedge_lim when inner filtering is enabled. */
2159 static av_always_inline
2160 void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f,
2163 int mbedge_lim, bedge_lim;
2164 int filter_level = f->filter_level;
2165 int inner_limit = f->inner_limit;
2166 int inner_filter = f->inner_filter;
2167 ptrdiff_t linesize = s->linesize;
2172 bedge_lim = 2 * filter_level + inner_limit;
2173 mbedge_lim = bedge_lim + 4;
/* left edge, then inner vertical column edges */
2176 s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
2178 s->vp8dsp.vp8_h_loop_filter_simple(dst + 4, linesize, bedge_lim);
2179 s->vp8dsp.vp8_h_loop_filter_simple(dst + 8, linesize, bedge_lim);
2180 s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
/* top edge, then inner horizontal row edges */
2184 s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
2186 s->vp8dsp.vp8_v_loop_filter_simple(dst + 4 * linesize, linesize, bedge_lim);
2187 s->vp8dsp.vp8_v_loop_filter_simple(dst + 8 * linesize, linesize, bedge_lim);
2188 s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
2192 #define MARGIN (16 << 2)
/* Decode mb modes/MVs for the whole frame in a single pass (used for the
 * alternate macroblock layout); sets up the MV clamping margins and the
 * per-row intra prediction contexts. */
2193 static av_always_inline
2194 void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
2195 VP8Frame *prev_frame, int is_vp7)
2197 VP8Context *s = avctx->priv_data;
/* MVs may point up to MARGIN (64 1/4-pel units = 16 pixels) outside */
2200 s->mv_min.y = -MARGIN;
2201 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2202 for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
2203 VP8Macroblock *mb = s->macroblocks_base +
2204 ((s->mb_width + 1) * (mb_y + 1) + 1);
2205 int mb_xy = mb_y * s->mb_width;
/* reset the left intra-mode context at the start of each row */
2207 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
2209 s->mv_min.x = -MARGIN;
2210 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2211 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
/* first row: fake DC_PRED top context in the row-above slot */
2213 AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top,
2214 DC_PRED * 0x01010101);
2215 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2216 prev_frame && prev_frame->seg_map ?
2217 prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7);
/* VP7 instantiation of the shared mode/MV decoding pass. */
2226 static void vp7_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2227 VP8Frame *prev_frame)
2229 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP7);
/* VP8 instantiation of the shared mode/MV decoding pass. */
2232 static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *cur_frame,
2233 VP8Frame *prev_frame)
2235 vp78_decode_mv_mb_modes(avctx, cur_frame, prev_frame, IS_VP8);
/* Sliced-threading synchronization: positions are packed as
 * (mb_y << 16) | mb_x so a single integer compare orders (row, col).
 * check_thread_pos blocks until the other thread (otd) has decoded past
 * the given macroblock position. */
2239 #define check_thread_pos(td, otd, mb_x_check, mb_y_check) \
2241 int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF); \
2242 if (otd->thread_mb_pos < tmp) { \
2243 pthread_mutex_lock(&otd->lock); \
2244 td->wait_mb_pos = tmp; \
2246 if (otd->thread_mb_pos >= tmp) \
2248 pthread_cond_wait(&otd->cond, &otd->lock); \
2250 td->wait_mb_pos = INT_MAX; \
2251 pthread_mutex_unlock(&otd->lock); \
/* update_pos publishes this thread's progress and wakes any neighbour
 * thread that is waiting on a position we have now passed. */
2255 #define update_pos(td, mb_y, mb_x) \
2257 int pos = (mb_y << 16) | (mb_x & 0xFFFF); \
2258 int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
2260 int is_null = !next_td || !prev_td; \
2261 int pos_check = (is_null) ? 1 \
2262 : (next_td != td && \
2263 pos >= next_td->wait_mb_pos) || \
2265 pos >= prev_td->wait_mb_pos); \
2266 td->thread_mb_pos = pos; \
2267 if (sliced_threading && pos_check) { \
2268 pthread_mutex_lock(&td->lock); \
2269 pthread_cond_broadcast(&td->cond); \
2270 pthread_mutex_unlock(&td->lock); \
/* no-op versions for builds without sliced threading */
2274 #define check_thread_pos(td, otd, mb_x_check, mb_y_check)
2275 #define update_pos(td, mb_y, mb_x)
/* Decode one macroblock row: per-MB mode parsing, residual coefficient
 * decoding, intra/inter prediction and inverse transform — but no loop
 * filtering (that is applied separately by vp8_filter_mb_row()).
 * Rows are interleaved across s->num_jobs worker threads; the
 * check_thread_pos()/update_pos() pairs synchronise against the threads
 * decoding the rows above and below this one.
 * NOTE(review): interior lines (braces, else branches) are missing from
 * this extract; control-flow pairing below is reconstructed by eye. */
2278 static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
2279                                         int jobnr, int threadnr, int is_vp7)
2281 VP8Context *s = avctx->priv_data;
2282 VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
2283 int mb_y = td->thread_mb_pos >> 16;
2284 int mb_x, mb_xy = mb_y * s->mb_width;
2285 int num_jobs = s->num_jobs;
2286 VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
/* One coefficient range coder per partition; rows map onto partitions
 * round-robin (num_coeff_partitions is a power of two, so & works). */
2287 VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions - 1)];
/* Per-plane destination pointers for the top-left of this MB row. */
2290 curframe->tf.f->data[0] + 16 * mb_y * s->linesize,
2291 curframe->tf.f->data[1] + 8 * mb_y * s->uvlinesize,
2292 curframe->tf.f->data[2] + 8 * mb_y * s->uvlinesize
2297 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2298 if (mb_y == s->mb_height - 1)
2301 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2302 if (s->mb_layout == 1)
2303 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2305 // Make sure the previous frame has read its segmentation map,
2306 // if we re-use the same map.
2307 if (prev_frame && s->segmentation.enabled &&
2308 !s->segmentation.update_map)
2309 ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
/* mb_layout == 0: two-row ring buffer of macroblock contexts. */
2310 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2311 memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
2312 AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101);
/* VP7 carries left_nnz across rows; VP8 resets it every row. */
2315 if (!is_vp7 || mb_y == 0)
2316 memset(td->left_nnz, 0, sizeof(td->left_nnz));
/* Clamp motion vectors to the frame plus MARGIN (1/8-pel units). */
2318 s->mv_min.x = -MARGIN;
2319 s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
2321 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
2322 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2323 if (prev_td != td) {
2324 if (threadnr != 0) {
2325 check_thread_pos(td, prev_td,
2326 mb_x + (is_vp7 ? 2 : 1),
2327 mb_y - (is_vp7 ? 2 : 1));
/* Thread 0 waits on positions offset by mb_width+3, the bias used by
 * the filtering pass (see vp8_filter_mb_row / update_pos calls). */
2329 check_thread_pos(td, prev_td,
2330 mb_x + (is_vp7 ? 2 : 1) + s->mb_width + 3,
2331 mb_y - (is_vp7 ? 2 : 1));
2335 s->vdsp.prefetch(dst[0] + (mb_x & 3) * 4 * s->linesize + 64,
2337 s->vdsp.prefetch(dst[1] + (mb_x & 7) * s->uvlinesize + 64,
2338 dst[2] - dst[1], 2);
2341 decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
2342 prev_frame && prev_frame->seg_map ?
2343 prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7);
2345 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);
2348 decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz, is_vp7);
2350 if (mb->mode <= MODE_I4x4)
2351 intra_predict(s, td, dst, mb, mb_x, mb_y, is_vp7);
2353 inter_predict(s, td, dst, mb, mb_x, mb_y);
2355 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);
2358 idct_mb(s, td, dst, mb);
/* Skipped MB with no coefficients: clear the nnz context instead. */
2360 AV_ZERO64(td->left_nnz);
2361 AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned
2363 /* Reset DC block predictors if they would exist
2364 * if the mb had coefficients */
2365 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
2366 td->left_nnz[8] = 0;
2367 s->top_nnz[mb_x][8] = 0;
2371 if (s->deblock_filter)
2372 filter_level_for_mb(s, mb, &td->filter_strength[mb_x], is_vp7);
/* Last job backs up the MB border now, since no filter pass follows
 * on this thread before the row below reads these pixels. */
2374 if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs - 1) {
2375 if (s->filter.simple)
2376 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2377 NULL, NULL, s->linesize, 0, 1);
2379 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2380 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2383 prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);
2391 if (mb_x == s->mb_width + 1) {
2392 update_pos(td, mb_y, s->mb_width + 3);
2394 update_pos(td, mb_y, mb_x);
/* Apply the in-loop deblocking filter to one macroblock row, using the
 * per-MB filter strengths stored in td->filter_strength by
 * vp8_decode_mb_row_no_filter(). Positions reported via update_pos()
 * are biased by mb_width+3 so decode and filter progress of the same
 * row are distinguishable to waiting neighbour threads. */
2399 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
2400                               int jobnr, int threadnr, int is_vp7)
2402 VP8Context *s = avctx->priv_data;
2403 VP8ThreadData *td = &s->thread_data[threadnr];
2404 int mb_x, mb_y = td->thread_mb_pos >> 16, num_jobs = s->num_jobs;
2405 AVFrame *curframe = s->curframe->tf.f;
2407 VP8ThreadData *prev_td, *next_td;
/* Per-plane pointers to the top-left of this MB row. */
2409 curframe->data[0] + 16 * mb_y * s->linesize,
2410 curframe->data[1] + 8 * mb_y * s->uvlinesize,
2411 curframe->data[2] + 8 * mb_y * s->uvlinesize
2414 if (s->mb_layout == 1)
2415 mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1);
2417 mb = s->macroblocks + (s->mb_height - mb_y - 1) * 2;
2422 prev_td = &s->thread_data[(jobnr + num_jobs - 1) % num_jobs];
2423 if (mb_y == s->mb_height - 1)
2426 next_td = &s->thread_data[(jobnr + 1) % num_jobs];
2428 for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
2429 VP8FilterStrength *f = &td->filter_strength[mb_x];
/* Wait until the row above has been filtered past mb_x+1 ... */
2431 check_thread_pos(td, prev_td,
2432 (mb_x + 1) + (s->mb_width + 3), mb_y - 1);
/* ... and the row below has been decoded past mb_x+1. */
2434 if (next_td != &s->thread_data[0])
2435 check_thread_pos(td, next_td, mb_x + 1, mb_y + 1);
/* Single-job case: border backup was not done in the decode pass. */
2437 if (num_jobs == 1) {
2438 if (s->filter.simple)
2439 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2440 NULL, NULL, s->linesize, 0, 1);
2442 backup_mb_border(s->top_border[mb_x + 1], dst[0],
2443 dst[1], dst[2], s->linesize, s->uvlinesize, 0);
2446 if (s->filter.simple)
2447 filter_mb_simple(s, dst[0], f, mb_x, mb_y);
2449 filter_mb(s, dst, f, mb_x, mb_y, is_vp7);
2454 update_pos(td, mb_y, (s->mb_width + 3) + mb_x);
/* Slice-threading worker body shared by VP7 and VP8: job 'jobnr'
 * decodes (and, when the deblock filter is enabled, filters) every
 * num_jobs-th macroblock row. Under frame threading it also reports
 * per-row progress so a dependent frame thread can start early. */
2458 static av_always_inline
2459 int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr,
2460                               int threadnr, int is_vp7)
2462 VP8Context *s = avctx->priv_data;
2463 VP8ThreadData *td = &s->thread_data[jobnr];
2464 VP8ThreadData *next_td = NULL, *prev_td = NULL;
2465 VP8Frame *curframe = s->curframe;
2466 int mb_y, num_jobs = s->num_jobs;
2468 td->thread_nr = threadnr;
2469 for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
2470 if (mb_y >= s->mb_height)
/* Row number is carried to the row workers in the high 16 bits. */
2472 td->thread_mb_pos = mb_y << 16;
2473 vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr, is_vp7);
2474 if (s->deblock_filter)
2475 vp8_filter_mb_row(avctx, tdata, jobnr, threadnr, is_vp7);
/* Mark the whole row done so waiters on any mb_x can proceed. */
2476 update_pos(td, mb_y, INT_MAX & 0xFFFF);
2481 if (avctx->active_thread_type == FF_THREAD_FRAME)
2482 ff_thread_report_progress(&curframe->tf, mb_y, 0);
/* execute2() callback: VP7 specialization of the shared row worker.
 * The is_vp7 constant lets the av_always_inline body fold branches. */
2488 static int vp7_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2489                                     int jobnr, int threadnr)
2491 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP7);
/* execute2() callback: VP8 specialization of the shared row worker. */
2494 static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
2495                                     int jobnr, int threadnr)
2497 return vp78_decode_mb_row_sliced(avctx, tdata, jobnr, threadnr, IS_VP8);
/* Common VP7/VP8 frame decoding entry point: parses the frame header,
 * picks a free frame buffer, resolves the golden/altref/previous
 * reference updates into next_framep[], then decodes the macroblock
 * rows either via a hwaccel or the sliced software path, and finally
 * publishes the new reference set and (if visible) the output frame.
 * NOTE(review): error-handling branches and closing braces are missing
 * from this extract; goto targets (err/skip) are not visible here. */
2500 static av_always_inline
2501 int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2502 AVPacket *avpkt, int is_vp7)
2504 VP8Context *s = avctx->priv_data;
2505 int ret, i, referenced, num_jobs;
2506 enum AVDiscard skip_thresh;
2507 VP8Frame *av_uninit(curframe), *prev_frame;
2510 ret = vp7_decode_frame_header(s, avpkt->data, avpkt->size);
2512 ret = vp8_decode_frame_header(s, avpkt->data, avpkt->size);
/* First VP8 frame: negotiate output pixel format (hwaccel or YUV420P). */
2517 if (!is_vp7 && s->pix_fmt == AV_PIX_FMT_NONE) {
2518 enum AVPixelFormat pix_fmts[] = {
2519 #if CONFIG_VP8_VAAPI_HWACCEL
2526 s->pix_fmt = ff_get_format(s->avctx, pix_fmts);
2527 if (s->pix_fmt < 0) {
2528 ret = AVERROR(EINVAL);
2531 avctx->pix_fmt = s->pix_fmt;
2534 prev_frame = s->framep[VP56_FRAME_CURRENT];
/* A frame is "referenced" if any later frame will read from it. */
2536 referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT ||
2537 s->update_altref == VP56_FRAME_CURRENT;
2539 skip_thresh = !referenced ? AVDISCARD_NONREF
2540 : !s->keyframe ? AVDISCARD_NONKEY
2543 if (avctx->skip_frame >= skip_thresh) {
2545 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
2548 s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
2550 // release no longer referenced frames
2551 for (i = 0; i < 5; i++)
2552 if (s->frames[i].tf.f->data[0] &&
2553 &s->frames[i] != prev_frame &&
2554 &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
2555 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
2556 &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
2557 vp8_release_frame(s, &s->frames[i]);
2559 curframe = s->framep[VP56_FRAME_CURRENT] = vp8_find_free_buffer(s);
2562 avctx->colorspace = AVCOL_SPC_BT470BG;
2564 avctx->color_range = AVCOL_RANGE_JPEG;
2566 avctx->color_range = AVCOL_RANGE_MPEG;
2568 /* Given that arithmetic probabilities are updated every frame, it's quite
2569 * likely that the values we have on a random interframe are complete
2570 * junk if we didn't start decode on a keyframe. So just don't display
2571 * anything rather than junk. */
2572 if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
2573 !s->framep[VP56_FRAME_GOLDEN] ||
2574 !s->framep[VP56_FRAME_GOLDEN2])) {
2575 av_log(avctx, AV_LOG_WARNING,
2576 "Discarding interframe without a prior keyframe!\n");
2577 ret = AVERROR_INVALIDDATA;
2581 curframe->tf.f->key_frame = s->keyframe;
2582 curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I
2583 : AV_PICTURE_TYPE_P;
2584 if ((ret = vp8_alloc_frame(s, curframe, referenced))) {
2585 av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
2589 // check if golden and altref are swapped
2590 if (s->update_altref != VP56_FRAME_NONE)
2591 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref];
2593 s->next_framep[VP56_FRAME_GOLDEN2] = s->framep[VP56_FRAME_GOLDEN2];
2595 if (s->update_golden != VP56_FRAME_NONE)
2596 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden];
2598 s->next_framep[VP56_FRAME_GOLDEN] = s->framep[VP56_FRAME_GOLDEN];
2601 s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
2603 s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
2605 s->next_framep[VP56_FRAME_CURRENT] = curframe;
/* Reference bookkeeping is done: frame threads may proceed from here. */
2607 ff_thread_finish_setup(avctx);
/* Hardware-accelerated path: hand the whole packet to the hwaccel. */
2609 if (avctx->hwaccel) {
2610 ret = avctx->hwaccel->start_frame(avctx, avpkt->data, avpkt->size);
2614 ret = avctx->hwaccel->decode_slice(avctx, avpkt->data, avpkt->size);
2618 ret = avctx->hwaccel->end_frame(avctx);
/* Software path below. */
2623 s->linesize = curframe->tf.f->linesize[0];
2624 s->uvlinesize = curframe->tf.f->linesize[1];
2626 memset(s->top_nnz, 0, s->mb_width * sizeof(*s->top_nnz));
2627 /* Zero macroblock structures for top/top-left prediction
2628 * from outside the frame. */
2630 memset(s->macroblocks + s->mb_height * 2 - 1, 0,
2631 (s->mb_width + 1) * sizeof(*s->macroblocks));
2632 if (!s->mb_layout && s->keyframe)
2633 memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width * 4);
2635 memset(s->ref_count, 0, sizeof(s->ref_count));
/* mb_layout == 1: decode all MV/mode info up front in a separate pass. */
2637 if (s->mb_layout == 1) {
2638 // Make sure the previous frame has read its segmentation map,
2639 // if we re-use the same map.
2640 if (prev_frame && s->segmentation.enabled &&
2641 !s->segmentation.update_map)
2642 ff_thread_await_progress(&prev_frame->tf, 1, 0);
2644 vp7_decode_mv_mb_modes(avctx, curframe, prev_frame);
2646 vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
2649 if (avctx->active_thread_type == FF_THREAD_FRAME)
/* One job per coefficient partition, capped by the thread count. */
2652 num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
2653 s->num_jobs = num_jobs;
2654 s->curframe = curframe;
2655 s->prev_frame = prev_frame;
2656 s->mv_min.y = -MARGIN;
2657 s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
2658 for (i = 0; i < MAX_THREADS; i++) {
2659 s->thread_data[i].thread_mb_pos = 0;
2660 s->thread_data[i].wait_mb_pos = INT_MAX;
2664 avctx->execute2(avctx, vp7_decode_mb_row_sliced, s->thread_data, NULL,
2667 avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL,
2671 ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
2672 memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);
2675 // if future frames don't use the updated probabilities,
2676 // reset them to the values we saved
2677 if (!s->update_probabilities)
2678 s->prob[0] = s->prob[1];
2680 if (!s->invisible) {
2681 if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
/* Error path: restore the reference set we started from. */
2688 memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
/* Public VP8 decode entry point (AVCodec.decode). */
2692 int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2695 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP8);
2698 #if CONFIG_VP7_DECODER
/* VP7 decode entry point (AVCodec.decode), compiled only when enabled. */
2699 static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
2702 return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
2704 #endif /* CONFIG_VP7_DECODER */
/* AVCodec.close: flush/free all decoder buffers (vp8_decode_flush_impl
 * with free_mem=1) and release the five frame structures. */
2706 av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
2708 VP8Context *s = avctx->priv_data;
2711 vp8_decode_flush_impl(avctx, 1);
2712 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
2713 av_frame_free(&s->frames[i].tf.f);
/* Allocate the AVFrame shell for each of the decoder's frame slots.
 * Returns 0 on success or AVERROR(ENOMEM); the caller is responsible
 * for freeing already-allocated frames on failure (ff_vp8_decode_free). */
2718 static av_cold int vp8_init_frames(VP8Context *s)
2721 for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
2722 s->frames[i].tf.f = av_frame_alloc();
2723 if (!s->frames[i].tf.f)
2724 return AVERROR(ENOMEM);
/* Shared VP7/VP8 init: set default pixel format, initialise the DSP,
 * prediction and (codec-specific) IDCT/loop-filter function tables,
 * seed the scan order, and allocate the frame shells. */
2729 static av_always_inline
2730 int vp78_decode_init(AVCodecContext *avctx, int is_vp7)
2732 VP8Context *s = avctx->priv_data;
2736 s->pix_fmt = AV_PIX_FMT_NONE;
2737 avctx->pix_fmt = AV_PIX_FMT_YUV420P;
2738 avctx->internal->allocate_progress = 1;
2740 ff_videodsp_init(&s->vdsp, 8);
2742 ff_vp78dsp_init(&s->vp8dsp);
2743 if (CONFIG_VP7_DECODER && is_vp7) {
2744 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP7, 8, 1);
2745 ff_vp7dsp_init(&s->vp8dsp);
2746 } else if (CONFIG_VP8_DECODER && !is_vp7) {
2747 ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
2748 ff_vp8dsp_init(&s->vp8dsp);
2751 /* does not change for VP8 */
2752 memcpy(s->prob[0].scan, ff_zigzag_scan, sizeof(s->prob[0].scan));
2754 if ((ret = vp8_init_frames(s)) < 0) {
/* vp8_init_frames may fail mid-loop; free whatever was allocated. */
2755 ff_vp8_decode_free(avctx);
2762 #if CONFIG_VP7_DECODER
/* VP7 AVCodec.init wrapper. */
2763 static int vp7_decode_init(AVCodecContext *avctx)
2765 return vp78_decode_init(avctx, IS_VP7);
2767 #endif /* CONFIG_VP7_DECODER */
/* VP8 AVCodec.init wrapper. */
2769 av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
2771 return vp78_decode_init(avctx, IS_VP8);
2774 #if CONFIG_VP8_DECODER
/* Frame-threading init for a worker-thread copy of the context: only
 * the per-context frame shells need allocating; DSP tables were copied. */
2775 static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
2777 VP8Context *s = avctx->priv_data;
2782 if ((ret = vp8_init_frames(s)) < 0) {
2783 ff_vp8_decode_free(avctx);
/* REBASE: translate a frame pointer from the source context into the
 * corresponding slot of this context's frames[] array (NULL-safe). */
2790 #define REBASE(pic) pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
/* Frame-threading state hand-off: copy probabilities, segmentation and
 * loop-filter deltas from the previous frame's context, re-reference
 * its frame buffers, and rebase the framep[] pointers. */
2792 static int vp8_decode_update_thread_context(AVCodecContext *dst,
2793                                             const AVCodecContext *src)
2795 VP8Context *s = dst->priv_data, *s_src = src->priv_data;
/* Dimensions changed mid-stream: drop per-size buffers and re-adopt. */
2798 if (s->macroblocks_base &&
2799 (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
2801 s->mb_width = s_src->mb_width;
2802 s->mb_height = s_src->mb_height;
/* Take the post-update probabilities when the source updated them. */
2805 s->prob[0] = s_src->prob[!s_src->update_probabilities];
2806 s->segmentation = s_src->segmentation;
2807 s->lf_delta = s_src->lf_delta;
2808 memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
2810 for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
2811 if (s_src->frames[i].tf.f->data[0]) {
2812 int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
2818 s->framep[0] = REBASE(s_src->next_framep[0]);
2819 s->framep[1] = REBASE(s_src->next_framep[1]);
2820 s->framep[2] = REBASE(s_src->next_framep[2]);
2821 s->framep[3] = REBASE(s_src->next_framep[3]);
2825 #endif /* CONFIG_VP8_DECODER */
2827 #if CONFIG_VP7_DECODER
/* VP7 decoder registration. No threading capabilities: VP7 decoding
 * here is single-threaded (no FRAME/SLICE_THREADS flags). */
2828 AVCodec ff_vp7_decoder = {
2830 .long_name = NULL_IF_CONFIG_SMALL("On2 VP7"),
2831 .type = AVMEDIA_TYPE_VIDEO,
2832 .id = AV_CODEC_ID_VP7,
2833 .priv_data_size = sizeof(VP8Context),
2834 .init = vp7_decode_init,
2835 .close = ff_vp8_decode_free,
2836 .decode = vp7_decode_frame,
2837 .capabilities = AV_CODEC_CAP_DR1,
2838 .flush = vp8_decode_flush,
2840 #endif /* CONFIG_VP7_DECODER */
2842 #if CONFIG_VP8_DECODER
/* VP8 decoder registration: supports direct rendering plus both frame
 * and slice threading (thread-copy/update callbacks below). */
2843 AVCodec ff_vp8_decoder = {
2845 .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),
2846 .type = AVMEDIA_TYPE_VIDEO,
2847 .id = AV_CODEC_ID_VP8,
2848 .priv_data_size = sizeof(VP8Context),
2849 .init = ff_vp8_decode_init,
2850 .close = ff_vp8_decode_free,
2851 .decode = ff_vp8_decode_frame,
2852 .capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
2853 AV_CODEC_CAP_SLICE_THREADS,
2854 .flush = vp8_decode_flush,
2855 .init_thread_copy = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
2856 .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
2858 #endif /* CONFIG_VP8_DECODER */